summaryrefslogtreecommitdiff
path: root/ext/json/ext/parser
diff options
context:
space:
mode:
Diffstat (limited to 'ext/json/ext/parser')
-rw-r--r-- ext/json/ext/parser/extconf.h 3
-rw-r--r-- ext/json/ext/parser/extconf.rb 9
-rw-r--r-- ext/json/ext/parser/parser.c 1601
-rw-r--r-- ext/json/ext/parser/parser.rl 569
-rw-r--r-- ext/json/ext/parser/unicode.c 156
-rwxr-xr-x ext/json/ext/parser/unicode.h 58
6 files changed, 2396 insertions, 0 deletions
diff --git a/ext/json/ext/parser/extconf.h b/ext/json/ext/parser/extconf.h
new file mode 100644
index 0000000000..cda0cc8ea5
--- /dev/null
+++ b/ext/json/ext/parser/extconf.h
@@ -0,0 +1,3 @@
+#ifndef EXTCONF_H
+#define EXTCONF_H
+#endif
diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb
new file mode 100644
index 0000000000..085c8d060c
--- /dev/null
+++ b/ext/json/ext/parser/extconf.rb
@@ -0,0 +1,9 @@
+require 'mkmf'
+require 'rbconfig'
+
+if CONFIG['CC'] =~ /gcc/
+ #CONFIG['CC'] += ' -Wall -ggdb'
+ CONFIG['CC'] += ' -Wall'
+end
+
+create_makefile 'json/ext/parser'
diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c
new file mode 100644
index 0000000000..7448e5fb7a
--- /dev/null
+++ b/ext/json/ext/parser/parser.c
@@ -0,0 +1,1601 @@
+#line 1 "parser.rl"
+/* vim: set cin et sw=4 ts=4: */
+
+#include "ruby.h"
+#include "re.h"
+#include "st.h"
+#include "unicode.h"
+
+#define EVIL 0x666
+
+/* Ruby module/class handles and exception classes; assigned in Init_parser(). */
+static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
+
+/* Interned method and option names; assigned in Init_parser(). */
+static ID i_json_creatable_p, i_json_create, i_create_id, i_chr, i_max_nesting;
+
+/* Per-instance parser state, wrapped into the Ruby parser object. */
+typedef struct JSON_ParserStruct {
+ VALUE Vsource; /* the source String object; marked in JSON_mark() */
+ char *source; /* RSTRING_PTR of the source, taken in cParser_initialize() */
+ long len; /* RSTRING_LEN of the source, taken in cParser_initialize() */
+ char *memo; /* start of the number/string token currently being scanned */
+ VALUE create_id; /* result of JSON.create_id; hash key looked up after an object was parsed */
+ int max_nesting; /* maximum allowed nesting depth; 0 disables the check */
+ int current_nesting; /* depth of the array/object currently being parsed */
+} JSON_Parser;
+
+/*
+ * Forward declarations of the scanner functions (they call each other).
+ * Each one scans the input from p up to (not including) pe and stores the
+ * parsed Ruby value in *result. On success a position in the input is
+ * returned; on failure NULL is returned, except for JSON_parse_array, which
+ * raises a ParserError instead.
+ */
+static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
+
+/*
+ * Declares a local variable "json" and fills it with the JSON_Parser struct
+ * wrapped in "self". A variable named "self" must be in scope at the point
+ * of use.
+ */
+#define GET_STRUCT \
+ JSON_Parser *json; \
+ Data_Get_Struct(self, JSON_Parser, json);
+
+#line 58 "parser.rl"
+
+
+
+#line 41 "parser.c"
+/* State numbers of the Ragel-generated JSON_object machine. */
+static const int JSON_object_start = 1;
+static const int JSON_object_first_final = 27;
+static const int JSON_object_error = 0;
+
+static const int JSON_object_en_main = 1;
+
+#line 91 "parser.rl"
+
+
+/*
+ * Parses a JSON object whose opening brace is at p; pe is the end of input.
+ *
+ * A new Hash is stored in *result. Names are scanned with
+ * JSON_parse_string() and values with JSON_parse_value(); whitespace and
+ * C-style or C++-style comments are skipped between tokens.
+ *
+ * Raises eNestingError if max_nesting is non-zero and current_nesting
+ * exceeds it.
+ *
+ * After a successful parse, if the hash contains a non-nil entry under
+ * json->create_id, that entry is resolved to a class with rb_path2class();
+ * when the class answers json_creatable? with a true value, *result is
+ * replaced by the return value of its json_create(hash).
+ *
+ * Returns the position just after the closing brace, or NULL if the input
+ * is not a well-formed object.
+ */
+static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+ int cs = EVIL;
+ VALUE last_name = Qnil;
+
+ if (json->max_nesting && json->current_nesting > json->max_nesting) {
+ rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
+ }
+
+ *result = rb_hash_new();
+
+
+#line 63 "parser.c"
+ {
+ cs = JSON_object_start;
+ }
+#line 105 "parser.rl"
+
+#line 69 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+case 1:
+ if ( (*p) == 123 )
+ goto st2;
+ goto st0;
+st0:
+ goto _out0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ switch( (*p) ) {
+ case 13: goto st2;
+ case 32: goto st2;
+ case 34: goto tr2;
+ case 47: goto st23;
+ case 125: goto tr4;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st2;
+ goto st0;
+tr2:
+#line 77 "parser.rl"
+ {
+ /* Scan a member name; the helper returns the position after it. */
+ char *np = JSON_parse_string(json, p, pe, &last_name);
+ if (np == NULL) goto _out3; else {p = (( np))-1;}
+ }
+ goto st3;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+#line 106 "parser.c"
+ switch( (*p) ) {
+ case 13: goto st3;
+ case 32: goto st3;
+ case 47: goto st4;
+ case 58: goto st8;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st3;
+ goto st0;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ switch( (*p) ) {
+ case 42: goto st5;
+ case 47: goto st7;
+ }
+ goto st0;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+ if ( (*p) == 42 )
+ goto st6;
+ goto st5;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ switch( (*p) ) {
+ case 42: goto st6;
+ case 47: goto st3;
+ }
+ goto st5;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ if ( (*p) == 10 )
+ goto st3;
+ goto st7;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+ switch( (*p) ) {
+ case 13: goto st8;
+ case 32: goto st8;
+ case 34: goto tr11;
+ case 45: goto tr11;
+ case 47: goto st19;
+ case 91: goto tr11;
+ case 102: goto tr11;
+ case 110: goto tr11;
+ case 116: goto tr11;
+ case 123: goto tr11;
+ }
+ if ( (*p) > 10 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr11;
+ } else if ( (*p) >= 9 )
+ goto st8;
+ goto st0;
+tr11:
+#line 66 "parser.rl"
+ {
+ /* Scan the member value and store it under the name scanned before. */
+ VALUE v = Qnil;
+ char *np = JSON_parse_value(json, p, pe, &v);
+ if (np == NULL) {
+ goto _out9;
+ } else {
+ rb_hash_aset(*result, last_name, v);
+ {p = (( np))-1;}
+ }
+ }
+ goto st9;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+#line 187 "parser.c"
+ switch( (*p) ) {
+ case 13: goto st9;
+ case 32: goto st9;
+ case 44: goto st10;
+ case 47: goto st15;
+ case 125: goto tr4;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st9;
+ goto st0;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+ switch( (*p) ) {
+ case 13: goto st10;
+ case 32: goto st10;
+ case 34: goto tr2;
+ case 47: goto st11;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st10;
+ goto st0;
+st11:
+ if ( ++p == pe )
+ goto _out11;
+case 11:
+ switch( (*p) ) {
+ case 42: goto st12;
+ case 47: goto st14;
+ }
+ goto st0;
+st12:
+ if ( ++p == pe )
+ goto _out12;
+case 12:
+ if ( (*p) == 42 )
+ goto st13;
+ goto st12;
+st13:
+ if ( ++p == pe )
+ goto _out13;
+case 13:
+ switch( (*p) ) {
+ case 42: goto st13;
+ case 47: goto st10;
+ }
+ goto st12;
+st14:
+ if ( ++p == pe )
+ goto _out14;
+case 14:
+ if ( (*p) == 10 )
+ goto st10;
+ goto st14;
+st15:
+ if ( ++p == pe )
+ goto _out15;
+case 15:
+ switch( (*p) ) {
+ case 42: goto st16;
+ case 47: goto st18;
+ }
+ goto st0;
+st16:
+ if ( ++p == pe )
+ goto _out16;
+case 16:
+ if ( (*p) == 42 )
+ goto st17;
+ goto st16;
+st17:
+ if ( ++p == pe )
+ goto _out17;
+case 17:
+ switch( (*p) ) {
+ case 42: goto st17;
+ case 47: goto st9;
+ }
+ goto st16;
+st18:
+ if ( ++p == pe )
+ goto _out18;
+case 18:
+ if ( (*p) == 10 )
+ goto st9;
+ goto st18;
+tr4:
+#line 82 "parser.rl"
+ { goto _out27; }
+ goto st27;
+st27:
+ if ( ++p == pe )
+ goto _out27;
+case 27:
+#line 283 "parser.c"
+ goto st0;
+st19:
+ if ( ++p == pe )
+ goto _out19;
+case 19:
+ switch( (*p) ) {
+ case 42: goto st20;
+ case 47: goto st22;
+ }
+ goto st0;
+st20:
+ if ( ++p == pe )
+ goto _out20;
+case 20:
+ if ( (*p) == 42 )
+ goto st21;
+ goto st20;
+st21:
+ if ( ++p == pe )
+ goto _out21;
+case 21:
+ switch( (*p) ) {
+ case 42: goto st21;
+ case 47: goto st8;
+ }
+ goto st20;
+st22:
+ if ( ++p == pe )
+ goto _out22;
+case 22:
+ if ( (*p) == 10 )
+ goto st8;
+ goto st22;
+st23:
+ if ( ++p == pe )
+ goto _out23;
+case 23:
+ switch( (*p) ) {
+ case 42: goto st24;
+ case 47: goto st26;
+ }
+ goto st0;
+st24:
+ if ( ++p == pe )
+ goto _out24;
+case 24:
+ if ( (*p) == 42 )
+ goto st25;
+ goto st24;
+st25:
+ if ( ++p == pe )
+ goto _out25;
+case 25:
+ switch( (*p) ) {
+ case 42: goto st25;
+ case 47: goto st2;
+ }
+ goto st24;
+st26:
+ if ( ++p == pe )
+ goto _out26;
+case 26:
+ if ( (*p) == 10 )
+ goto st2;
+ goto st26;
+ }
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+ _out10: cs = 10; goto _out;
+ _out11: cs = 11; goto _out;
+ _out12: cs = 12; goto _out;
+ _out13: cs = 13; goto _out;
+ _out14: cs = 14; goto _out;
+ _out15: cs = 15; goto _out;
+ _out16: cs = 16; goto _out;
+ _out17: cs = 17; goto _out;
+ _out18: cs = 18; goto _out;
+ _out27: cs = 27; goto _out;
+ _out19: cs = 19; goto _out;
+ _out20: cs = 20; goto _out;
+ _out21: cs = 21; goto _out;
+ _out22: cs = 22; goto _out;
+ _out23: cs = 23; goto _out;
+ _out24: cs = 24; goto _out;
+ _out25: cs = 25; goto _out;
+ _out26: cs = 26; goto _out;
+
+ _out: {}
+ }
+#line 106 "parser.rl"
+
+ if (cs >= JSON_object_first_final) {
+ /* Optionally turn the hash into an instance of the class it names. */
+ VALUE klassname = rb_hash_aref(*result, json->create_id);
+ if (!NIL_P(klassname)) {
+ VALUE klass = rb_path2class(StringValueCStr(klassname));
+ if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) {
+ *result = rb_funcall(klass, i_json_create, 1, *result);
+ }
+ }
+ /* p still points at the closing brace here. */
+ return p + 1;
+ } else {
+ return NULL;
+ }
+}
+
+
+#line 397 "parser.c"
+/* State numbers of the Ragel-generated JSON_value machine. */
+static const int JSON_value_start = 1;
+static const int JSON_value_first_final = 12;
+static const int JSON_value_error = 0;
+
+static const int JSON_value_en_main = 1;
+
+#line 177 "parser.rl"
+
+
+/*
+ * Parses a single JSON value starting at p; pe is the end of input.
+ *
+ * The first character selects the kind of value: a double quote starts a
+ * string, '-' or a digit a number, '[' an array, '{' an object, and the
+ * letters f, n and t the literals false, null and true. Strings, numbers,
+ * arrays and objects are delegated to the corresponding JSON_parse_*
+ * function; current_nesting is incremented around array and object parsing.
+ *
+ * The parsed value is stored in *result. Returns the position of the first
+ * character after the value, or NULL on failure.
+ */
+static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+ int cs = EVIL;
+
+
+#line 412 "parser.c"
+ {
+ cs = JSON_value_start;
+ }
+#line 184 "parser.rl"
+
+#line 418 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+case 1:
+ switch( (*p) ) {
+ case 34: goto tr0;
+ case 45: goto tr2;
+ case 91: goto tr3;
+ case 102: goto st2;
+ case 110: goto st6;
+ case 116: goto st9;
+ case 123: goto tr7;
+ }
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr2;
+ goto st0;
+st0:
+ goto _out0;
+tr0:
+#line 136 "parser.rl"
+ {
+ char *np = JSON_parse_string(json, p, pe, result);
+ if (np == NULL) goto _out12; else {p = (( np))-1;}
+ }
+ goto st12;
+tr2:
+#line 141 "parser.rl"
+ {
+ /*
+ * Try a float first, then an integer; the integer attempt starts from
+ * wherever p is after the float attempt.
+ * NOTE(review): the machine leaves through the final state even when
+ * both attempts return NULL, with p and *result unchanged -- confirm
+ * that callers reject such input.
+ */
+ char *np;
+ np = JSON_parse_float(json, p, pe, result);
+ if (np != NULL) {p = (( np))-1;}
+ np = JSON_parse_integer(json, p, pe, result);
+ if (np != NULL) {p = (( np))-1;}
+ goto _out12;
+ }
+ goto st12;
+tr3:
+#line 150 "parser.rl"
+ {
+ char *np;
+ json->current_nesting += 1;
+ np = JSON_parse_array(json, p, pe, result);
+ json->current_nesting -= 1;
+ if (np == NULL) goto _out12; else {p = (( np))-1;}
+ }
+ goto st12;
+tr7:
+#line 158 "parser.rl"
+ {
+ char *np;
+ json->current_nesting += 1;
+ np = JSON_parse_object(json, p, pe, result);
+ json->current_nesting -= 1;
+ if (np == NULL) goto _out12; else {p = (( np))-1;}
+ }
+ goto st12;
+tr11:
+#line 130 "parser.rl"
+ {
+ *result = Qfalse;
+ }
+ goto st12;
+tr14:
+#line 127 "parser.rl"
+ {
+ *result = Qnil;
+ }
+ goto st12;
+tr17:
+#line 133 "parser.rl"
+ {
+ *result = Qtrue;
+ }
+ goto st12;
+st12:
+ if ( ++p == pe )
+ goto _out12;
+case 12:
+#line 166 "parser.rl"
+ { goto _out12; }
+#line 501 "parser.c"
+ goto st0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ if ( (*p) == 97 )
+ goto st3;
+ goto st0;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+ if ( (*p) == 108 )
+ goto st4;
+ goto st0;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ if ( (*p) == 115 )
+ goto st5;
+ goto st0;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+ if ( (*p) == 101 )
+ goto tr11;
+ goto st0;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ if ( (*p) == 117 )
+ goto st7;
+ goto st0;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ if ( (*p) == 108 )
+ goto st8;
+ goto st0;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+ if ( (*p) == 108 )
+ goto tr14;
+ goto st0;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+ if ( (*p) == 114 )
+ goto st10;
+ goto st0;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+ if ( (*p) == 117 )
+ goto st11;
+ goto st0;
+st11:
+ if ( ++p == pe )
+ goto _out11;
+case 11:
+ if ( (*p) == 101 )
+ goto tr17;
+ goto st0;
+ }
+ _out0: cs = 0; goto _out;
+ _out12: cs = 12; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+ _out10: cs = 10; goto _out;
+ _out11: cs = 11; goto _out;
+
+ _out: {}
+ }
+#line 185 "parser.rl"
+
+ if (cs >= JSON_value_first_final) {
+ return p;
+ } else {
+ return NULL;
+ }
+}
+
+
+#line 599 "parser.c"
+/* State numbers of the Ragel-generated JSON_integer machine. */
+static const int JSON_integer_start = 1;
+static const int JSON_integer_first_final = 5;
+static const int JSON_integer_error = 0;
+
+static const int JSON_integer_en_main = 1;
+
+#line 201 "parser.rl"
+
+
+/*
+ * Parses a JSON integer starting at p; pe is the end of input.
+ *
+ * Accepts an optional '-', followed by either a single 0 or a non-zero
+ * digit and further digits. The number must be followed by a non-digit
+ * character before pe; a 0 followed by another digit is rejected.
+ *
+ * On success the text is converted with rb_Integer() and stored in
+ * *result, and the position one past the terminating character is
+ * returned. Returns NULL otherwise.
+ */
+static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+ int cs = EVIL;
+
+
+#line 614 "parser.c"
+ {
+ cs = JSON_integer_start;
+ }
+#line 208 "parser.rl"
+ /* Remember where the number starts so its text can be copied later. */
+ json->memo = p;
+
+#line 621 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+case 1:
+ switch( (*p) ) {
+ case 45: goto st2;
+ case 48: goto st3;
+ }
+ if ( 49 <= (*p) && (*p) <= 57 )
+ goto st4;
+ goto st0;
+st0:
+ goto _out0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ if ( (*p) == 48 )
+ goto st3;
+ if ( 49 <= (*p) && (*p) <= 57 )
+ goto st4;
+ goto st0;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st0;
+ goto tr4;
+tr4:
+#line 198 "parser.rl"
+ { goto _out5; }
+ goto st5;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+#line 661 "parser.c"
+ goto st0;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st4;
+ goto tr4;
+ }
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out5: cs = 5; goto _out;
+ _out4: cs = 4; goto _out;
+
+ _out: {}
+ }
+#line 210 "parser.rl"
+
+ if (cs >= JSON_integer_first_final) {
+ /* p points at the terminating character, so this is the digit count. */
+ long len = p - json->memo;
+ *result = rb_Integer(rb_str_new(json->memo, len));
+ return p + 1;
+ } else {
+ return NULL;
+ }
+}
+
+
+#line 691 "parser.c"
+/* State numbers of the Ragel-generated JSON_float machine. */
+static const int JSON_float_start = 1;
+static const int JSON_float_first_final = 10;
+static const int JSON_float_error = 0;
+
+static const int JSON_float_en_main = 1;
+
+#line 232 "parser.rl"
+
+
+/*
+ * Parses a JSON floating point number starting at p; pe is the end of input.
+ *
+ * Accepts an optional '-', an integer part (a single 0, or a non-zero digit
+ * and further digits), and then a fraction ('.' and digits), an exponent
+ * ('e' or 'E', optional sign, digits), or both. A number without fraction
+ * and exponent is not accepted here. The number must be followed, before
+ * pe, by a character that is not a digit, 'e', 'E', '.' or '-'.
+ *
+ * On success the text is converted with rb_Float() and stored in *result,
+ * and the position one past the terminating character is returned.
+ * Returns NULL otherwise.
+ */
+static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+ int cs = EVIL;
+
+
+#line 706 "parser.c"
+ {
+ cs = JSON_float_start;
+ }
+#line 239 "parser.rl"
+ /* Remember where the number starts so its text can be copied later. */
+ json->memo = p;
+
+#line 713 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+case 1:
+ switch( (*p) ) {
+ case 45: goto st2;
+ case 48: goto st3;
+ }
+ if ( 49 <= (*p) && (*p) <= 57 )
+ goto st9;
+ goto st0;
+st0:
+ goto _out0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ if ( (*p) == 48 )
+ goto st3;
+ if ( 49 <= (*p) && (*p) <= 57 )
+ goto st9;
+ goto st0;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+ switch( (*p) ) {
+ case 46: goto st4;
+ case 69: goto st6;
+ case 101: goto st6;
+ }
+ goto st0;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st5;
+ goto st0;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+ switch( (*p) ) {
+ case 69: goto st6;
+ case 101: goto st6;
+ }
+ if ( (*p) > 46 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st5;
+ } else if ( (*p) >= 45 )
+ goto st0;
+ goto tr7;
+tr7:
+#line 226 "parser.rl"
+ { goto _out10; }
+ goto st10;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+#line 777 "parser.c"
+ goto st0;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ switch( (*p) ) {
+ case 43: goto st7;
+ case 45: goto st7;
+ }
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st8;
+ goto st0;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st8;
+ goto st0;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+ switch( (*p) ) {
+ case 69: goto st0;
+ case 101: goto st0;
+ }
+ if ( (*p) > 46 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st8;
+ } else if ( (*p) >= 45 )
+ goto st0;
+ goto tr7;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+ switch( (*p) ) {
+ case 46: goto st4;
+ case 69: goto st6;
+ case 101: goto st6;
+ }
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st9;
+ goto st0;
+ }
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out10: cs = 10; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+
+ _out: {}
+ }
+#line 241 "parser.rl"
+
+ if (cs >= JSON_float_first_final) {
+ /* p points at the terminating character, so this is the text length. */
+ long len = p - json->memo;
+ *result = rb_Float(rb_str_new(json->memo, len));
+ return p + 1;
+ } else {
+ return NULL;
+ }
+}
+
+
+
+#line 850 "parser.c"
+/* State numbers of the Ragel-generated JSON_array machine. */
+static const int JSON_array_start = 1;
+static const int JSON_array_first_final = 17;
+static const int JSON_array_error = 0;
+
+static const int JSON_array_en_main = 1;
+
+#line 277 "parser.rl"
+
+
+/*
+ * Parses a JSON array whose opening bracket is at p; pe is the end of input.
+ *
+ * A new Array is stored in *result and every element scanned with
+ * JSON_parse_value() is pushed onto it. Whitespace and C-style or C++-style
+ * comments are skipped between tokens.
+ *
+ * Raises eNestingError if max_nesting is non-zero and current_nesting
+ * exceeds it. Unlike the other scanner functions this one never returns
+ * NULL: malformed input raises eParserError.
+ *
+ * Returns the position just after the closing bracket.
+ */
+static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+ int cs = EVIL;
+
+ if (json->max_nesting && json->current_nesting > json->max_nesting) {
+ rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
+ }
+ *result = rb_ary_new();
+
+
+#line 870 "parser.c"
+ {
+ cs = JSON_array_start;
+ }
+#line 289 "parser.rl"
+
+#line 876 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+case 1:
+ if ( (*p) == 91 )
+ goto st2;
+ goto st0;
+st0:
+ goto _out0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ switch( (*p) ) {
+ case 13: goto st2;
+ case 32: goto st2;
+ case 34: goto tr2;
+ case 45: goto tr2;
+ case 47: goto st13;
+ case 91: goto tr2;
+ case 93: goto tr4;
+ case 102: goto tr2;
+ case 110: goto tr2;
+ case 116: goto tr2;
+ case 123: goto tr2;
+ }
+ if ( (*p) > 10 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr2;
+ } else if ( (*p) >= 9 )
+ goto st2;
+ goto st0;
+tr2:
+#line 258 "parser.rl"
+ {
+ /* Scan one element and append it to the array. */
+ VALUE v = Qnil;
+ char *np = JSON_parse_value(json, p, pe, &v);
+ if (np == NULL) {
+ goto _out3;
+ } else {
+ rb_ary_push(*result, v);
+ {p = (( np))-1;}
+ }
+ }
+ goto st3;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+#line 928 "parser.c"
+ switch( (*p) ) {
+ case 13: goto st3;
+ case 32: goto st3;
+ case 44: goto st4;
+ case 47: goto st9;
+ case 93: goto tr4;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st3;
+ goto st0;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ switch( (*p) ) {
+ case 13: goto st4;
+ case 32: goto st4;
+ case 34: goto tr2;
+ case 45: goto tr2;
+ case 47: goto st5;
+ case 91: goto tr2;
+ case 102: goto tr2;
+ case 110: goto tr2;
+ case 116: goto tr2;
+ case 123: goto tr2;
+ }
+ if ( (*p) > 10 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr2;
+ } else if ( (*p) >= 9 )
+ goto st4;
+ goto st0;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+ switch( (*p) ) {
+ case 42: goto st6;
+ case 47: goto st8;
+ }
+ goto st0;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ if ( (*p) == 42 )
+ goto st7;
+ goto st6;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ switch( (*p) ) {
+ case 42: goto st7;
+ case 47: goto st4;
+ }
+ goto st6;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+ if ( (*p) == 10 )
+ goto st4;
+ goto st8;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+ switch( (*p) ) {
+ case 42: goto st10;
+ case 47: goto st12;
+ }
+ goto st0;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+ if ( (*p) == 42 )
+ goto st11;
+ goto st10;
+st11:
+ if ( ++p == pe )
+ goto _out11;
+case 11:
+ switch( (*p) ) {
+ case 42: goto st11;
+ case 47: goto st3;
+ }
+ goto st10;
+st12:
+ if ( ++p == pe )
+ goto _out12;
+case 12:
+ if ( (*p) == 10 )
+ goto st3;
+ goto st12;
+tr4:
+#line 269 "parser.rl"
+ { goto _out17; }
+ goto st17;
+st17:
+ if ( ++p == pe )
+ goto _out17;
+case 17:
+#line 1033 "parser.c"
+ goto st0;
+st13:
+ if ( ++p == pe )
+ goto _out13;
+case 13:
+ switch( (*p) ) {
+ case 42: goto st14;
+ case 47: goto st16;
+ }
+ goto st0;
+st14:
+ if ( ++p == pe )
+ goto _out14;
+case 14:
+ if ( (*p) == 42 )
+ goto st15;
+ goto st14;
+st15:
+ if ( ++p == pe )
+ goto _out15;
+case 15:
+ switch( (*p) ) {
+ case 42: goto st15;
+ case 47: goto st2;
+ }
+ goto st14;
+st16:
+ if ( ++p == pe )
+ goto _out16;
+case 16:
+ if ( (*p) == 10 )
+ goto st2;
+ goto st16;
+ }
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+ _out10: cs = 10; goto _out;
+ _out11: cs = 11; goto _out;
+ _out12: cs = 12; goto _out;
+ _out17: cs = 17; goto _out;
+ _out13: cs = 13; goto _out;
+ _out14: cs = 14; goto _out;
+ _out15: cs = 15; goto _out;
+ _out16: cs = 16; goto _out;
+
+ _out: {}
+ }
+#line 290 "parser.rl"
+
+ if(cs >= JSON_array_first_final) {
+ /* p still points at the closing bracket here. */
+ return p + 1;
+ } else {
+ rb_raise(eParserError, "unexpected token at '%s'", p);
+ }
+}
+
+/*
+ * Builds a new Ruby string from the raw JSON string contents in [p, pe),
+ * i.e. the text between the quotes, with escape sequences replaced.
+ *
+ * The escapes \b \f \n \r \t yield the matching control character, \u is
+ * handed to JSON_convert_UTF16_to_UTF8(), and any other escaped character
+ * (including the double quote and the backslash) is copied as is.
+ *
+ * Returns the new string, or Qnil if the input ends right after a
+ * backslash or a \u escape is too close to pe.
+ */
+static VALUE json_string_unescape(char *p, char *pe)
+{
+ VALUE result = rb_str_buf_new(pe - p + 1);
+
+ while (p < pe) {
+ if (*p == '\\') {
+ p++;
+ if (p >= pe) return Qnil; /* raise an exception later, \ at end */
+ switch (*p) {
+ case '"':
+ case '\\':
+ rb_str_buf_cat(result, p, 1);
+ p++;
+ break;
+ case 'b':
+ rb_str_buf_cat2(result, "\b");
+ p++;
+ break;
+ case 'f':
+ rb_str_buf_cat2(result, "\f");
+ p++;
+ break;
+ case 'n':
+ rb_str_buf_cat2(result, "\n");
+ p++;
+ break;
+ case 'r':
+ rb_str_buf_cat2(result, "\r");
+ p++;
+ break;
+ case 't':
+ rb_str_buf_cat2(result, "\t");
+ p++;
+ break;
+ case 'u':
+ if (p > pe - 4) {
+ return Qnil;
+ } else {
+ /* The converter appends to result and returns the new position. */
+ p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion);
+ }
+ break;
+ default:
+ rb_str_buf_cat(result, p, 1);
+ p++;
+ break;
+ }
+ } else {
+ /* Copy the whole run of unescaped characters in one go. */
+ char *q = p;
+ /* Test the bound first so that *pe is never dereferenced. */
+ while (q < pe && *q != '\\') q++;
+ rb_str_buf_cat(result, p, q - p);
+ p = q;
+ }
+ }
+ return result;
+}
+
+
+#line 1154 "parser.c"
+/* State numbers of the Ragel-generated JSON_string machine. */
+static const int JSON_string_start = 1;
+static const int JSON_string_first_final = 8;
+static const int JSON_string_error = 0;
+
+static const int JSON_string_en_main = 1;
+
+#line 368 "parser.rl"
+
+
+/*
+ * Parses a JSON string whose opening quote is at p; pe is the end of input.
+ *
+ * Characters with codes 0 to 31 are rejected inside the string, and a \u
+ * escape must be followed by four hexadecimal digits. When the closing
+ * quote is reached, the text between the quotes is decoded with
+ * json_string_unescape() and stored in *result.
+ *
+ * Returns the position just after the closing quote, or NULL if the input
+ * is not a well-formed string.
+ */
+static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+ int cs = EVIL;
+
+ *result = rb_str_new("", 0);
+
+#line 1170 "parser.c"
+ {
+ cs = JSON_string_start;
+ }
+#line 376 "parser.rl"
+ /* Remember the position of the opening quote. */
+ json->memo = p;
+
+#line 1177 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+case 1:
+ if ( (*p) == 34 )
+ goto st2;
+ goto st0;
+st0:
+ goto _out0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ switch( (*p) ) {
+ case 34: goto tr2;
+ case 92: goto st3;
+ }
+ if ( 0 <= (*p) && (*p) <= 31 )
+ goto st0;
+ goto st2;
+tr2:
+#line 360 "parser.rl"
+ {
+ /*
+ * Decode the text between the quotes.
+ * NOTE(review): a Qnil result also leaves through _out8, the final
+ * state, so the function reports success with *result == Qnil --
+ * confirm that this is intended.
+ */
+ *result = json_string_unescape(json->memo + 1, p);
+ if (NIL_P(*result)) goto _out8; else {p = (( p + 1))-1;}
+ }
+#line 365 "parser.rl"
+ { goto _out8; }
+ goto st8;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+#line 1213 "parser.c"
+ goto st0;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+ if ( (*p) == 117 )
+ goto st4;
+ if ( 0 <= (*p) && (*p) <= 31 )
+ goto st0;
+ goto st2;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st5;
+ } else if ( (*p) > 70 ) {
+ if ( 97 <= (*p) && (*p) <= 102 )
+ goto st5;
+ } else
+ goto st5;
+ goto st0;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st6;
+ } else if ( (*p) > 70 ) {
+ if ( 97 <= (*p) && (*p) <= 102 )
+ goto st6;
+ } else
+ goto st6;
+ goto st0;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st7;
+ } else if ( (*p) > 70 ) {
+ if ( 97 <= (*p) && (*p) <= 102 )
+ goto st7;
+ } else
+ goto st7;
+ goto st0;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ if ( (*p) < 65 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto st2;
+ } else if ( (*p) > 70 ) {
+ if ( 97 <= (*p) && (*p) <= 102 )
+ goto st2;
+ } else
+ goto st2;
+ goto st0;
+ }
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out8: cs = 8; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+
+ _out: {}
+ }
+#line 378 "parser.rl"
+
+ if (cs >= JSON_string_first_final) {
+ /* p still points at the closing quote here. */
+ return p + 1;
+ } else {
+ return NULL;
+ }
+}
+
+
+
+#line 1299 "parser.c"
+/* State numbers of the Ragel-generated top-level JSON machine (used by cParser_parse). */
+static const int JSON_start = 1;
+static const int JSON_first_final = 10;
+static const int JSON_error = 0;
+
+static const int JSON_en_main = 1;
+
+#line 412 "parser.rl"
+
+
+/*
+ * Document-class: JSON::Ext::Parser
+ *
+ * This is the JSON parser implemented as a C extension. It can be configured
+ * to be used by setting
+ *
+ * JSON.parser = JSON::Ext::Parser
+ *
+ * with the method parser= in JSON.
+ *
+ */
+
+/*
+ * call-seq: new(source, opts => {})
+ *
+ * Creates a new JSON::Ext::Parser instance for the string _source_.
+ *
+ * It will be configured by the _opts_ hash. _opts_ can have the following
+ * keys:
+ * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
+ * structures. It defaults to 19. Disable depth checking with
+ * :max_nesting => false.
+ */
+static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
+{
+ VALUE source, opts;
+ char *ptr;
+ long len;
+ GET_STRUCT;
+
+ /* One mandatory argument (source) and one optional argument (opts). */
+ rb_scan_args(argc, argv, "11", &source, &opts);
+ source = StringValue(source);
+ ptr = RSTRING_PTR(source);
+ len = RSTRING_LEN(source);
+ if (len < 2) {
+ rb_raise(eParserError, "A JSON text must at least contain two octets!");
+ }
+
+ /* Default nesting limit; opts[:max_nesting] may override it below. */
+ json->max_nesting = 19;
+ if (!NIL_P(opts)) {
+ VALUE key;
+
+ opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
+ if (NIL_P(opts)) {
+ rb_raise(rb_eArgError, "opts needs to be like a hash");
+ }
+ key = ID2SYM(i_max_nesting);
+ /* Only touch the limit when the key is actually present. */
+ if (st_lookup(RHASH(opts)->tbl, key, 0)) {
+ VALUE limit = rb_hash_aref(opts, key);
+ if (!RTEST(limit)) {
+ /* nil or false switches depth checking off. */
+ json->max_nesting = 0;
+ } else {
+ Check_Type(limit, T_FIXNUM);
+ json->max_nesting = FIX2INT(limit);
+ }
+ }
+ }
+ json->current_nesting = 0;
+
+ /*
+ * NOTE: sources whose first four octets contain NUL bytes (as UTF-16 or
+ * UTF-32 encoded text would) are neither converted nor rejected here;
+ * only UTF-8 octet streams are supported at the moment.
+ */
+ json->len = len;
+ json->source = ptr;
+ json->Vsource = source;
+ json->create_id = rb_funcall(mJSON, i_create_id, 0);
+ return self;
+}
+
+/*
+ * call-seq: parse()
+ *
+ * Parses the current JSON text _source_ and returns the complete data
+ * structure as a result.
+ */
+static VALUE cParser_parse(VALUE self)
+{
+ /*
+ * Runs the top-level machine over the whole source: leading whitespace
+ * and comments are skipped, then exactly one array or object is parsed
+ * (with current_nesting set to 1), followed only by whitespace and
+ * comments. Anything else raises eParserError.
+ */
+ char *p, *pe;
+ int cs = EVIL;
+ VALUE result = Qnil;
+ GET_STRUCT;
+
+
+#line 1400 "parser.c"
+ {
+ cs = JSON_start;
+ }
+#line 505 "parser.rl"
+ p = json->source;
+ pe = p + json->len;
+
+#line 1408 "parser.c"
+ {
+ if ( p == pe )
+ goto _out;
+ switch ( cs )
+ {
+st1:
+ if ( ++p == pe )
+ goto _out1;
+case 1:
+ switch( (*p) ) {
+ case 13: goto st1;
+ case 32: goto st1;
+ case 47: goto st2;
+ case 91: goto tr3;
+ case 123: goto tr4;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st1;
+ goto st0;
+st0:
+ goto _out0;
+st2:
+ if ( ++p == pe )
+ goto _out2;
+case 2:
+ switch( (*p) ) {
+ case 42: goto st3;
+ case 47: goto st5;
+ }
+ goto st0;
+st3:
+ if ( ++p == pe )
+ goto _out3;
+case 3:
+ if ( (*p) == 42 )
+ goto st4;
+ goto st3;
+st4:
+ if ( ++p == pe )
+ goto _out4;
+case 4:
+ switch( (*p) ) {
+ case 42: goto st4;
+ case 47: goto st1;
+ }
+ goto st3;
+st5:
+ if ( ++p == pe )
+ goto _out5;
+case 5:
+ if ( (*p) == 10 )
+ goto st1;
+ goto st5;
+tr3:
+#line 401 "parser.rl"
+ {
+ char *np;
+ json->current_nesting = 1;
+ np = JSON_parse_array(json, p, pe, &result);
+ if (np == NULL) goto _out10; else {p = (( np))-1;}
+ }
+ goto st10;
+tr4:
+#line 394 "parser.rl"
+ {
+ char *np;
+ json->current_nesting = 1;
+ np = JSON_parse_object(json, p, pe, &result);
+ if (np == NULL) goto _out10; else {p = (( np))-1;}
+ }
+ goto st10;
+st10:
+ if ( ++p == pe )
+ goto _out10;
+case 10:
+#line 1484 "parser.c"
+ switch( (*p) ) {
+ case 13: goto st10;
+ case 32: goto st10;
+ case 47: goto st6;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st10;
+ goto st0;
+st6:
+ if ( ++p == pe )
+ goto _out6;
+case 6:
+ switch( (*p) ) {
+ case 42: goto st7;
+ case 47: goto st9;
+ }
+ goto st0;
+st7:
+ if ( ++p == pe )
+ goto _out7;
+case 7:
+ if ( (*p) == 42 )
+ goto st8;
+ goto st7;
+st8:
+ if ( ++p == pe )
+ goto _out8;
+case 8:
+ switch( (*p) ) {
+ case 42: goto st8;
+ case 47: goto st10;
+ }
+ goto st7;
+st9:
+ if ( ++p == pe )
+ goto _out9;
+case 9:
+ if ( (*p) == 10 )
+ goto st10;
+ goto st9;
+ }
+ _out1: cs = 1; goto _out;
+ _out0: cs = 0; goto _out;
+ _out2: cs = 2; goto _out;
+ _out3: cs = 3; goto _out;
+ _out4: cs = 4; goto _out;
+ _out5: cs = 5; goto _out;
+ _out10: cs = 10; goto _out;
+ _out6: cs = 6; goto _out;
+ _out7: cs = 7; goto _out;
+ _out8: cs = 8; goto _out;
+ _out9: cs = 9; goto _out;
+
+ _out: {}
+ }
+#line 508 "parser.rl"
+
+ /* Success requires a final state and that all input was consumed. */
+ if (cs >= JSON_first_final && p == pe) {
+ return result;
+ } else {
+ rb_raise(eParserError, "unexpected token at '%s'", p);
+ }
+}
+
+/* Allocates a parser struct with ALLOC and clears every field to zero. */
+static JSON_Parser *JSON_allocate()
+{
+ JSON_Parser *parser;
+
+ parser = ALLOC(JSON_Parser);
+ MEMZERO(parser, JSON_Parser, 1);
+ return parser;
+}
+
+/*
+ * GC mark function registered in cJSON_parser_s_allocate(): marks the two
+ * Ruby objects referenced from the struct.
+ */
+static void JSON_mark(JSON_Parser *json)
+{
+ rb_gc_mark_maybe(json->create_id);
+ rb_gc_mark_maybe(json->Vsource);
+}
+
+/*
+ * Free function registered in cJSON_parser_s_allocate(). The struct is
+ * obtained with ALLOC in JSON_allocate(), i.e. from Ruby's allocator, so it
+ * is given back through ruby_xfree() rather than plain free().
+ */
+static void JSON_free(JSON_Parser *json)
+{
+ ruby_xfree(json);
+}
+
+/*
+ * Allocation function of the parser class: wraps a fresh, zeroed
+ * JSON_Parser struct into a Ruby object of class klass, with JSON_mark and
+ * JSON_free as its mark and free functions.
+ */
+static VALUE cJSON_parser_s_allocate(VALUE klass)
+{
+ JSON_Parser *parser = JSON_allocate();
+
+ return Data_Wrap_Struct(klass, JSON_mark, JSON_free, parser);
+}
+
+/*
+ * call-seq: source()
+ *
+ * Returns a copy of the current _source_ string, that was used to construct
+ * this Parser.
+ */
+static VALUE cParser_source(VALUE self)
+{
+ JSON_Parser *json;
+
+ /* Unwrap the parser state and hand out a duplicate of the source string. */
+ Data_Get_Struct(self, JSON_Parser, json);
+ return rb_str_dup(json->Vsource);
+}
+
+/*
+ * Extension entry point: interns the method and option names used by the
+ * parser, defines JSON::Ext::Parser with its methods, and looks up the
+ * JSON::ParserError and JSON::NestingError classes, which must already be
+ * defined when this runs.
+ */
+void Init_parser()
+{
+ /* Method and option names. */
+ i_json_creatable_p = rb_intern("json_creatable?");
+ i_json_create = rb_intern("json_create");
+ i_create_id = rb_intern("create_id");
+ i_chr = rb_intern("chr");
+ i_max_nesting = rb_intern("max_nesting");
+
+ /* Modules, the parser class and the exception classes. */
+ mJSON = rb_define_module("JSON");
+ mExt = rb_define_module_under(mJSON, "Ext");
+ cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
+ eParserError = rb_path2class("JSON::ParserError");
+ eNestingError = rb_path2class("JSON::NestingError");
+
+ /* Allocation function and instance methods of JSON::Ext::Parser. */
+ rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
+ rb_define_method(cParser, "initialize", cParser_initialize, -1);
+ rb_define_method(cParser, "parse", cParser_parse, 0);
+ rb_define_method(cParser, "source", cParser_source, 0);
+}
diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl
new file mode 100644
index 0000000000..9ce8c6fc24
--- /dev/null
+++ b/ext/json/ext/parser/parser.rl
@@ -0,0 +1,569 @@
+/* vim: set cin et sw=4 ts=4: */
+
+#include "ruby.h"
+#include "re.h"
+#include "st.h"
+#include "unicode.h"
+
+#define EVIL 0x666
+
+static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
+
+static ID i_json_creatable_p, i_json_create, i_create_id, i_chr, i_max_nesting;
+
+/* Per-instance parser state that is wrapped inside each Parser object. */
+typedef struct JSON_ParserStruct {
+ VALUE Vsource; /* Ruby String to parse; assigned in cParser_initialize */
+ char *source; /* RSTRING_PTR of Vsource, taken in cParser_initialize */
+ long len; /* RSTRING_LEN of Vsource */
+ char *memo; /* start of the token currently scanned (string, integer, float) */
+ VALUE create_id; /* result of calling create_id on the JSON module */
+ int max_nesting; /* maximum allowed depth; 0 disables the depth check */
+ int current_nesting; /* depth of the array/object currently being parsed */
+} JSON_Parser;
+
+/*
+ * Forward declarations of the per-construct parsers, which call each other
+ * recursively. Each one scans from p towards pe, stores the Ruby object it
+ * built through *result and returns a pointer the caller resumes from.
+ */
+static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
+
+/*
+ * Declares a local `json` and fills it with the JSON_Parser wrapped in
+ * `self`. Because it starts with a declaration it has to be used among the
+ * declarations at the top of a method body that has a `self` in scope.
+ */
+#define GET_STRUCT \
+ JSON_Parser *json; \
+ Data_Get_Struct(self, JSON_Parser, json);
+
+/*
+ * Ragel definitions shared by the machines below (pulled in with
+ * `include JSON_common`): whitespace, C style and C++ style comments (both
+ * count as ignorable text), the separators, the literal names and the
+ * characters that may begin each kind of value. Note that `cr` matches the
+ * newline character '\n'.
+ */
+%%{
+ machine JSON_common;
+
+ cr = '\n';
+ cr_neg = [^\n];
+ ws = [ \t\r\n];
+ c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/';
+ cpp_comment = '//' cr_neg* cr;
+ comment = c_comment | cpp_comment;
+ ignore = ws | comment;
+ name_separator = ':';
+ value_separator = ',';
+ Vnull = 'null';
+ Vfalse = 'false';
+ Vtrue = 'true';
+ begin_value = [nft"\-[{] | digit;
+ begin_object = '{';
+ end_object = '}';
+ begin_array = '[';
+ end_array = ']';
+ begin_string = '"';
+ begin_name = begin_string;
+ begin_number = digit | '-';
+}%%
+
+/*
+ * Ragel machine for a JSON object: '{', an optional comma separated list of
+ * name/value pairs, '}'. The parse_name and parse_value actions run on the
+ * first character of a name/value, delegate to JSON_parse_string and
+ * JSON_parse_value, and continue from the pointer those return; a NULL
+ * return stops the machine. parse_value stores the pair in the hash *result
+ * under the most recently parsed name.
+ */
+%%{
+ machine JSON_object;
+ include JSON_common;
+
+ write data;
+
+ action parse_value {
+ VALUE v = Qnil;
+ char *np = JSON_parse_value(json, fpc, pe, &v);
+ if (np == NULL) {
+ fbreak;
+ } else {
+ rb_hash_aset(*result, last_name, v);
+ fexec np;
+ }
+ }
+
+ action parse_name {
+ char *np = JSON_parse_string(json, fpc, pe, &last_name);
+ if (np == NULL) fbreak; else fexec np;
+ }
+
+ action exit { fbreak; }
+
+ a_pair = ignore* begin_name >parse_name
+ ignore* name_separator ignore*
+ begin_value >parse_value;
+
+ main := begin_object
+ (a_pair (ignore* value_separator a_pair)*)?
+ ignore* end_object @exit;
+}%%
+
+/*
+ * Parses a JSON object beginning at p into a new Hash stored in *result.
+ *
+ * Raises JSON::NestingError first if a nesting limit is set and the current
+ * depth exceeds it. When the hash contains the create_id key, the value is
+ * resolved to a class with rb_path2class and, if that class answers true to
+ * json_creatable?, *result is replaced by what its json_create returns.
+ *
+ * Returns the position where the machine stopped plus one, or NULL when the
+ * machine did not reach a final state.
+ */
+static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+    int cs = EVIL;
+    VALUE last_name = Qnil;
+    VALUE klassname, klass;
+
+    if (json->max_nesting && json->current_nesting > json->max_nesting) {
+        rb_raise(eNestingError, "nesting of %d is to deep", json->current_nesting);
+    }
+
+    *result = rb_hash_new();
+
+    %% write init;
+    %% write exec;
+
+    if (cs < JSON_object_first_final) {
+        return NULL;
+    }
+
+    klassname = rb_hash_aref(*result, json->create_id);
+    if (!NIL_P(klassname)) {
+        klass = rb_path2class(StringValueCStr(klassname));
+        if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
+            *result = rb_funcall(klass, i_json_create, 1, *result);
+        }
+    }
+    return p + 1;
+}
+
+/*
+ * Ragel machine for a single JSON value. The literals null, false and true
+ * set *result directly; numbers, strings, arrays and objects are handed to
+ * the matching JSON_parse_* function on their first character. parse_number
+ * tries JSON_parse_float first and then JSON_parse_integer. parse_array and
+ * parse_object increment json->current_nesting around the nested call and
+ * decrement it again afterwards.
+ */
+%%{
+ machine JSON_value;
+ include JSON_common;
+
+ write data;
+
+ action parse_null {
+ *result = Qnil;
+ }
+ action parse_false {
+ *result = Qfalse;
+ }
+ action parse_true {
+ *result = Qtrue;
+ }
+ action parse_string {
+ char *np = JSON_parse_string(json, fpc, pe, result);
+ if (np == NULL) fbreak; else fexec np;
+ }
+
+ action parse_number {
+ char *np;
+ np = JSON_parse_float(json, fpc, pe, result);
+ if (np != NULL) fexec np;
+ np = JSON_parse_integer(json, fpc, pe, result);
+ if (np != NULL) fexec np;
+ fbreak;
+ }
+
+ action parse_array {
+ char *np;
+ json->current_nesting += 1;
+ np = JSON_parse_array(json, fpc, pe, result);
+ json->current_nesting -= 1;
+ if (np == NULL) fbreak; else fexec np;
+ }
+
+ action parse_object {
+ char *np;
+ json->current_nesting += 1;
+ np = JSON_parse_object(json, fpc, pe, result);
+ json->current_nesting -= 1;
+ if (np == NULL) fbreak; else fexec np;
+ }
+
+ action exit { fbreak; }
+
+main := (
+ Vnull @parse_null |
+ Vfalse @parse_false |
+ Vtrue @parse_true |
+ begin_number >parse_number |
+ begin_string >parse_string |
+ begin_array >parse_array |
+ begin_object >parse_object
+ ) %*exit;
+}%%
+
+/*
+ * Parses one JSON value beginning at p; the machine's actions store the
+ * resulting Ruby object in *result. Returns the position where the machine
+ * stopped, or NULL when it did not reach a final state.
+ */
+static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+    int cs = EVIL;
+
+    %% write init;
+    %% write exec;
+
+    return (cs >= JSON_value_first_final) ? p : NULL;
+}
+
+/*
+ * Ragel machine for an integer: an optional '-', then either '0' or a
+ * non-zero digit followed by further digits. The machine exits on the first
+ * following character that is not a digit.
+ */
+%%{
+ machine JSON_integer;
+
+ write data;
+
+ action exit { fbreak; }
+
+ main := '-'? ('0' | [1-9][0-9]*) (^[0-9] @exit);
+}%%
+
+/*
+ * Parses an integer beginning at p. On success the bytes between the start
+ * (remembered in json->memo) and the position where the machine stopped are
+ * converted with rb_Integer and stored in *result, and the stopping position
+ * plus one is returned. Returns NULL when the machine did not reach a final
+ * state.
+ */
+static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+    int cs = EVIL;
+
+    %% write init;
+    json->memo = p;
+    %% write exec;
+
+    if (cs < JSON_integer_first_final) {
+        return NULL;
+    }
+    *result = rb_Integer(rb_str_new(json->memo, p - json->memo));
+    return p + 1;
+}
+
+/*
+ * Ragel machine for a float: an optional '-', an integer part, and then
+ * either a fraction with an optional exponent or an exponent on its own.
+ * The machine exits on the first following character that is not a digit,
+ * 'E', 'e', '.' or '-'.
+ */
+%%{
+ machine JSON_float;
+ include JSON_common;
+
+ write data;
+
+ action exit { fbreak; }
+
+ main := '-'? (
+ (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
+ | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
+ ) (^[0-9Ee.\-] @exit );
+}%%
+
+/*
+ * Parses a float beginning at p. On success the bytes between the start
+ * (remembered in json->memo) and the position where the machine stopped are
+ * converted with rb_Float and stored in *result, and the stopping position
+ * plus one is returned. Returns NULL when the machine did not reach a final
+ * state.
+ */
+static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+    int cs = EVIL;
+
+    %% write init;
+    json->memo = p;
+    %% write exec;
+
+    if (cs < JSON_float_first_final) {
+        return NULL;
+    }
+    *result = rb_Float(rb_str_new(json->memo, p - json->memo));
+    return p + 1;
+}
+
+
+/*
+ * Ragel machine for a JSON array: '[', an optional comma separated list of
+ * values, ']'. The parse_value action runs on the first character of each
+ * value, delegates to JSON_parse_value, pushes the value onto the array
+ * *result and continues from the returned pointer; a NULL return stops the
+ * machine.
+ */
+%%{
+ machine JSON_array;
+ include JSON_common;
+
+ write data;
+
+ action parse_value {
+ VALUE v = Qnil;
+ char *np = JSON_parse_value(json, fpc, pe, &v);
+ if (np == NULL) {
+ fbreak;
+ } else {
+ rb_ary_push(*result, v);
+ fexec np;
+ }
+ }
+
+ action exit { fbreak; }
+
+ next_element = value_separator ignore* begin_value >parse_value;
+
+ main := begin_array ignore*
+ ((begin_value >parse_value ignore*)
+ (ignore* next_element ignore*)*)?
+ end_array @exit;
+}%%
+
+/*
+ * Parses a JSON array beginning at p into a new Array stored in *result.
+ *
+ * Raises JSON::NestingError first if a nesting limit is set and the current
+ * depth exceeds it, and JSON::ParserError when the machine does not reach a
+ * final state. Otherwise returns the position where the machine stopped
+ * plus one.
+ */
+static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+    int cs = EVIL;
+
+    if (json->max_nesting && json->current_nesting > json->max_nesting) {
+        rb_raise(eNestingError, "nesting of %d is to deep", json->current_nesting);
+    }
+    *result = rb_ary_new();
+
+    %% write init;
+    %% write exec;
+
+    if (cs < JSON_array_first_final) {
+        rb_raise(eParserError, "unexpected token at '%s'", p);
+    }
+    return p + 1;
+}
+
+/*
+ * Builds a Ruby String from the raw string body [p, pe), replacing escape
+ * sequences by the characters they stand for.
+ *
+ * \b \f \n \r \t become the matching control character, a run of \uXXXX
+ * escapes is converted to UTF-8 by JSON_convert_UTF16_to_UTF8 (strict mode),
+ * and any other escaped character is copied as is.
+ *
+ * Returns the new String, or Qnil when the body ends right after a
+ * backslash or before the four hex digits of a \u escape; the caller checks
+ * for Qnil.
+ */
+static VALUE json_string_unescape(char *p, char *pe)
+{
+    VALUE result = rb_str_buf_new(pe - p + 1);
+
+    while (p < pe) {
+        char unescaped;
+
+        if (*p != '\\') {
+            /* Copy the whole run up to the next backslash (or the end) at
+             * once. The bound is tested before the byte is read. */
+            char *q = p;
+            while (q < pe && *q != '\\') q++;
+            rb_str_buf_cat(result, p, q - p);
+            p = q;
+            continue;
+        }
+
+        p++; /* step over the backslash */
+        if (p >= pe) return Qnil; /* raise an exception later, \ at end */
+
+        if (*p == 'u') {
+            /* p is on the 'u': the four hex digits p[1..4] must lie before
+             * pe. Comparing the distance avoids forming a pointer in front
+             * of the buffer. */
+            if (pe - p < 5) return Qnil;
+            p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion);
+            continue;
+        }
+
+        switch (*p) {
+            case 'b': unescaped = '\b'; break;
+            case 'f': unescaped = '\f'; break;
+            case 'n': unescaped = '\n'; break;
+            case 'r': unescaped = '\r'; break;
+            case 't': unescaped = '\t'; break;
+            default:  unescaped = *p;   break; /* '"', '\\' and anything else */
+        }
+        rb_str_buf_cat(result, &unescaped, 1);
+        p++;
+    }
+    return result;
+}
+
+/*
+ * Ragel machine for a double-quoted JSON string. The body may contain
+ * unescaped characters other than '"', '\' and 0x00..0x1f, the two-character
+ * escapes, \u followed by four hex digits, and a backslash followed by any
+ * other non-control character. The parse_string action unescapes the bytes
+ * between json->memo + 1 and p into *result and stops the machine if the
+ * result is nil.
+ */
+%%{
+ machine JSON_string;
+ include JSON_common;
+
+ write data;
+
+ action parse_string {
+ *result = json_string_unescape(json->memo + 1, p);
+ if (NIL_P(*result)) fbreak; else fexec p + 1;
+ }
+
+ action exit { fbreak; }
+
+ main := '"' ((^(["\\] | 0..0x1f) | '\\'["\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^(["\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
+}%%
+
+/*
+ * Parses a JSON string whose opening quote is at p. *result starts out as
+ * an empty String and is replaced by the machine's parse_string action.
+ * Returns the position where the machine stopped plus one, or NULL when it
+ * did not reach a final state.
+ */
+static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
+{
+    int cs = EVIL;
+
+    *result = rb_str_new("", 0);
+    %% write init;
+    json->memo = p;
+    %% write exec;
+
+    return (cs >= JSON_string_first_final) ? p + 1 : NULL;
+}
+
+
+/*
+ * Top-level Ragel machine: optional ignorable text, exactly one object or
+ * array, optional ignorable text. Both actions set json->current_nesting to
+ * 1 before delegating to JSON_parse_object / JSON_parse_array and continue
+ * from the pointer returned; a NULL return stops the machine.
+ */
+%%{
+ machine JSON;
+
+ write data;
+
+ include JSON_common;
+
+ action parse_object {
+ char *np;
+ json->current_nesting = 1;
+ np = JSON_parse_object(json, fpc, pe, &result);
+ if (np == NULL) fbreak; else fexec np;
+ }
+
+ action parse_array {
+ char *np;
+ json->current_nesting = 1;
+ np = JSON_parse_array(json, fpc, pe, &result);
+ if (np == NULL) fbreak; else fexec np;
+ }
+
+ main := ignore* (
+ begin_object >parse_object |
+ begin_array >parse_array
+ ) ignore*;
+}%%
+
+/*
+ * Document-class: JSON::Ext::Parser
+ *
+ * This is the JSON parser implemented as a C extension. It can be configured
+ * to be used by setting
+ *
+ * JSON.parser = JSON::Ext::Parser
+ *
+ * with the method parser= in JSON.
+ *
+ */
+
+/*
+ * call-seq: new(source, opts => {})
+ *
+ * Creates a new JSON::Ext::Parser instance for the string _source_.
+ *
+ * It will be configured by the _opts_ hash. _opts_ can have the following
+ * keys:
+ * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
+ *   structures. Disable depth checking with :max_nesting => false. The
+ *   limit is 19 when the key is not given.
+ *
+ * Raises JSON::ParserError if _source_ is shorter than two octets.
+ */
+static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
+{
+    VALUE source, opts;
+    long len;
+    char *ptr;
+    GET_STRUCT;
+
+    rb_scan_args(argc, argv, "11", &source, &opts);
+    source = StringValue(source);
+    ptr = RSTRING_PTR(source);
+    len = RSTRING_LEN(source);
+    if (len < 2) {
+        rb_raise(eParserError, "A JSON text must at least contain two octets!");
+    }
+
+    json->max_nesting = 19;
+    if (!NIL_P(opts)) {
+        VALUE s_max_nesting;
+
+        opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
+        if (NIL_P(opts)) {
+            rb_raise(rb_eArgError, "opts needs to be like a hash");
+        }
+        s_max_nesting = ID2SYM(i_max_nesting);
+        /* Only an explicitly given key overrides the default limit. */
+        if (st_lookup(RHASH(opts)->tbl, s_max_nesting, 0)) {
+            VALUE max_nesting = rb_hash_aref(opts, s_max_nesting);
+            if (!RTEST(max_nesting)) {
+                json->max_nesting = 0;
+            } else {
+                Check_Type(max_nesting, T_FIXNUM);
+                json->max_nesting = FIX2INT(max_nesting);
+            }
+        }
+    }
+    json->current_nesting = 0;
+
+    /*
+     * TODO: detect (and possibly convert) UTF-16/UTF-32 input by looking at
+     * which of the first four octets are zero; at the moment the source is
+     * handed to the parser unchecked.
+     */
+    json->len = len;
+    json->source = ptr;
+    json->Vsource = source;
+    json->create_id = rb_funcall(mJSON, i_create_id, 0);
+    return self;
+}
+
+/*
+ * call-seq: parse()
+ *
+ * Parses the current JSON text _source_ and returns the complete data
+ * structure as a result. Raises JSON::ParserError unless the machine ends
+ * in a final state with the whole source consumed.
+ */
+static VALUE cParser_parse(VALUE self)
+{
+    char *p, *pe;
+    int cs = EVIL;
+    VALUE result = Qnil;
+    GET_STRUCT;
+
+    %% write init;
+    p = json->source;
+    pe = p + json->len;
+    %% write exec;
+
+    if (cs < JSON_first_final || p != pe) {
+        rb_raise(eParserError, "unexpected token at '%s'", p);
+    }
+    return result;
+}
+
+/*
+ * Allocates one JSON_Parser with ALLOC and clears all of its bytes before
+ * returning it to the caller.
+ */
+static JSON_Parser *JSON_allocate()
+{
+    JSON_Parser *parser = ALLOC(JSON_Parser);
+
+    MEMZERO(parser, JSON_Parser, 1);
+    return parser;
+}
+
+/*
+ * GC mark callback registered through Data_Wrap_Struct: reports the two
+ * VALUE members of the parser struct to the garbage collector.
+ */
+static void JSON_mark(JSON_Parser *json)
+{
+    VALUE source_string = json->Vsource;
+    rb_gc_mark_maybe(source_string);
+
+    rb_gc_mark_maybe(json->create_id);
+}
+
+/*
+ * Free callback registered through Data_Wrap_Struct. The struct is obtained
+ * with ALLOC (Ruby's xmalloc) in JSON_allocate, so it is released with the
+ * matching xfree rather than the C library's free.
+ */
+static void JSON_free(JSON_Parser *json)
+{
+    xfree(json);
+}
+
+/*
+ * Allocation function for the Parser class: wraps a freshly zeroed
+ * JSON_Parser in a Ruby object of class _klass_, with JSON_mark and
+ * JSON_free as its GC callbacks.
+ */
+static VALUE cJSON_parser_s_allocate(VALUE klass)
+{
+    JSON_Parser *fresh = JSON_allocate();
+    VALUE wrapped = Data_Wrap_Struct(klass, JSON_mark, JSON_free, fresh);
+
+    return wrapped;
+}
+
+/*
+ * call-seq: source()
+ *
+ * Returns a duplicate of the _source_ string this Parser was constructed
+ * with.
+ */
+static VALUE cParser_source(VALUE self)
+{
+    VALUE copy;
+    GET_STRUCT;
+
+    copy = rb_str_dup(json->Vsource);
+    return copy;
+}
+
+/*
+ * Extension entry point: defines JSON::Ext::Parser, looks up the JSON
+ * exception classes by path, registers the allocator and the instance
+ * methods, and interns the method/option names used by the parser.
+ */
+void Init_parser()
+{
+    /* Module/class hierarchy; the error classes are looked up, not defined. */
+    mJSON         = rb_define_module("JSON");
+    mExt          = rb_define_module_under(mJSON, "Ext");
+    cParser       = rb_define_class_under(mExt, "Parser", rb_cObject);
+    eParserError  = rb_path2class("JSON::ParserError");
+    eNestingError = rb_path2class("JSON::NestingError");
+
+    /* Allocator and public instance methods of JSON::Ext::Parser. */
+    rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
+    rb_define_method(cParser, "initialize", cParser_initialize, -1);
+    rb_define_method(cParser, "parse", cParser_parse, 0);
+    rb_define_method(cParser, "source", cParser_source, 0);
+
+    /* Interned identifiers. */
+    i_json_creatable_p = rb_intern("json_creatable?");
+    i_json_create      = rb_intern("json_create");
+    i_create_id        = rb_intern("create_id");
+    i_chr              = rb_intern("chr");
+    i_max_nesting      = rb_intern("max_nesting");
+}
diff --git a/ext/json/ext/parser/unicode.c b/ext/json/ext/parser/unicode.c
new file mode 100644
index 0000000000..609a0e83e2
--- /dev/null
+++ b/ext/json/ext/parser/unicode.c
@@ -0,0 +1,156 @@
+/* vim: set cin et sw=4 ts=4: */
+
+#include "unicode.h"
+
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ *
+ * Disclaimer
+ *
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ *
+ * Limitations on Rights to Redistribute This Code
+ *
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+/*
+ * Index into the table below with the first byte of a UTF-8 sequence to
+ * get the number of trailing bytes that are supposed to follow it.
+ * Note that *legal* UTF-8 values can't have 4 or 5 trailing bytes. The
+ * table is left as-is for anyone who may want to do such conversion, which
+ * was allowed in earlier algorithms.
+ *
+ * NOTE(review): nothing else in this file refers to this table.
+ */
+static const char trailingBytesForUTF8[256] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+};
+
+/*
+ * Magic values subtracted from a buffer value during UTF-8 conversion.
+ * This table contains as many values as there might be trailing bytes
+ * in a UTF-8 sequence.
+ *
+ * NOTE(review): nothing else in this file refers to this table.
+ */
+static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
+ 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+
+/*
+ * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+ * into the first byte, depending on how many bytes follow. There are
+ * as many entries in this table as there are UTF-8 sequence types
+ * (i.e., one byte sequence, two byte... etc.). Remember that sequences
+ * for *legal* UTF-8 will be 4 or fewer bytes total.
+ * The table is indexed by the total number of bytes in the sequence.
+ */
+static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+/* Reads the four hex digits at _hex_ (not NUL terminated) as one UTF-16
+ * code unit. */
+static UTF16 JSON_read_UTF16_unit(const char *hex)
+{
+    char digits[5];
+
+    digits[0] = hex[0];
+    digits[1] = hex[1];
+    digits[2] = hex[2];
+    digits[3] = hex[3];
+    digits[4] = 0;
+    return (UTF16) strtol(digits, NULL, 16);
+}
+
+/*
+ * Converts the run of consecutive \uXXXX escapes that begins at _source_
+ * to UTF-8 and appends the bytes to the Ruby string _buffer_.
+ *
+ * source     points at the 'u' of the first escape; its backslash is
+ *            source[-1].
+ * sourceEnd  end of the text being unescaped; only escapes that fit
+ *            completely in front of it are consumed.
+ * flags      with strictConversion an unpaired surrogate raises
+ *            JSON::ParserError; otherwise it is encoded as it stands.
+ *
+ * A high surrogate that is the last code unit of the run always raises
+ * JSON::ParserError. Returns a pointer to the first byte after the last
+ * escape consumed.
+ *
+ * The code units are decoded straight from the source text, so there is no
+ * temporary buffer that could leak when an exception is raised.
+ */
+char *JSON_convert_UTF16_to_UTF8 (
+        VALUE buffer,
+        char *source,
+        char *sourceEnd,
+        ConversionFlags flags)
+{
+    const UTF32 byteMask = 0xBF;
+    const UTF32 byteMark = 0x80;
+    char utf8[4];
+    long n = 0, i = 0;
+    char *p = source - 1;
+
+    /* Count the escapes of the run; each needs all six of its bytes. */
+    while (sourceEnd - p >= 6 && p[0] == '\\' && p[1] == 'u') {
+        p += 6;
+        n++;
+    }
+    if (n == 0) { /* not even one complete escape in front of sourceEnd */
+        rb_raise(rb_path2class("JSON::ParserError"),
+                "partial character in source, but hit end near %s", source);
+    }
+
+    while (i < n) {
+        unsigned short bytesToWrite = 0;
+        /* The digits of escape number i start 6 * i bytes after source + 1. */
+        UTF32 ch = JSON_read_UTF16_unit(source + 1 + 6 * i);
+        i++;
+
+        /* If we have a surrogate pair, convert to UTF32 first. */
+        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+            if (i >= n) {
+                /* We don't have the 16 bits following the high surrogate. */
+                rb_raise(rb_path2class("JSON::ParserError"),
+                        "partial character in source, but hit end near %s", source);
+            } else {
+                UTF32 ch2 = JSON_read_UTF16_unit(source + 1 + 6 * i);
+                /* If it's a low surrogate, convert to UTF32. */
+                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
+                    i++;
+                } else if (flags == strictConversion) {
+                    /* it's an unpaired high surrogate */
+                    rb_raise(rb_path2class("JSON::ParserError"),
+                            "source sequence is illegal/malformed near %s", source);
+                }
+            }
+        } else if (flags == strictConversion) {
+            /* UTF-16 surrogate values are illegal in UTF-32 */
+            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+                rb_raise(rb_path2class("JSON::ParserError"),
+                        "source sequence is illegal/malformed near %s", source);
+            }
+        }
+
+        /* Figure out how many bytes the result will require */
+        if (ch < (UTF32) 0x80) {
+            bytesToWrite = 1;
+        } else if (ch < (UTF32) 0x800) {
+            bytesToWrite = 2;
+        } else if (ch < (UTF32) 0x10000) {
+            bytesToWrite = 3;
+        } else if (ch < (UTF32) 0x110000) {
+            bytesToWrite = 4;
+        } else {
+            bytesToWrite = 3;
+            ch = UNI_REPLACEMENT_CHAR;
+        }
+
+        /* Fill utf8[] from the last byte backwards; p ends up on utf8[0]. */
+        p = utf8 + bytesToWrite;
+        switch (bytesToWrite) {
+            case 4: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6; /* fallthrough */
+            case 3: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6; /* fallthrough */
+            case 2: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6; /* fallthrough */
+            case 1: *--p = (UTF8) (ch | firstByteMark[bytesToWrite]);
+        }
+        rb_str_buf_cat(buffer, p, bytesToWrite);
+    }
+
+    /* 'u' plus four digits for the first escape, six bytes for each further one. */
+    return source + 5 + (n - 1) * 6;
+}
diff --git a/ext/json/ext/parser/unicode.h b/ext/json/ext/parser/unicode.h
new file mode 100755
index 0000000000..155da0ceee
--- /dev/null
+++ b/ext/json/ext/parser/unicode.h
@@ -0,0 +1,58 @@
+
+/*
+ * Types, constants and the conversion entry point used by the parser to
+ * turn \uXXXX escapes into UTF-8, plus fallback accessor macros.
+ */
+#ifndef _PARSER_UNICODE_H_
+#define _PARSER_UNICODE_H_
+
+#include "ruby.h"
+
+/* Code unit types; the sizes given are the minimum each one needs. */
+typedef unsigned long UTF32; /* at least 32 bits */
+typedef unsigned short UTF16; /* at least 16 bits */
+typedef unsigned char UTF8; /* typically 8 bits */
+
+/* Special code points and the upper limits of the encodings. */
+#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
+#define UNI_MAX_BMP (UTF32)0x0000FFFF
+#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
+#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
+#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
+
+/* Ranges of the UTF-16 high (leading) and low (trailing) surrogates. */
+#define UNI_SUR_HIGH_START (UTF32)0xD800
+#define UNI_SUR_HIGH_END (UTF32)0xDBFF
+#define UNI_SUR_LOW_START (UTF32)0xDC00
+#define UNI_SUR_LOW_END (UTF32)0xDFFF
+
+/* Used when combining a surrogate pair into one code point. Being static
+ * definitions in a header, every file that includes it gets its own copy. */
+static const int halfShift = 10; /* used for shifting by 10 bits */
+
+static const UTF32 halfBase = 0x0010000UL;
+static const UTF32 halfMask = 0x3FFUL;
+
+/* NOTE(review): JSON_convert_UTF16_to_UTF8 as declared below returns a
+ * pointer, not one of these codes - confirm whether this enum is still
+ * needed. */
+typedef enum {
+ conversionOK = 0, /* conversion successful */
+ sourceExhausted, /* partial character in source, but hit end */
+ targetExhausted, /* insuff. room in target for conversion */
+ sourceIllegal /* source sequence is illegal/malformed */
+} ConversionResult;
+
+/* Selects how JSON_convert_UTF16_to_UTF8 treats unpaired surrogates. */
+typedef enum {
+ strictConversion = 0,
+ lenientConversion
+} ConversionFlags;
+
+/* Appends the UTF-8 form of the \uXXXX escapes at source to buffer and
+ * returns a pointer into the source text to continue from. */
+char *JSON_convert_UTF16_to_UTF8 (
+ VALUE buffer,
+ char *source,
+ char *sourceEnd,
+ ConversionFlags flags);
+
+/* Fallback definitions of the array/string accessors, used only when
+ * they are not already defined at this point. */
+#ifndef RARRAY_PTR
+#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
+#endif
+#ifndef RARRAY_LEN
+#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len
+#endif
+#ifndef RSTRING_PTR
+#define RSTRING_PTR(string) RSTRING(string)->ptr
+#endif
+#ifndef RSTRING_LEN
+#define RSTRING_LEN(string) RSTRING(string)->len
+#endif
+
+#endif