diff options
author | usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-04-26 04:34:36 +0000 |
---|---|---|
committer | usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-04-26 04:34:36 +0000 |
commit | 27be3056dc0e9c313f25b430ca90b240a5e44160 (patch) | |
tree | b339421d40fd87a888691ded57a8cc00fbc2d2f2 /ext/json/ext/parser | |
parent | b0018f68590c8071bb0a242f75cc4fa048396e78 (diff) |
* ext/json: revert r27493. came again after canceling gcc-ism.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27500 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/json/ext/parser')
-rw-r--r-- | ext/json/ext/parser/extconf.rb | 9 | ||||
-rw-r--r-- | ext/json/ext/parser/parser.c | 1829 | ||||
-rw-r--r-- | ext/json/ext/parser/parser.rl | 686 | ||||
-rw-r--r-- | ext/json/ext/parser/unicode.c | 154 | ||||
-rw-r--r-- | ext/json/ext/parser/unicode.h | 58 |
5 files changed, 2736 insertions, 0 deletions
diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb new file mode 100644 index 0000000000..e790f6caae --- /dev/null +++ b/ext/json/ext/parser/extconf.rb @@ -0,0 +1,9 @@ +require 'mkmf' +require 'rbconfig' + +if CONFIG['GCC'] == 'yes' + $CFLAGS += ' -Wall' + #$CFLAGS += ' -O0 -ggdb' +end + +create_makefile 'json/ext/parser' diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c new file mode 100644 index 0000000000..a71c3b8e98 --- /dev/null +++ b/ext/json/ext/parser/parser.c @@ -0,0 +1,1829 @@ + +#line 1 "parser.rl" +#include "ruby.h" +#include "unicode.h" +#if HAVE_RE_H +#include "re.h" +#endif +#if HAVE_RUBY_ST_H +#include "ruby/st.h" +#endif +#if HAVE_ST_H +#include "st.h" +#endif + +#define EVIL 0x666 + +#ifndef RHASH_TBL +#define RHASH_TBL(hsh) (RHASH(hsh)->tbl) +#endif + +#ifdef HAVE_RUBY_ENCODING_H +#include "ruby/encoding.h" +#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding()) +#else +#define FORCE_UTF8(obj) +#endif + +static VALUE mJSON, mExt, cParser, eParserError, eNestingError; +static VALUE CNaN, CInfinity, CMinusInfinity; + +static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, + i_chr, i_max_nesting, i_allow_nan, i_object_class, i_array_class; + +#define MinusInfinity "-Infinity" + +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + int max_nesting; + int current_nesting; + int allow_nan; + VALUE object_class; + VALUE array_class; +} JSON_Parser; + +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char 
*JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); + +#define GET_STRUCT \ + JSON_Parser *json; \ + Data_Get_Struct(self, JSON_Parser, json); + + +#line 84 "parser.rl" + + + +#line 66 "parser.c" +static const int JSON_object_start = 1; +static const int JSON_object_first_final = 27; +static const int JSON_object_error = 0; + +static const int JSON_object_en_main = 1; + + +#line 117 "parser.rl" + + +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + VALUE last_name = Qnil; + VALUE object_class = json->object_class; + + if (json->max_nesting && json->current_nesting > json->max_nesting) { + rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting); + } + + *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); + + +#line 90 "parser.c" + { + cs = JSON_object_start; + } + +#line 132 "parser.rl" + +#line 97 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + if ( (*p) == 123 ) + goto st2; + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 13: goto st2; + case 32: goto st2; + case 34: goto tr2; + case 47: goto st23; + case 125: goto tr4; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st2; + goto st0; +tr2: +#line 103 "parser.rl" + { + char *np = JSON_parse_string(json, p, pe, &last_name); + if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;} + } + goto st3; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: +#line 135 "parser.c" + switch( (*p) ) { + case 13: goto st3; + case 32: goto st3; + case 47: goto st4; + case 58: goto st8; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st3; + goto st0; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 42: goto st5; + case 47: goto st7; + } + goto st0; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) == 42 ) + goto st6; + goto st5; +st6: + if ( ++p 
== pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 42: goto st6; + case 47: goto st3; + } + goto st5; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) == 10 ) + goto st3; + goto st7; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + case 13: goto st8; + case 32: goto st8; + case 34: goto tr11; + case 45: goto tr11; + case 47: goto st19; + case 73: goto tr11; + case 78: goto tr11; + case 91: goto tr11; + case 102: goto tr11; + case 110: goto tr11; + case 116: goto tr11; + case 123: goto tr11; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr11; + } else if ( (*p) >= 9 ) + goto st8; + goto st0; +tr11: +#line 92 "parser.rl" + { + VALUE v = Qnil; + char *np = JSON_parse_value(json, p, pe, &v); + if (np == NULL) { + p--; {p++; cs = 9; goto _out;} + } else { + rb_hash_aset(*result, last_name, v); + {p = (( np))-1;} + } + } + goto st9; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: +#line 218 "parser.c" + switch( (*p) ) { + case 13: goto st9; + case 32: goto st9; + case 44: goto st10; + case 47: goto st15; + case 125: goto tr4; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st9; + goto st0; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: + switch( (*p) ) { + case 13: goto st10; + case 32: goto st10; + case 34: goto tr2; + case 47: goto st11; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st10; + goto st0; +st11: + if ( ++p == pe ) + goto _test_eof11; +case 11: + switch( (*p) ) { + case 42: goto st12; + case 47: goto st14; + } + goto st0; +st12: + if ( ++p == pe ) + goto _test_eof12; +case 12: + if ( (*p) == 42 ) + goto st13; + goto st12; +st13: + if ( ++p == pe ) + goto _test_eof13; +case 13: + switch( (*p) ) { + case 42: goto st13; + case 47: goto st10; + } + goto st12; +st14: + if ( ++p == pe ) + goto _test_eof14; +case 14: + if ( (*p) == 10 ) + goto st10; + goto st14; +st15: + if ( ++p == pe ) + goto _test_eof15; +case 15: + switch( (*p) ) { + case 42: goto st16; + case 47: goto st18; + } + 
goto st0; +st16: + if ( ++p == pe ) + goto _test_eof16; +case 16: + if ( (*p) == 42 ) + goto st17; + goto st16; +st17: + if ( ++p == pe ) + goto _test_eof17; +case 17: + switch( (*p) ) { + case 42: goto st17; + case 47: goto st9; + } + goto st16; +st18: + if ( ++p == pe ) + goto _test_eof18; +case 18: + if ( (*p) == 10 ) + goto st9; + goto st18; +tr4: +#line 108 "parser.rl" + { p--; {p++; cs = 27; goto _out;} } + goto st27; +st27: + if ( ++p == pe ) + goto _test_eof27; +case 27: +#line 314 "parser.c" + goto st0; +st19: + if ( ++p == pe ) + goto _test_eof19; +case 19: + switch( (*p) ) { + case 42: goto st20; + case 47: goto st22; + } + goto st0; +st20: + if ( ++p == pe ) + goto _test_eof20; +case 20: + if ( (*p) == 42 ) + goto st21; + goto st20; +st21: + if ( ++p == pe ) + goto _test_eof21; +case 21: + switch( (*p) ) { + case 42: goto st21; + case 47: goto st8; + } + goto st20; +st22: + if ( ++p == pe ) + goto _test_eof22; +case 22: + if ( (*p) == 10 ) + goto st8; + goto st22; +st23: + if ( ++p == pe ) + goto _test_eof23; +case 23: + switch( (*p) ) { + case 42: goto st24; + case 47: goto st26; + } + goto st0; +st24: + if ( ++p == pe ) + goto _test_eof24; +case 24: + if ( (*p) == 42 ) + goto st25; + goto st24; +st25: + if ( ++p == pe ) + goto _test_eof25; +case 25: + switch( (*p) ) { + case 42: goto st25; + case 47: goto st2; + } + goto st24; +st26: + if ( ++p == pe ) + goto _test_eof26; +case 26: + if ( (*p) == 10 ) + goto st2; + goto st26; + } + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof11: cs = 11; goto _test_eof; + _test_eof12: cs = 12; goto _test_eof; + _test_eof13: cs = 13; goto _test_eof; + _test_eof14: cs = 14; goto _test_eof; + _test_eof15: cs = 15; 
goto _test_eof; + _test_eof16: cs = 16; goto _test_eof; + _test_eof17: cs = 17; goto _test_eof; + _test_eof18: cs = 18; goto _test_eof; + _test_eof27: cs = 27; goto _test_eof; + _test_eof19: cs = 19; goto _test_eof; + _test_eof20: cs = 20; goto _test_eof; + _test_eof21: cs = 21; goto _test_eof; + _test_eof22: cs = 22; goto _test_eof; + _test_eof23: cs = 23; goto _test_eof; + _test_eof24: cs = 24; goto _test_eof; + _test_eof25: cs = 25; goto _test_eof; + _test_eof26: cs = 26; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 133 "parser.rl" + + if (cs >= JSON_object_first_final) { + if (RTEST(json->create_id)) { + VALUE klassname = rb_hash_aref(*result, json->create_id); + if (!NIL_P(klassname)) { + VALUE klass = rb_path2class(StringValueCStr(klassname)); + if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) { + *result = rb_funcall(klass, i_json_create, 1, *result); + } + } + } + return p + 1; + } else { + return NULL; + } +} + + +#line 431 "parser.c" +static const int JSON_value_start = 1; +static const int JSON_value_first_final = 21; +static const int JSON_value_error = 0; + +static const int JSON_value_en_main = 1; + + +#line 231 "parser.rl" + + +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + +#line 447 "parser.c" + { + cs = JSON_value_start; + } + +#line 238 "parser.rl" + +#line 454 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + switch( (*p) ) { + case 34: goto tr0; + case 45: goto tr2; + case 73: goto st2; + case 78: goto st9; + case 91: goto tr5; + case 102: goto st11; + case 110: goto st15; + case 116: goto st18; + case 123: goto tr9; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + goto st0; +st0: +cs = 0; + goto _out; +tr0: +#line 179 "parser.rl" + { + char *np = JSON_parse_string(json, p, pe, result); + if (np == NULL) { p--; {p++; cs = 21; goto _out;} } else {p = (( np))-1;} + } + goto st21; +tr2: +#line 184 "parser.rl" + { + char *np; + if(pe > p + 9 
&& !strncmp(MinusInfinity, p, 9)) { + if (json->allow_nan) { + *result = CMinusInfinity; + {p = (( p + 10))-1;} + p--; {p++; cs = 21; goto _out;} + } else { + rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); + } + } + np = JSON_parse_float(json, p, pe, result); + if (np != NULL) {p = (( np))-1;} + np = JSON_parse_integer(json, p, pe, result); + if (np != NULL) {p = (( np))-1;} + p--; {p++; cs = 21; goto _out;} + } + goto st21; +tr5: +#line 202 "parser.rl" + { + char *np; + json->current_nesting++; + np = JSON_parse_array(json, p, pe, result); + json->current_nesting--; + if (np == NULL) { p--; {p++; cs = 21; goto _out;} } else {p = (( np))-1;} + } + goto st21; +tr9: +#line 210 "parser.rl" + { + char *np; + json->current_nesting++; + np = JSON_parse_object(json, p, pe, result); + json->current_nesting--; + if (np == NULL) { p--; {p++; cs = 21; goto _out;} } else {p = (( np))-1;} + } + goto st21; +tr16: +#line 172 "parser.rl" + { + if (json->allow_nan) { + *result = CInfinity; + } else { + rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8); + } + } + goto st21; +tr18: +#line 165 "parser.rl" + { + if (json->allow_nan) { + *result = CNaN; + } else { + rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2); + } + } + goto st21; +tr22: +#line 159 "parser.rl" + { + *result = Qfalse; + } + goto st21; +tr25: +#line 156 "parser.rl" + { + *result = Qnil; + } + goto st21; +tr28: +#line 162 "parser.rl" + { + *result = Qtrue; + } + goto st21; +st21: + if ( ++p == pe ) + goto _test_eof21; +case 21: +#line 218 "parser.rl" + { p--; {p++; cs = 21; goto _out;} } +#line 569 "parser.c" + goto st0; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + if ( (*p) == 110 ) + goto st3; + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + if ( (*p) == 102 ) + goto st4; + goto st0; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + if ( (*p) == 105 ) + goto st5; + goto st0; +st5: + if ( ++p == pe ) + goto _test_eof5; 
+case 5: + if ( (*p) == 110 ) + goto st6; + goto st0; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + if ( (*p) == 105 ) + goto st7; + goto st0; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) == 116 ) + goto st8; + goto st0; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + if ( (*p) == 121 ) + goto tr16; + goto st0; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + if ( (*p) == 97 ) + goto st10; + goto st0; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: + if ( (*p) == 78 ) + goto tr18; + goto st0; +st11: + if ( ++p == pe ) + goto _test_eof11; +case 11: + if ( (*p) == 97 ) + goto st12; + goto st0; +st12: + if ( ++p == pe ) + goto _test_eof12; +case 12: + if ( (*p) == 108 ) + goto st13; + goto st0; +st13: + if ( ++p == pe ) + goto _test_eof13; +case 13: + if ( (*p) == 115 ) + goto st14; + goto st0; +st14: + if ( ++p == pe ) + goto _test_eof14; +case 14: + if ( (*p) == 101 ) + goto tr22; + goto st0; +st15: + if ( ++p == pe ) + goto _test_eof15; +case 15: + if ( (*p) == 117 ) + goto st16; + goto st0; +st16: + if ( ++p == pe ) + goto _test_eof16; +case 16: + if ( (*p) == 108 ) + goto st17; + goto st0; +st17: + if ( ++p == pe ) + goto _test_eof17; +case 17: + if ( (*p) == 108 ) + goto tr25; + goto st0; +st18: + if ( ++p == pe ) + goto _test_eof18; +case 18: + if ( (*p) == 114 ) + goto st19; + goto st0; +st19: + if ( ++p == pe ) + goto _test_eof19; +case 19: + if ( (*p) == 117 ) + goto st20; + goto st0; +st20: + if ( ++p == pe ) + goto _test_eof20; +case 20: + if ( (*p) == 101 ) + goto tr28; + goto st0; + } + _test_eof21: cs = 21; goto _test_eof; + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof11: cs = 11; goto _test_eof; + 
_test_eof12: cs = 12; goto _test_eof; + _test_eof13: cs = 13; goto _test_eof; + _test_eof14: cs = 14; goto _test_eof; + _test_eof15: cs = 15; goto _test_eof; + _test_eof16: cs = 16; goto _test_eof; + _test_eof17: cs = 17; goto _test_eof; + _test_eof18: cs = 18; goto _test_eof; + _test_eof19: cs = 19; goto _test_eof; + _test_eof20: cs = 20; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 239 "parser.rl" + + if (cs >= JSON_value_first_final) { + return p; + } else { + return NULL; + } +} + + +#line 740 "parser.c" +static const int JSON_integer_start = 1; +static const int JSON_integer_first_final = 5; +static const int JSON_integer_error = 0; + +static const int JSON_integer_en_main = 1; + + +#line 255 "parser.rl" + + +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + +#line 756 "parser.c" + { + cs = JSON_integer_start; + } + +#line 262 "parser.rl" + json->memo = p; + +#line 764 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + switch( (*p) ) { + case 45: goto st2; + case 48: goto st3; + } + if ( 49 <= (*p) && (*p) <= 57 ) + goto st4; + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + if ( (*p) == 48 ) + goto st3; + if ( 49 <= (*p) && (*p) <= 57 ) + goto st4; + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st0; + goto tr4; +tr4: +#line 252 "parser.rl" + { p--; {p++; cs = 5; goto _out;} } + goto st5; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: +#line 805 "parser.c" + goto st0; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st4; + goto tr4; + } + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 264 "parser.rl" + + if (cs >= JSON_integer_first_final) { + long len = p - 
json->memo; + *result = rb_Integer(rb_str_new(json->memo, len)); + return p + 1; + } else { + return NULL; + } +} + + +#line 836 "parser.c" +static const int JSON_float_start = 1; +static const int JSON_float_first_final = 10; +static const int JSON_float_error = 0; + +static const int JSON_float_en_main = 1; + + +#line 286 "parser.rl" + + +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + +#line 852 "parser.c" + { + cs = JSON_float_start; + } + +#line 293 "parser.rl" + json->memo = p; + +#line 860 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + switch( (*p) ) { + case 45: goto st2; + case 48: goto st3; + } + if ( 49 <= (*p) && (*p) <= 57 ) + goto st9; + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + if ( (*p) == 48 ) + goto st3; + if ( 49 <= (*p) && (*p) <= 57 ) + goto st9; + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + switch( (*p) ) { + case 46: goto st4; + case 69: goto st6; + case 101: goto st6; + } + goto st0; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st5; + goto st0; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + switch( (*p) ) { + case 69: goto st6; + case 101: goto st6; + } + if ( (*p) > 46 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st5; + } else if ( (*p) >= 45 ) + goto st0; + goto tr7; +tr7: +#line 280 "parser.rl" + { p--; {p++; cs = 10; goto _out;} } + goto st10; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: +#line 925 "parser.c" + goto st0; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 43: goto st7; + case 45: goto st7; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st8; + goto st0; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st8; + goto st0; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + case 69: goto st0; + case 101: 
goto st0; + } + if ( (*p) > 46 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st8; + } else if ( (*p) >= 45 ) + goto st0; + goto tr7; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + switch( (*p) ) { + case 46: goto st4; + case 69: goto st6; + case 101: goto st6; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st9; + goto st0; + } + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 295 "parser.rl" + + if (cs >= JSON_float_first_final) { + long len = p - json->memo; + *result = rb_Float(rb_str_new(json->memo, len)); + return p + 1; + } else { + return NULL; + } +} + + + +#line 999 "parser.c" +static const int JSON_array_start = 1; +static const int JSON_array_first_final = 17; +static const int JSON_array_error = 0; + +static const int JSON_array_en_main = 1; + + +#line 331 "parser.rl" + + +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + VALUE array_class = json->array_class; + + if (json->max_nesting && json->current_nesting > json->max_nesting) { + rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting); + } + *result = NIL_P(array_class) ? 
rb_ary_new() : rb_class_new_instance(0, 0, array_class); + + +#line 1021 "parser.c" + { + cs = JSON_array_start; + } + +#line 344 "parser.rl" + +#line 1028 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + if ( (*p) == 91 ) + goto st2; + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 13: goto st2; + case 32: goto st2; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st13; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 93: goto tr4; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st2; + goto st0; +tr2: +#line 312 "parser.rl" + { + VALUE v = Qnil; + char *np = JSON_parse_value(json, p, pe, &v); + if (np == NULL) { + p--; {p++; cs = 3; goto _out;} + } else { + rb_ary_push(*result, v); + {p = (( np))-1;} + } + } + goto st3; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: +#line 1083 "parser.c" + switch( (*p) ) { + case 13: goto st3; + case 32: goto st3; + case 44: goto st4; + case 47: goto st9; + case 93: goto tr4; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st3; + goto st0; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 13: goto st4; + case 32: goto st4; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st5; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st4; + goto st0; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + switch( (*p) ) { + case 42: goto st6; + case 47: goto st8; + } + goto st0; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + if ( (*p) == 42 ) + goto st7; + goto st6; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + 
switch( (*p) ) { + case 42: goto st7; + case 47: goto st4; + } + goto st6; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + if ( (*p) == 10 ) + goto st4; + goto st8; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + switch( (*p) ) { + case 42: goto st10; + case 47: goto st12; + } + goto st0; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: + if ( (*p) == 42 ) + goto st11; + goto st10; +st11: + if ( ++p == pe ) + goto _test_eof11; +case 11: + switch( (*p) ) { + case 42: goto st11; + case 47: goto st3; + } + goto st10; +st12: + if ( ++p == pe ) + goto _test_eof12; +case 12: + if ( (*p) == 10 ) + goto st3; + goto st12; +tr4: +#line 323 "parser.rl" + { p--; {p++; cs = 17; goto _out;} } + goto st17; +st17: + if ( ++p == pe ) + goto _test_eof17; +case 17: +#line 1190 "parser.c" + goto st0; +st13: + if ( ++p == pe ) + goto _test_eof13; +case 13: + switch( (*p) ) { + case 42: goto st14; + case 47: goto st16; + } + goto st0; +st14: + if ( ++p == pe ) + goto _test_eof14; +case 14: + if ( (*p) == 42 ) + goto st15; + goto st14; +st15: + if ( ++p == pe ) + goto _test_eof15; +case 15: + switch( (*p) ) { + case 42: goto st15; + case 47: goto st2; + } + goto st14; +st16: + if ( ++p == pe ) + goto _test_eof16; +case 16: + if ( (*p) == 10 ) + goto st2; + goto st16; + } + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof11: cs = 11; goto _test_eof; + _test_eof12: cs = 12; goto _test_eof; + _test_eof17: cs = 17; goto _test_eof; + _test_eof13: cs = 13; goto _test_eof; + _test_eof14: cs = 14; goto _test_eof; + _test_eof15: cs = 15; goto _test_eof; + _test_eof16: cs = 16; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 345 "parser.rl" + + if(cs >= 
JSON_array_first_final) { + return p + 1; + } else { + rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); + } +} + +static VALUE json_string_unescape(char *p, char *pe) +{ + VALUE result = rb_str_buf_new(pe - p + 1); + + while (p < pe) { + if (*p == '\\') { + p++; + if (p >= pe) return Qnil; /* raise an exception later, \ at end */ + switch (*p) { + case '"': + case '\\': + rb_str_buf_cat(result, p, 1); + p++; + break; + case 'b': + rb_str_buf_cat2(result, "\b"); + p++; + break; + case 'f': + rb_str_buf_cat2(result, "\f"); + p++; + break; + case 'n': + rb_str_buf_cat2(result, "\n"); + p++; + break; + case 'r': + rb_str_buf_cat2(result, "\r"); + p++; + break; + case 't': + rb_str_buf_cat2(result, "\t"); + p++; + break; + case 'u': + if (p > pe - 4) { + return Qnil; + } else { + p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion); + } + break; + default: + rb_str_buf_cat(result, p, 1); + p++; + break; + } + } else { + char *q = p; + while (*q != '\\' && q < pe) q++; + rb_str_buf_cat(result, p, q - p); + p = q; + } + } + return result; +} + + +#line 1312 "parser.c" +static const int JSON_string_start = 1; +static const int JSON_string_first_final = 8; +static const int JSON_string_error = 0; + +static const int JSON_string_en_main = 1; + + +#line 429 "parser.rl" + + +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + *result = rb_str_new("", 0); + +#line 1329 "parser.c" + { + cs = JSON_string_start; + } + +#line 437 "parser.rl" + json->memo = p; + +#line 1337 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + if ( (*p) == 34 ) + goto st2; + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 34: goto tr2; + case 92: goto st3; + } + if ( 0 <= (*p) && (*p) <= 31 ) + goto st0; + goto st2; +tr2: +#line 415 "parser.rl" + { + *result = json_string_unescape(json->memo + 1, p); + if 
(NIL_P(*result)) { + p--; + {p++; cs = 8; goto _out;} + } else { + FORCE_UTF8(*result); + {p = (( p + 1))-1;} + } + } +#line 426 "parser.rl" + { p--; {p++; cs = 8; goto _out;} } + goto st8; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: +#line 1380 "parser.c" + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + if ( (*p) == 117 ) + goto st4; + if ( 0 <= (*p) && (*p) <= 31 ) + goto st0; + goto st2; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st5; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st5; + } else + goto st5; + goto st0; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st6; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st6; + } else + goto st6; + goto st0; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st7; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st7; + } else + goto st7; + goto st0; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st2; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st2; + } else + goto st2; + goto st0; + } + _test_eof2: cs = 2; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 439 "parser.rl" + + if (cs >= JSON_string_first_final) { + return p + 1; + } else { + return NULL; + } +} + + + +#line 1467 "parser.c" +static const int JSON_start = 1; +static const int JSON_first_final = 10; +static const int JSON_error = 0; + +static const int JSON_en_main = 1; + + +#line 473 "parser.rl" + + +/* + * Document-class: JSON::Ext::Parser 
+ * + * This is the JSON parser implemented as a C extension. It can be configured + * to be used by setting + * + * JSON.parser = JSON::Ext::Parser + * + * with the method parser= in JSON. + * + */ + +/* + * call-seq: new(source, opts => {}) + * + * Creates a new JSON::Ext::Parser instance for the string _source_. + * + * Creates a new JSON::Ext::Parser instance for the string _source_. + * + * It will be configured by the _opts_ hash. _opts_ can have the following + * keys: + * + * _opts_ can have the following keys: + * * *max_nesting*: The maximum depth of nesting allowed in the parsed data + * structures. Disable depth checking with :max_nesting => false|nil|0, it + * defaults to 19. + * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in + * defiance of RFC 4627 to be parsed by the Parser. This option defaults to + * false. + * * *create_additions*: If set to false, the Parser doesn't create + * additions even if a matching class and create_id was found. This option + * defaults to true. 
+ * * *object_class*: Defaults to Hash + * * *array_class*: Defaults to Array + */ +static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) +{ + char *ptr; + long len; + VALUE source, opts; + GET_STRUCT; + rb_scan_args(argc, argv, "11", &source, &opts); + source = StringValue(source); + ptr = RSTRING_PTR(source); + len = RSTRING_LEN(source); + if (len < 2) { + rb_raise(eParserError, "A JSON text must at least contain two octets!"); + } + if (!NIL_P(opts)) { + opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); + if (NIL_P(opts)) { + rb_raise(rb_eArgError, "opts needs to be like a hash"); + } else { + VALUE tmp = ID2SYM(i_max_nesting); + if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + VALUE max_nesting = rb_hash_aref(opts, tmp); + if (RTEST(max_nesting)) { + Check_Type(max_nesting, T_FIXNUM); + json->max_nesting = FIX2INT(max_nesting); + } else { + json->max_nesting = 0; + } + } else { + json->max_nesting = 19; + } + tmp = ID2SYM(i_allow_nan); + if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + VALUE allow_nan = rb_hash_aref(opts, tmp); + json->allow_nan = RTEST(allow_nan) ? 
1 : 0; + } else { + json->allow_nan = 0; + } + tmp = ID2SYM(i_create_additions); + if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + VALUE create_additions = rb_hash_aref(opts, tmp); + if (RTEST(create_additions)) { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } else { + json->create_id = Qnil; + } + } else { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } + tmp = ID2SYM(i_object_class); + if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + json->object_class = rb_hash_aref(opts, tmp); + } else { + json->object_class = Qnil; + } + tmp = ID2SYM(i_array_class); + if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + json->array_class = rb_hash_aref(opts, tmp); + } else { + json->array_class = Qnil; + } + } + } else { + json->max_nesting = 19; + json->allow_nan = 0; + json->create_id = rb_funcall(mJSON, i_create_id, 0); + json->object_class = Qnil; + json->array_class = Qnil; + } + json->current_nesting = 0; + /* + Convert these? + if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } + */ + json->len = len; + json->source = ptr; + json->Vsource = source; + return self; +} + +/* + * call-seq: parse() + * + * Parses the current JSON text _source_ and returns the complete data + * structure as a result. 
+ */ +static VALUE cParser_parse(VALUE self) +{ + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; + GET_STRUCT; + + +#line 1614 "parser.c" + { + cs = JSON_start; + } + +#line 611 "parser.rl" + p = json->source; + pe = p + json->len; + +#line 1623 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +st1: + if ( ++p == pe ) + goto _test_eof1; +case 1: + switch( (*p) ) { + case 13: goto st1; + case 32: goto st1; + case 47: goto st2; + case 91: goto tr3; + case 123: goto tr4; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st1; + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 42: goto st3; + case 47: goto st5; + } + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + if ( (*p) == 42 ) + goto st4; + goto st3; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 42: goto st4; + case 47: goto st1; + } + goto st3; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) == 10 ) + goto st1; + goto st5; +tr3: +#line 462 "parser.rl" + { + char *np; + json->current_nesting = 1; + np = JSON_parse_array(json, p, pe, &result); + if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} + } + goto st10; +tr4: +#line 455 "parser.rl" + { + char *np; + json->current_nesting = 1; + np = JSON_parse_object(json, p, pe, &result); + if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} + } + goto st10; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: +#line 1700 "parser.c" + switch( (*p) ) { + case 13: goto st10; + case 32: goto st10; + case 47: goto st6; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st10; + goto st0; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 42: goto st7; + case 47: goto st9; + } + goto st0; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) == 42 ) + goto st8; + goto st7; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + 
case 42: goto st8; + case 47: goto st10; + } + goto st7; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + if ( (*p) == 10 ) + goto st10; + goto st9; + } + _test_eof1: cs = 1; goto _test_eof; + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 614 "parser.rl" + + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); + } +} + +inline static JSON_Parser *JSON_allocate() +{ + JSON_Parser *json = ALLOC(JSON_Parser); + MEMZERO(json, JSON_Parser, 1); + return json; +} + +static void JSON_mark(JSON_Parser *json) +{ + rb_gc_mark_maybe(json->Vsource); + rb_gc_mark_maybe(json->create_id); + rb_gc_mark_maybe(json->object_class); + rb_gc_mark_maybe(json->array_class); +} + +static void JSON_free(JSON_Parser *json) +{ + ruby_xfree(json); +} + +static VALUE cJSON_parser_s_allocate(VALUE klass) +{ + JSON_Parser *json = JSON_allocate(); + return Data_Wrap_Struct(klass, JSON_mark, JSON_free, json); +} + +/* + * call-seq: source() + * + * Returns a copy of the current _source_ string, that was used to construct + * this Parser. 
+ */ +static VALUE cParser_source(VALUE self) +{ + GET_STRUCT; + return rb_str_dup(json->Vsource); +} + +void Init_parser() +{ + rb_require("json/common"); + mJSON = rb_define_module("JSON"); + mExt = rb_define_module_under(mJSON, "Ext"); + cParser = rb_define_class_under(mExt, "Parser", rb_cObject); + eParserError = rb_path2class("JSON::ParserError"); + eNestingError = rb_path2class("JSON::NestingError"); + rb_define_alloc_func(cParser, cJSON_parser_s_allocate); + rb_define_method(cParser, "initialize", cParser_initialize, -1); + rb_define_method(cParser, "parse", cParser_parse, 0); + rb_define_method(cParser, "source", cParser_source, 0); + + CNaN = rb_const_get(mJSON, rb_intern("NaN")); + CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); + CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); + + i_json_creatable_p = rb_intern("json_creatable?"); + i_json_create = rb_intern("json_create"); + i_create_id = rb_intern("create_id"); + i_create_additions = rb_intern("create_additions"); + i_chr = rb_intern("chr"); + i_max_nesting = rb_intern("max_nesting"); + i_allow_nan = rb_intern("allow_nan"); + i_object_class = rb_intern("object_class"); + i_array_class = rb_intern("array_class"); +} diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl new file mode 100644 index 0000000000..3f180c4ec9 --- /dev/null +++ b/ext/json/ext/parser/parser.rl @@ -0,0 +1,686 @@ +#include "ruby.h" +#include "unicode.h" +#if HAVE_RE_H +#include "re.h" +#endif +#if HAVE_RUBY_ST_H +#include "ruby/st.h" +#endif +#if HAVE_ST_H +#include "st.h" +#endif + +#define EVIL 0x666 + +#ifndef RHASH_TBL +#define RHASH_TBL(hsh) (RHASH(hsh)->tbl) +#endif + +#ifdef HAVE_RUBY_ENCODING_H +#include "ruby/encoding.h" +#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding()) +#else +#define FORCE_UTF8(obj) +#endif + +static VALUE mJSON, mExt, cParser, eParserError, eNestingError; +static VALUE CNaN, CInfinity, CMinusInfinity; + +static ID i_json_creatable_p, 
i_json_create, i_create_id, i_create_additions, + i_chr, i_max_nesting, i_allow_nan, i_object_class, i_array_class; + +#define MinusInfinity "-Infinity" + +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + int max_nesting; + int current_nesting; + int allow_nan; + VALUE object_class; + VALUE array_class; +} JSON_Parser; + +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); + +#define GET_STRUCT \ + JSON_Parser *json; \ + Data_Get_Struct(self, JSON_Parser, json); + +%%{ + machine JSON_common; + + cr = '\n'; + cr_neg = [^\n]; + ws = [ \t\r\n]; + c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/'; + cpp_comment = '//' cr_neg* cr; + comment = c_comment | cpp_comment; + ignore = ws | comment; + name_separator = ':'; + value_separator = ','; + Vnull = 'null'; + Vfalse = 'false'; + Vtrue = 'true'; + VNaN = 'NaN'; + VInfinity = 'Infinity'; + VMinusInfinity = '-Infinity'; + begin_value = [nft"\-[{NI] | digit; + begin_object = '{'; + end_object = '}'; + begin_array = '['; + end_array = ']'; + begin_string = '"'; + begin_name = begin_string; + begin_number = digit | '-'; +}%% + +%%{ + machine JSON_object; + include JSON_common; + + write data; + + action parse_value { + VALUE v = Qnil; + char *np = JSON_parse_value(json, fpc, pe, &v); + if (np == NULL) { + fhold; fbreak; + } else { + rb_hash_aset(*result, last_name, v); + fexec np; + } + } + + action parse_name { + char *np = JSON_parse_string(json, fpc, pe, &last_name); + if (np == NULL) { fhold; fbreak; } 
else fexec np; + } + + action exit { fhold; fbreak; } + + a_pair = ignore* begin_name >parse_name + ignore* name_separator ignore* + begin_value >parse_value; + + main := begin_object + (a_pair (ignore* value_separator a_pair)*)? + ignore* end_object @exit; +}%% + +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + VALUE last_name = Qnil; + VALUE object_class = json->object_class; + + if (json->max_nesting && json->current_nesting > json->max_nesting) { + rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting); + } + + *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); + + %% write init; + %% write exec; + + if (cs >= JSON_object_first_final) { + if (RTEST(json->create_id)) { + VALUE klassname = rb_hash_aref(*result, json->create_id); + if (!NIL_P(klassname)) { + VALUE klass = rb_path2class(StringValueCStr(klassname)); + if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) { + *result = rb_funcall(klass, i_json_create, 1, *result); + } + } + } + return p + 1; + } else { + return NULL; + } +} + +%%{ + machine JSON_value; + include JSON_common; + + write data; + + action parse_null { + *result = Qnil; + } + action parse_false { + *result = Qfalse; + } + action parse_true { + *result = Qtrue; + } + action parse_nan { + if (json->allow_nan) { + *result = CNaN; + } else { + rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2); + } + } + action parse_infinity { + if (json->allow_nan) { + *result = CInfinity; + } else { + rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8); + } + } + action parse_string { + char *np = JSON_parse_string(json, fpc, pe, result); + if (np == NULL) { fhold; fbreak; } else fexec np; + } + + action parse_number { + char *np; + if(pe > fpc + 9 && !strncmp(MinusInfinity, fpc, 9)) { + if (json->allow_nan) { + *result = CMinusInfinity; + fexec p + 10; + fhold; fbreak; + } else { + 
rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); + } + } + np = JSON_parse_float(json, fpc, pe, result); + if (np != NULL) fexec np; + np = JSON_parse_integer(json, fpc, pe, result); + if (np != NULL) fexec np; + fhold; fbreak; + } + + action parse_array { + char *np; + json->current_nesting++; + np = JSON_parse_array(json, fpc, pe, result); + json->current_nesting--; + if (np == NULL) { fhold; fbreak; } else fexec np; + } + + action parse_object { + char *np; + json->current_nesting++; + np = JSON_parse_object(json, fpc, pe, result); + json->current_nesting--; + if (np == NULL) { fhold; fbreak; } else fexec np; + } + + action exit { fhold; fbreak; } + +main := ( + Vnull @parse_null | + Vfalse @parse_false | + Vtrue @parse_true | + VNaN @parse_nan | + VInfinity @parse_infinity | + begin_number >parse_number | + begin_string >parse_string | + begin_array >parse_array | + begin_object >parse_object + ) %*exit; +}%% + +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + %% write init; + %% write exec; + + if (cs >= JSON_value_first_final) { + return p; + } else { + return NULL; + } +} + +%%{ + machine JSON_integer; + + write data; + + action exit { fhold; fbreak; } + + main := '-'? ('0' | [1-9][0-9]*) (^[0-9] @exit); +}%% + +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + %% write init; + json->memo = p; + %% write exec; + + if (cs >= JSON_integer_first_final) { + long len = p - json->memo; + *result = rb_Integer(rb_str_new(json->memo, len)); + return p + 1; + } else { + return NULL; + } +} + +%%{ + machine JSON_float; + include JSON_common; + + write data; + + action exit { fhold; fbreak; } + + main := '-'? ( + (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) 
+ | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+)) + ) (^[0-9Ee.\-] @exit ); +}%% + +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + %% write init; + json->memo = p; + %% write exec; + + if (cs >= JSON_float_first_final) { + long len = p - json->memo; + *result = rb_Float(rb_str_new(json->memo, len)); + return p + 1; + } else { + return NULL; + } +} + + +%%{ + machine JSON_array; + include JSON_common; + + write data; + + action parse_value { + VALUE v = Qnil; + char *np = JSON_parse_value(json, fpc, pe, &v); + if (np == NULL) { + fhold; fbreak; + } else { + rb_ary_push(*result, v); + fexec np; + } + } + + action exit { fhold; fbreak; } + + next_element = value_separator ignore* begin_value >parse_value; + + main := begin_array ignore* + ((begin_value >parse_value ignore*) + (ignore* next_element ignore*)*)? + end_array @exit; +}%% + +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + VALUE array_class = json->array_class; + + if (json->max_nesting && json->current_nesting > json->max_nesting) { + rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting); + } + *result = NIL_P(array_class) ? 
rb_ary_new() : rb_class_new_instance(0, 0, array_class); + + %% write init; + %% write exec; + + if(cs >= JSON_array_first_final) { + return p + 1; + } else { + rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); + } +} + +static VALUE json_string_unescape(char *p, char *pe) +{ + VALUE result = rb_str_buf_new(pe - p + 1); + + while (p < pe) { + if (*p == '\\') { + p++; + if (p >= pe) return Qnil; /* raise an exception later, \ at end */ + switch (*p) { + case '"': + case '\\': + rb_str_buf_cat(result, p, 1); + p++; + break; + case 'b': + rb_str_buf_cat2(result, "\b"); + p++; + break; + case 'f': + rb_str_buf_cat2(result, "\f"); + p++; + break; + case 'n': + rb_str_buf_cat2(result, "\n"); + p++; + break; + case 'r': + rb_str_buf_cat2(result, "\r"); + p++; + break; + case 't': + rb_str_buf_cat2(result, "\t"); + p++; + break; + case 'u': + if (p > pe - 4) { + return Qnil; + } else { + p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion); + } + break; + default: + rb_str_buf_cat(result, p, 1); + p++; + break; + } + } else { + char *q = p; + while (*q != '\\' && q < pe) q++; + rb_str_buf_cat(result, p, q - p); + p = q; + } + } + return result; +} + +%%{ + machine JSON_string; + include JSON_common; + + write data; + + action parse_string { + *result = json_string_unescape(json->memo + 1, p); + if (NIL_P(*result)) { + fhold; + fbreak; + } else { + FORCE_UTF8(*result); + fexec p + 1; + } + } + + action exit { fhold; fbreak; } + + main := '"' ((^(["\\] | 0..0x1f) | '\\'["\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^(["\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit; +}%% + +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + + *result = rb_str_new("", 0); + %% write init; + json->memo = p; + %% write exec; + + if (cs >= JSON_string_first_final) { + return p + 1; + } else { + return NULL; + } +} + + +%%{ + machine JSON; + + write data; + + include JSON_common; + + action parse_object { + char *np; + 
json->current_nesting = 1; + np = JSON_parse_object(json, fpc, pe, &result); + if (np == NULL) { fhold; fbreak; } else fexec np; + } + + action parse_array { + char *np; + json->current_nesting = 1; + np = JSON_parse_array(json, fpc, pe, &result); + if (np == NULL) { fhold; fbreak; } else fexec np; + } + + main := ignore* ( + begin_object >parse_object | + begin_array >parse_array + ) ignore*; +}%% + +/* + * Document-class: JSON::Ext::Parser + * + * This is the JSON parser implemented as a C extension. It can be configured + * to be used by setting + * + * JSON.parser = JSON::Ext::Parser + * + * with the method parser= in JSON. + * + */ + +/* + * call-seq: new(source, opts => {}) + * + * Creates a new JSON::Ext::Parser instance for the string _source_. + * + * It will be configured by the _opts_ hash. _opts_ can have the following + * keys: + * + * * *max_nesting*: The maximum depth of nesting allowed in the parsed data + * structures. Disable depth checking with :max_nesting => false|nil|0, it + * defaults to 19. + * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in + * defiance of RFC 4627 to be parsed by the Parser. This option defaults to + * false. + * * *create_additions*: If set to false, the Parser doesn't create + * additions even if a matching class and create_id was found. This option + * defaults to true. 
+ * * *object_class*: Defaults to Hash + * * *array_class*: Defaults to Array + */ +static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) +{ + char *ptr; + long len; + VALUE source, opts; + GET_STRUCT; + rb_scan_args(argc, argv, "11", &source, &opts); + source = StringValue(source); + ptr = RSTRING_PTR(source); + len = RSTRING_LEN(source); + if (len < 2) { + rb_raise(eParserError, "A JSON text must at least contain two octets!"); + } + if (!NIL_P(opts)) { + opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); + if (NIL_P(opts)) { + rb_raise(rb_eArgError, "opts needs to be like a hash"); + } else { + VALUE tmp = ID2SYM(i_max_nesting); + if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + VALUE max_nesting = rb_hash_aref(opts, tmp); + if (RTEST(max_nesting)) { + Check_Type(max_nesting, T_FIXNUM); + json->max_nesting = FIX2INT(max_nesting); + } else { + json->max_nesting = 0; + } + } else { + json->max_nesting = 19; + } + tmp = ID2SYM(i_allow_nan); + if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + VALUE allow_nan = rb_hash_aref(opts, tmp); + json->allow_nan = RTEST(allow_nan) ? 
1 : 0; + } else { + json->allow_nan = 0; + } + tmp = ID2SYM(i_create_additions); + if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + VALUE create_additions = rb_hash_aref(opts, tmp); + if (RTEST(create_additions)) { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } else { + json->create_id = Qnil; + } + } else { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } + tmp = ID2SYM(i_object_class); + if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + json->object_class = rb_hash_aref(opts, tmp); + } else { + json->object_class = Qnil; + } + tmp = ID2SYM(i_array_class); + if (st_lookup(RHASH_TBL(opts), tmp, 0)) { + json->array_class = rb_hash_aref(opts, tmp); + } else { + json->array_class = Qnil; + } + } + } else { + json->max_nesting = 19; + json->allow_nan = 0; + json->create_id = rb_funcall(mJSON, i_create_id, 0); + json->object_class = Qnil; + json->array_class = Qnil; + } + json->current_nesting = 0; + /* + Convert these? + if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { + rb_raise(eParserError, "Only UTF8 octet streams are supported atm!"); + } + */ + json->len = len; + json->source = ptr; + json->Vsource = source; + return self; +} + +/* + * call-seq: parse() + * + * Parses the current JSON text _source_ and returns the complete data + * structure as a result. 
+ */ +static VALUE cParser_parse(VALUE self) +{ + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; + GET_STRUCT; + + %% write init; + p = json->source; + pe = p + json->len; + %% write exec; + + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); + } +} + +inline static JSON_Parser *JSON_allocate() +{ + JSON_Parser *json = ALLOC(JSON_Parser); + MEMZERO(json, JSON_Parser, 1); + return json; +} + +static void JSON_mark(JSON_Parser *json) +{ + rb_gc_mark_maybe(json->Vsource); + rb_gc_mark_maybe(json->create_id); + rb_gc_mark_maybe(json->object_class); + rb_gc_mark_maybe(json->array_class); +} + +static void JSON_free(JSON_Parser *json) +{ + ruby_xfree(json); +} + +static VALUE cJSON_parser_s_allocate(VALUE klass) +{ + JSON_Parser *json = JSON_allocate(); + return Data_Wrap_Struct(klass, JSON_mark, JSON_free, json); +} + +/* + * call-seq: source() + * + * Returns a copy of the current _source_ string, that was used to construct + * this Parser. 
+ */ +static VALUE cParser_source(VALUE self) +{ + GET_STRUCT; + return rb_str_dup(json->Vsource); +} + +void Init_parser() +{ + rb_require("json/common"); + mJSON = rb_define_module("JSON"); + mExt = rb_define_module_under(mJSON, "Ext"); + cParser = rb_define_class_under(mExt, "Parser", rb_cObject); + eParserError = rb_path2class("JSON::ParserError"); + eNestingError = rb_path2class("JSON::NestingError"); + rb_define_alloc_func(cParser, cJSON_parser_s_allocate); + rb_define_method(cParser, "initialize", cParser_initialize, -1); + rb_define_method(cParser, "parse", cParser_parse, 0); + rb_define_method(cParser, "source", cParser_source, 0); + + CNaN = rb_const_get(mJSON, rb_intern("NaN")); + CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); + CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); + + i_json_creatable_p = rb_intern("json_creatable?"); + i_json_create = rb_intern("json_create"); + i_create_id = rb_intern("create_id"); + i_create_additions = rb_intern("create_additions"); + i_chr = rb_intern("chr"); + i_max_nesting = rb_intern("max_nesting"); + i_allow_nan = rb_intern("allow_nan"); + i_object_class = rb_intern("object_class"); + i_array_class = rb_intern("array_class"); +} diff --git a/ext/json/ext/parser/unicode.c b/ext/json/ext/parser/unicode.c new file mode 100644 index 0000000000..1af2878476 --- /dev/null +++ b/ext/json/ext/parser/unicode.c @@ -0,0 +1,154 @@ +#include "unicode.h" + +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. 
+ * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. + */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; + +/* + * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed + * into the first byte, depending on how many bytes follow. There are + * as many entries in this table as there are UTF-8 sequence types. + * (I.e., one byte sequence, two byte... etc.). Remember that sequencs + * for *legal* UTF-8 will be 4 or fewer bytes total. 
+ */ +static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + +char *JSON_convert_UTF16_to_UTF8 ( + VALUE buffer, + char *source, + char *sourceEnd, + ConversionFlags flags) +{ + UTF16 *tmp, *tmpPtr, *tmpEnd; + char buf[5]; + long n = 0, i; + char *p = source - 1; + + while (p < sourceEnd && p[0] == '\\' && p[1] == 'u') { + p += 6; + n++; + } + p = source + 1; + buf[4] = 0; + tmpPtr = tmp = ALLOC_N(UTF16, n); + tmpEnd = tmp + n; + for (i = 0; i < n; i++) { + buf[0] = *p++; + buf[1] = *p++; + buf[2] = *p++; + buf[3] = *p++; + tmpPtr[i] = (UTF16)strtol(buf, NULL, 16); + p += 2; + } + + while (tmpPtr < tmpEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + ch = *tmpPtr++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source + * buffer... */ + if (tmpPtr < tmpEnd) { + UTF32 ch2 = *tmpPtr; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++tmpPtr; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + ruby_xfree(tmp); + rb_raise(rb_path2class("JSON::ParserError"), + "source sequence is illegal/malformed near %s", source); + } + } else { /* We don't have the 16 bits following the high surrogate. 
*/ + ruby_xfree(tmp); + rb_raise(rb_path2class("JSON::ParserError"), + "partial character in source, but hit end near %s", source); + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + ruby_xfree(tmp); + rb_raise(rb_path2class("JSON::ParserError"), + "source sequence is illegal/malformed near %s", source); + } + } + /* Figure out how many bytes the result will require */ + if (ch < (UTF32) 0x80) { + bytesToWrite = 1; + } else if (ch < (UTF32) 0x800) { + bytesToWrite = 2; + } else if (ch < (UTF32) 0x10000) { + bytesToWrite = 3; + } else if (ch < (UTF32) 0x110000) { + bytesToWrite = 4; + } else { + bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + } + + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = 0; + p = buf + bytesToWrite; + switch (bytesToWrite) { /* note: everything falls through. */ + case 4: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--p = (UTF8) ((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--p = (UTF8) (ch | firstByteMark[bytesToWrite]); + } + rb_str_buf_cat(buffer, p, bytesToWrite); + } + ruby_xfree(tmp); + source += 5 + (n - 1) * 6; + return source; +} diff --git a/ext/json/ext/parser/unicode.h b/ext/json/ext/parser/unicode.h new file mode 100644 index 0000000000..155da0ceee --- /dev/null +++ b/ext/json/ext/parser/unicode.h @@ -0,0 +1,58 @@ + +#ifndef _PARSER_UNICODE_H_ +#define _PARSER_UNICODE_H_ + +#include "ruby.h" + +typedef unsigned long UTF32; /* at least 32 bits */ +typedef unsigned short UTF16; /* at least 16 bits */ +typedef unsigned char UTF8; /* typically 8 bits */ + +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_MAX_BMP (UTF32)0x0000FFFF +#define UNI_MAX_UTF16 (UTF32)0x0010FFFF +#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define 
UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF + +static const int halfShift = 10; /* used for shifting by 10 bits */ + +static const UTF32 halfBase = 0x0010000UL; +static const UTF32 halfMask = 0x3FFUL; + +typedef enum { + conversionOK = 0, /* conversion successful */ + sourceExhausted, /* partial character in source, but hit end */ + targetExhausted, /* insuff. room in target for conversion */ + sourceIllegal /* source sequence is illegal/malformed */ +} ConversionResult; + +typedef enum { + strictConversion = 0, + lenientConversion +} ConversionFlags; + +char *JSON_convert_UTF16_to_UTF8 ( + VALUE buffer, + char *source, + char *sourceEnd, + ConversionFlags flags); + +#ifndef RARRAY_PTR +#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr +#endif +#ifndef RARRAY_LEN +#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len +#endif +#ifndef RSTRING_PTR +#define RSTRING_PTR(string) RSTRING(string)->ptr +#endif +#ifndef RSTRING_LEN +#define RSTRING_LEN(string) RSTRING(string)->len +#endif + +#endif |