diff options
Diffstat (limited to 'ext/json/parser/parser.c')
| -rw-r--r-- | ext/json/parser/parser.c | 3238 |
1 files changed, 3238 insertions, 0 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c new file mode 100644 index 0000000000..83ed9f2508 --- /dev/null +++ b/ext/json/parser/parser.c @@ -0,0 +1,3238 @@ +/* This file is automatically generated from parser.rl by using ragel */ +#line 1 "parser.rl" +#include "ruby.h" +#include "../fbuffer/fbuffer.h" + +static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; +static VALUE CNaN, CInfinity, CMinusInfinity; + +static ID i_json_creatable_p, i_json_create, i_create_id, + i_chr, i_deep_const_get, i_match, i_aset, i_aref, + i_leftshift, i_new, i_try_convert, i_uminus, i_encode; + +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, + sym_create_additions, sym_create_id, sym_object_class, sym_array_class, + sym_decimal_class, sym_match_string; + +static int binary_encindex; +static int utf8_encindex; + +#ifdef HAVE_RB_CATEGORY_WARN +# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) +#else +# define json_deprecated(message) rb_warn(message) +#endif + +static const char deprecated_create_additions_warning[] = + "JSON.load implicit support for `create_additions: true` is deprecated " + "and will be removed in 3.0, use JSON.unsafe_load or explicitly " + "pass `create_additions: true`"; + +#ifndef HAVE_RB_HASH_BULK_INSERT +// For TruffleRuby +void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) +{ + long index = 0; + while (index < count) { + VALUE name = pairs[index++]; + VALUE value = pairs[index++]; + rb_hash_aset(hash, name, value); + } + RB_GC_GUARD(hash); +} +#endif + +/* name cache */ + +#include <string.h> +#include <ctype.h> + +// Object names are likely to be repeated, and are frozen. +// As such we can re-use them if we keep a cache of the ones we've seen so far, +// and save much more expensive lookups into the global fstring table. +// This cache implementation is deliberately simple, as we're optimizing for compactness, +// to be able to fit safely on the stack. +// As such, binary search into a sorted array gives a good tradeoff between compactness and +// performance. +#define JSON_RVALUE_CACHE_CAPA 63 +typedef struct rvalue_cache_struct { + int length; + VALUE entries[JSON_RVALUE_CACHE_CAPA]; +} rvalue_cache; + +static rb_encoding *enc_utf8; + +#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 + +static inline VALUE build_interned_string(const char *str, const long length) +{ +# ifdef HAVE_RB_ENC_INTERNED_STR + return rb_enc_interned_str(str, length, enc_utf8); +# else + VALUE rstring = rb_utf8_str_new(str, length); + return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); +# endif +} + +static inline VALUE build_symbol(const char *str, const long length) +{ + return rb_str_intern(build_interned_string(str, length)); +} + +static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) +{ + MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); + cache->length++; + cache->entries[index] = rstring; +} + +static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +{ + long rstring_length = RSTRING_LEN(rstring); + if (length == rstring_length) { + return memcmp(str, RSTRING_PTR(rstring), length); + } else { + return (int)(length - rstring_length); + } +} + +static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, entry); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. + return Qfalse; + } + + VALUE rstring = build_interned_string(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rstring); + } + return rstring; +} + +static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. + return Qfalse; + } + + VALUE rsymbol = build_symbol(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rsymbol); + } + return rsymbol; +} + +/* rvalue stack */ + +#define RVALUE_STACK_INITIAL_CAPA 128 + +enum rvalue_stack_type { + RVALUE_STACK_HEAP_ALLOCATED = 0, + RVALUE_STACK_STACK_ALLOCATED = 1, +}; + +typedef struct rvalue_stack_struct { + enum rvalue_stack_type type; + long capa; + long head; + VALUE *ptr; +} rvalue_stack; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); + +static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) +{ + long required = stack->capa * 2; + + if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { + stack = rvalue_stack_spill(stack, handle, stack_ref); + } else { + REALLOC_N(stack->ptr, VALUE, required); + stack->capa = required; + } + return stack; +} + +static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) +{ + if (RB_UNLIKELY(stack->head >= stack->capa)) { + stack = rvalue_stack_grow(stack, handle, stack_ref); + } + stack->ptr[stack->head] = value; + stack->head++; +} + +static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) +{ + return stack->ptr + (stack->head - count); +} + +static inline void rvalue_stack_pop(rvalue_stack *stack, long count) +{ + stack->head -= count; +} + +static void rvalue_stack_mark(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + long index; + for (index = 0; index < stack->head; index++) { + rb_gc_mark(stack->ptr[index]); + } +} + +static void rvalue_stack_free(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + if (stack) { + ruby_xfree(stack->ptr); + ruby_xfree(stack); + } +} + +static size_t rvalue_stack_memsize(const void *ptr) +{ + const rvalue_stack *stack = (const rvalue_stack *)ptr; + return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; +} + +static const rb_data_type_t JSON_Parser_rvalue_stack_type = { + "JSON::Ext::Parser/rvalue_stack", + { + .dmark = rvalue_stack_mark, + .dfree = rvalue_stack_free, + .dsize = rvalue_stack_memsize, + }, + 0, 0, + RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) +{ + rvalue_stack *stack; + *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + *stack_ref = stack; + MEMCPY(stack, old_stack, rvalue_stack, 1); + + stack->capa = old_stack->capa << 1; + stack->ptr = ALLOC_N(VALUE, stack->capa); + stack->type = RVALUE_STACK_HEAP_ALLOCATED; + MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); + return stack; +} + +static void rvalue_stack_eagerly_release(VALUE handle) +{ + rvalue_stack *stack; + TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + RTYPEDDATA_DATA(handle) = NULL; + rvalue_stack_free(stack); +} + +/* unicode */ + +static const signed char digit_values[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, + -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1 +}; + +static uint32_t unescape_unicode(const unsigned char *p) +{ + const uint32_t replacement_char = 0xFFFD; + + signed char b; + uint32_t result = 0; + b = digit_values[p[0]]; + if (b < 0) return replacement_char; + result = (result << 4) | (unsigned char)b; + b = digit_values[p[1]]; + if (b < 0) return replacement_char; + result = (result << 4) | (unsigned char)b; + b = digit_values[p[2]]; + if (b < 0) return replacement_char; + result = (result << 4) | (unsigned char)b; + b = digit_values[p[3]]; + if (b < 0) return replacement_char; + result = (result << 4) | (unsigned char)b; + return result; +} + +static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) +{ + int len = 1; + if (ch <= 0x7F) { + buf[0] = (char) ch; + } else if (ch <= 0x07FF) { + buf[0] = (char) ((ch >> 6) | 0xC0); + buf[1] = (char) ((ch & 0x3F) | 0x80); + len++; + } else if (ch <= 0xFFFF) { + buf[0] = (char) ((ch >> 12) | 0xE0); + buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); + buf[2] = (char) ((ch & 0x3F) | 0x80); + len += 2; + } else if (ch <= 0x1fffff) { + buf[0] =(char) ((ch >> 18) | 0xF0); + buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); + buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); + buf[3] =(char) ((ch & 0x3F) | 0x80); + len += 3; + } else { + buf[0] = '?'; + } + return len; +} + +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + VALUE object_class; + VALUE array_class; + VALUE decimal_class; + VALUE match_string; + FBuffer fbuffer; + int in_array; + int max_nesting; + bool allow_nan; + bool allow_trailing_comma; + bool parsing_name; + bool symbolize_names; + bool freeze; + bool create_additions; + bool deprecated_create_additions; + rvalue_cache name_cache; + rvalue_stack *stack; + VALUE stack_handle; +} JSON_Parser; + +#define GET_PARSER \ + GET_PARSER_INIT; \ + if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") + +#define GET_PARSER_INIT \ + JSON_Parser *json; \ + TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) + +#define MinusInfinity "-Infinity" +#define EVIL 0x666 + +static const rb_data_type_t JSON_Parser_type; +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); + + +#ifndef HAVE_STRNLEN +static size_t strnlen(const char *s, size_t maxlen) +{ + char *p; + return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen); +} +#endif + +#define PARSE_ERROR_FRAGMENT_LEN 32 +#ifdef RBIMPL_ATTR_NORETURN +RBIMPL_ATTR_NORETURN() +#endif +static void raise_parse_error(const char *format, const char *start) +{ + char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; + + size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN); + const char *ptr = start; + + if (len == PARSE_ERROR_FRAGMENT_LEN) { + MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); + buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0'; + ptr = buffer; + } + + rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); +} + + + +#line 473 "parser.rl" + + + +#line 455 "parser.c" +enum {JSON_object_start = 1}; +enum {JSON_object_first_final = 32}; +enum {JSON_object_error = 0}; + +enum {JSON_object_en_main = 1}; + + +#line 513 "parser.rl" + + +#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) + +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) +{ + int cs = EVIL; + + if (json->max_nesting && current_nesting > json->max_nesting) { + rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); + } + + long stack_head = json->stack->head; + + +#line 479 "parser.c" + { + cs = JSON_object_start; + } + +#line 528 "parser.rl" + +#line 486 "parser.c" + { + short _widec; + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + if ( (*p) == 123 ) + goto st2; + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 13: goto st2; + case 32: goto st2; + case 34: goto tr2; + case 47: goto st28; + case 125: goto tr4; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st2; + goto st0; +tr2: +#line 492 "parser.rl" + { + char *np; + json->parsing_name = true; + np = JSON_parse_string(json, p, pe, result); + json->parsing_name = false; + if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { + PUSH(*result); + {p = (( np))-1;} + } + } + goto st3; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: +#line 531 "parser.c" + switch( (*p) ) { + case 13: goto st3; + case 32: goto st3; + case 47: goto st4; + case 58: goto st8; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st3; + goto st0; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 42: goto st5; + case 47: goto st7; + } + goto st0; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) == 42 ) + goto st6; + goto st5; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 42: goto st6; + case 47: goto st3; + } + goto st5; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) == 10 ) + goto st3; + goto st7; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + case 13: goto st8; + case 32: goto st8; + case 34: goto tr11; + case 45: goto tr11; + case 47: goto st24; + case 73: goto tr11; + case 78: goto tr11; + case 91: goto tr11; + case 102: goto tr11; + case 110: goto tr11; + case 116: goto tr11; + case 123: goto tr11; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr11; + } else if ( (*p) >= 9 ) + goto st8; + goto st0; +tr11: +#line 481 "parser.rl" + { + char *np = JSON_parse_value(json, p, pe, result, current_nesting); + if (np == NULL) { + p--; {p++; cs = 9; goto _out;} + } else { + {p = (( np))-1;} + } + } + goto st9; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: +#line 612 "parser.c" + _widec = (*p); + if ( (*p) < 13 ) { + if ( (*p) > 9 ) { + if ( 10 <= (*p) && (*p) <= 10 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 13 ) { + if ( (*p) < 44 ) { + if ( 32 <= (*p) && (*p) <= 32 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 44 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 125: goto tr4; + case 269: goto st10; + case 288: goto st10; + case 300: goto st11; + case 303: goto st16; + case 525: goto st9; + case 544: goto st9; + case 556: goto st2; + case 559: goto st20; + } + if ( _widec > 266 ) { + if ( 521 <= _widec && _widec <= 522 ) + goto st9; + } else if ( _widec >= 265 ) + goto st10; + goto st0; +tr4: +#line 503 "parser.rl" + { p--; {p++; cs = 32; goto _out;} } + goto st32; +st32: + if ( ++p == pe ) + goto _test_eof32; +case 32: +#line 680 "parser.c" + goto st0; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: + switch( (*p) ) { + case 13: goto st10; + case 32: goto st10; + case 44: goto st11; + case 47: goto st16; + case 125: goto tr4; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st10; + goto st0; +st11: + if ( ++p == pe ) + goto _test_eof11; +case 11: + switch( (*p) ) { + case 13: goto st11; + case 32: goto st11; + case 34: goto tr2; + case 47: goto st12; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st11; + goto st0; +st12: + if ( ++p == pe ) + goto _test_eof12; +case 12: + switch( (*p) ) { + case 42: goto st13; + case 47: goto st15; + } + goto st0; +st13: + if ( ++p == pe ) + goto _test_eof13; +case 13: + if ( (*p) == 42 ) + goto st14; + goto st13; +st14: + if ( ++p == pe ) + goto _test_eof14; +case 14: + switch( (*p) ) { + case 42: goto st14; + case 47: goto st11; + } + goto st13; +st15: + if ( ++p == pe ) + goto _test_eof15; +case 15: + if ( (*p) == 10 ) + goto st11; + goto st15; +st16: + if ( ++p == pe ) + goto _test_eof16; +case 16: + switch( (*p) ) { + case 42: goto st17; + case 47: goto st19; + } + goto st0; +st17: + if ( ++p == pe ) + goto _test_eof17; +case 17: + if ( (*p) == 42 ) + goto st18; + goto st17; +st18: + if ( ++p == pe ) + goto _test_eof18; +case 18: + switch( (*p) ) { + case 42: goto st18; + case 47: goto st10; + } + goto st17; +st19: + if ( ++p == pe ) + goto _test_eof19; +case 19: + if ( (*p) == 10 ) + goto st10; + goto st19; +st20: + if ( ++p == pe ) + goto _test_eof20; +case 20: + _widec = (*p); + if ( (*p) > 42 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st17; + case 303: goto st19; + case 554: goto st21; + case 559: goto st23; + } + goto st0; +st21: + if ( ++p == pe ) + goto _test_eof21; +case 21: + _widec = (*p); + if ( (*p) < 42 ) { + if ( (*p) <= 41 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 42 ) { + if ( 43 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st18; + case 554: goto st22; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st21; + } else if ( _widec >= 128 ) + goto st17; + goto st0; +st22: + if ( ++p == pe ) + goto _test_eof22; +case 22: + _widec = (*p); + if ( (*p) < 43 ) { + if ( (*p) > 41 ) { + if ( 42 <= (*p) && (*p) <= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 46 ) { + if ( (*p) > 47 ) { + if ( 48 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st18; + case 303: goto st10; + case 554: goto st22; + case 559: goto st9; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st21; + } else if ( _widec >= 128 ) + goto st17; + goto st0; +st23: + if ( ++p == pe ) + goto _test_eof23; +case 23: + _widec = (*p); + if ( (*p) < 10 ) { + if ( (*p) <= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 10 ) { + if ( 11 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 490 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 266: goto st10; + case 522: goto st9; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st23; + } else if ( _widec >= 128 ) + goto st19; + goto st0; +st24: + if ( ++p == pe ) + goto _test_eof24; +case 24: + switch( (*p) ) { + case 42: goto st25; + case 47: goto st27; + } + goto st0; +st25: + if ( ++p == pe ) + goto _test_eof25; +case 25: + if ( (*p) == 42 ) + goto st26; + goto st25; +st26: + if ( ++p == pe ) + goto _test_eof26; +case 26: + switch( (*p) ) { + case 42: goto st26; + case 47: goto st8; + } + goto st25; +st27: + if ( ++p == pe ) + goto _test_eof27; +case 27: + if ( (*p) == 10 ) + goto st8; + goto st27; +st28: + if ( ++p == pe ) + goto _test_eof28; +case 28: + switch( (*p) ) { + case 42: goto st29; + case 47: goto st31; + } + goto st0; +st29: + if ( ++p == pe ) + goto _test_eof29; +case 29: + if ( (*p) == 42 ) + goto st30; + goto st29; +st30: + if ( ++p == pe ) + goto _test_eof30; +case 30: + switch( (*p) ) { + case 42: goto st30; + case 47: goto st2; + } + goto st29; +st31: + if ( ++p == pe ) + goto _test_eof31; +case 31: + if ( (*p) == 10 ) + goto st2; + goto st31; + } + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + _test_eof32: cs = 32; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof11: cs = 11; goto _test_eof; + _test_eof12: cs = 12; goto _test_eof; + _test_eof13: cs = 13; goto _test_eof; + _test_eof14: cs = 14; goto _test_eof; + _test_eof15: cs = 15; goto _test_eof; + _test_eof16: cs = 16; goto _test_eof; + _test_eof17: cs = 17; goto _test_eof; + _test_eof18: cs = 18; goto _test_eof; + _test_eof19: cs = 19; goto _test_eof; + _test_eof20: cs = 20; goto _test_eof; + _test_eof21: cs = 21; goto _test_eof; + _test_eof22: cs = 22; goto _test_eof; + _test_eof23: cs = 23; goto _test_eof; + _test_eof24: cs = 24; goto _test_eof; + _test_eof25: cs = 25; goto _test_eof; + _test_eof26: cs = 26; goto _test_eof; + _test_eof27: cs = 27; goto _test_eof; + _test_eof28: cs = 28; goto _test_eof; + _test_eof29: cs = 29; goto _test_eof; + _test_eof30: cs = 30; goto _test_eof; + _test_eof31: cs = 31; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 529 "parser.rl" + + if (cs >= JSON_object_first_final) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->object_class)) { + VALUE object = rb_class_new_instance(0, 0, json->object_class); + long index = 0; + VALUE *items = rvalue_stack_peek(json->stack, count); + while (index < count) { + VALUE name = items[index++]; + VALUE value = items[index++]; + rb_funcall(object, i_aset, 2, name, value); + } + *result = object; + } else { + VALUE hash; +#ifdef HAVE_RB_HASH_NEW_CAPA + hash = rb_hash_new_capa(count >> 1); +#else + hash = rb_hash_new(); +#endif + rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash); + *result = hash; + } + rvalue_stack_pop(json->stack, count); + + if (RB_UNLIKELY(json->create_additions)) { + VALUE klassname; + if (json->object_class) { + klassname = rb_funcall(*result, i_aref, 1, json->create_id); + } else { + klassname = rb_hash_aref(*result, json->create_id); + } + if (!NIL_P(klassname)) { + VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); + if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { + if (json->deprecated_create_additions) { + json_deprecated(deprecated_create_additions_warning); + } + *result = rb_funcall(klass, i_json_create, 1, *result); + } + } + } + return p + 1; + } else { + return NULL; + } +} + + +#line 1071 "parser.c" +enum {JSON_value_start = 1}; +enum {JSON_value_first_final = 29}; +enum {JSON_value_error = 0}; + +enum {JSON_value_en_main = 1}; + + +#line 662 "parser.rl" + + +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) +{ + int cs = EVIL; + + +#line 1087 "parser.c" + { + cs = JSON_value_start; + } + +#line 669 "parser.rl" + +#line 1094 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +st1: + if ( ++p == pe ) + goto _test_eof1; +case 1: + switch( (*p) ) { + case 13: goto st1; + case 32: goto st1; + case 34: goto tr2; + case 45: goto tr3; + case 47: goto st6; + case 73: goto st10; + case 78: goto st17; + case 91: goto tr7; + case 102: goto st19; + case 110: goto st23; + case 116: goto st26; + case 123: goto tr11; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr3; + } else if ( (*p) >= 9 ) + goto st1; + goto st0; +st0: +cs = 0; + goto _out; +tr2: +#line 607 "parser.rl" + { + char *np = JSON_parse_string(json, p, pe, result); + if (np == NULL) { + p--; + {p++; cs = 29; goto _out;} + } else { + {p = (( np))-1;} + } + } + goto st29; +tr3: +#line 617 "parser.rl" + { + char *np; + if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { + if (json->allow_nan) { + *result = CMinusInfinity; + {p = (( p + 10))-1;} + p--; {p++; cs = 29; goto _out;} + } else { + raise_parse_error("unexpected token at '%s'", p); + } + } + np = JSON_parse_number(json, p, pe, result); + if (np != NULL) { + {p = (( np))-1;} + } + p--; {p++; cs = 29; goto _out;} + } + goto st29; +tr7: +#line 635 "parser.rl" + { + char *np; + json->in_array++; + np = JSON_parse_array(json, p, pe, result, current_nesting + 1); + json->in_array--; + if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} + } + goto st29; +tr11: +#line 643 "parser.rl" + { + char *np; + np = JSON_parse_object(json, p, pe, result, current_nesting + 1); + if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} + } + goto st29; +tr25: +#line 600 "parser.rl" + { + if (json->allow_nan) { + *result = CInfinity; + } else { + raise_parse_error("unexpected token at '%s'", p - 7); + } + } + goto st29; +tr27: +#line 593 "parser.rl" + { + if (json->allow_nan) { + *result = CNaN; + } else { + raise_parse_error("unexpected token at '%s'", p - 2); + } + } + goto st29; +tr31: +#line 587 "parser.rl" + { + *result = Qfalse; + } + goto st29; +tr34: +#line 584 "parser.rl" + { + *result = Qnil; + } + goto st29; +tr37: +#line 590 "parser.rl" + { + *result = Qtrue; + } + goto st29; +st29: + if ( ++p == pe ) + goto _test_eof29; +case 29: +#line 649 "parser.rl" + { p--; {p++; cs = 29; goto _out;} } +#line 1221 "parser.c" + switch( (*p) ) { + case 13: goto st29; + case 32: goto st29; + case 47: goto st2; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st29; + goto st0; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 42: goto st3; + case 47: goto st5; + } + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + if ( (*p) == 42 ) + goto st4; + goto st3; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 42: goto st4; + case 47: goto st29; + } + goto st3; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) == 10 ) + goto st29; + goto st5; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 42: goto st7; + case 47: goto st9; + } + goto st0; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) == 42 ) + goto st8; + goto st7; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + case 42: goto st8; + case 47: goto st1; + } + goto st7; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + if ( (*p) == 10 ) + goto st1; + goto st9; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: + if ( (*p) == 110 ) + goto st11; + goto st0; +st11: + if ( ++p == pe ) + goto _test_eof11; +case 11: + if ( (*p) == 102 ) + goto st12; + goto st0; +st12: + if ( ++p == pe ) + goto _test_eof12; +case 12: + if ( (*p) == 105 ) + goto st13; + goto st0; +st13: + if ( ++p == pe ) + goto _test_eof13; +case 13: + if ( (*p) == 110 ) + goto st14; + goto st0; +st14: + if ( ++p == pe ) + goto _test_eof14; +case 14: + if ( (*p) == 105 ) + goto st15; + goto st0; +st15: + if ( ++p == pe ) + goto _test_eof15; +case 15: + if ( (*p) == 116 ) + goto st16; + goto st0; +st16: + if ( ++p == pe ) + goto _test_eof16; +case 16: + if ( (*p) == 121 ) + goto tr25; + goto st0; +st17: + if ( ++p == pe ) + goto _test_eof17; +case 17: + if ( (*p) == 97 ) + goto st18; + goto st0; +st18: + if ( ++p == pe ) + goto _test_eof18; +case 18: + if ( (*p) == 78 ) + goto tr27; + goto st0; +st19: + if ( ++p == pe ) + goto _test_eof19; +case 19: + if ( (*p) == 97 ) + goto st20; + goto st0; +st20: + if ( ++p == pe ) + goto _test_eof20; +case 20: + if ( (*p) == 108 ) + goto st21; + goto st0; +st21: + if ( ++p == pe ) + goto _test_eof21; +case 21: + if ( (*p) == 115 ) + goto st22; + goto st0; +st22: + if ( ++p == pe ) + goto _test_eof22; +case 22: + if ( (*p) == 101 ) + goto tr31; + goto st0; +st23: + if ( ++p == pe ) + goto _test_eof23; +case 23: + if ( (*p) == 117 ) + goto st24; + goto st0; +st24: + if ( ++p == pe ) + goto _test_eof24; +case 24: + if ( (*p) == 108 ) + goto st25; + goto st0; +st25: + if ( ++p == pe ) + goto _test_eof25; +case 25: + if ( (*p) == 108 ) + goto tr34; + goto st0; +st26: + if ( ++p == pe ) + goto _test_eof26; +case 26: + if ( (*p) == 114 ) + goto st27; + goto st0; +st27: + if ( ++p == pe ) + goto _test_eof27; +case 27: + if ( (*p) == 117 ) + goto st28; + goto st0; +st28: + if ( ++p == pe ) + goto _test_eof28; +case 28: + if ( (*p) == 101 ) + goto tr37; + goto st0; + } + _test_eof1: cs = 1; goto _test_eof; + _test_eof29: cs = 29; goto _test_eof; + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof11: cs = 11; goto _test_eof; + _test_eof12: cs = 12; goto _test_eof; + _test_eof13: cs = 13; goto _test_eof; + _test_eof14: cs = 14; goto _test_eof; + _test_eof15: cs = 15; goto _test_eof; + _test_eof16: cs = 16; goto _test_eof; + _test_eof17: cs = 17; goto _test_eof; + _test_eof18: cs = 18; goto _test_eof; + _test_eof19: cs = 19; goto _test_eof; + _test_eof20: cs = 20; goto _test_eof; + _test_eof21: cs = 21; goto _test_eof; + _test_eof22: cs = 22; goto _test_eof; + _test_eof23: cs = 23; goto _test_eof; + _test_eof24: cs = 24; goto _test_eof; + _test_eof25: cs = 25; goto _test_eof; + _test_eof26: cs = 26; goto _test_eof; + _test_eof27: cs = 27; goto _test_eof; + _test_eof28: cs = 28; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 670 "parser.rl" + + if (json->freeze) { + OBJ_FREEZE(*result); + } + + if (cs >= JSON_value_first_final) { + PUSH(*result); + return p; + } else { + return NULL; + } +} + + +#line 1477 "parser.c" +enum {JSON_integer_start = 1}; +enum {JSON_integer_first_final = 3}; +enum {JSON_integer_error = 0}; + +enum {JSON_integer_en_main = 1}; + + +#line 691 "parser.rl" + + +#define MAX_FAST_INTEGER_SIZE 18 +static inline VALUE fast_parse_integer(char *p, char *pe) +{ + bool negative = false; + if (*p == '-') { + negative = true; + p++; + } + + long long memo = 0; + while (p < pe) { + memo *= 10; + memo += *p - '0'; + p++; + } + + if (negative) { + memo = -memo; + } + return LL2NUM(memo); +} + +static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) +{ + long len = p - json->memo; + if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { + *result = fast_parse_integer(json->memo, p); + } else { + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); + *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); + } + return p + 1; +} + + +#line 1525 "parser.c" +enum {JSON_float_start = 1}; +enum {JSON_float_first_final = 6}; +enum {JSON_float_error = 0}; + +enum {JSON_float_en_main = 1}; + + +#line 743 "parser.rl" + + +static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + bool is_float = false; + + +#line 1542 "parser.c" + { + cs = JSON_float_start; + } + +#line 751 "parser.rl" + json->memo = p; + +#line 1550 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + switch( (*p) ) { + case 45: goto st2; + case 48: goto st6; + } + if ( 49 <= (*p) && (*p) <= 57 ) + goto st10; + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + if ( (*p) == 48 ) + goto st6; + if ( 49 <= (*p) && (*p) <= 57 ) + goto st10; + goto st0; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 45: goto st0; + case 46: goto tr8; + case 69: goto tr9; + case 101: goto tr9; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st0; + goto tr7; +tr7: +#line 735 "parser.rl" + { p--; {p++; cs = 7; goto _out;} } + goto st7; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: +#line 1597 "parser.c" + goto st0; +tr8: +#line 736 "parser.rl" + { is_float = true; } + goto st3; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: +#line 1607 "parser.c" + if ( 48 <= (*p) && (*p) <= 57 ) + goto st8; + goto st0; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + case 69: goto st4; + case 101: goto st4; + } + if ( (*p) > 46 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st8; + } else if ( (*p) >= 45 ) + goto st0; + goto tr7; +tr9: +#line 736 "parser.rl" + { is_float = true; } + goto st4; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: +#line 1633 "parser.c" + switch( (*p) ) { + case 43: goto st5; + case 45: goto st5; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st9; + goto st0; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st9; + goto st0; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + switch( (*p) ) { + case 69: goto st0; + case 101: goto st0; + } + if ( (*p) > 46 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st9; + } else if ( (*p) >= 45 ) + goto st0; + goto tr7; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: + switch( (*p) ) { + case 45: goto st0; + case 46: goto tr8; + case 69: goto tr9; + case 101: goto tr9; + } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st10; + goto tr7; + } + _test_eof2: cs = 2; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 753 "parser.rl" + + if (cs >= JSON_float_first_final) { + if (!is_float) { + return JSON_decode_integer(json, p, result); + } + VALUE mod = Qnil; + ID method_id = 0; + if (json->decimal_class) { + if (rb_respond_to(json->decimal_class, i_try_convert)) { + mod = json->decimal_class; + method_id = i_try_convert; + } else if (rb_respond_to(json->decimal_class, i_new)) { + mod = json->decimal_class; + method_id = i_new; + } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) { + VALUE name = rb_class_name(json->decimal_class); + const char *name_cstr = RSTRING_PTR(name); + const char *last_colon = strrchr(name_cstr, ':'); + if (last_colon) { + const char *mod_path_end = last_colon - 1; + VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); + mod = rb_path_to_class(mod_path); + + const char *method_name_beg = last_colon + 1; + long before_len = method_name_beg - name_cstr; + long len = RSTRING_LEN(name) - before_len; + VALUE method_name = rb_str_substr(name, before_len, len); + method_id = SYM2ID(rb_str_intern(method_name)); + } else { + mod = rb_mKernel; + method_id = SYM2ID(rb_str_intern(name)); + } + } + } + + long len = p - json->memo; + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); + + if (method_id) { + VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer)); + *result = rb_funcallv(mod, method_id, 1, &text); + } else { + *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1)); + } + + return p + 1; + } else { + return NULL; + } +} + + + +#line 1746 "parser.c" +enum {JSON_array_start = 1}; +enum {JSON_array_first_final = 22}; +enum {JSON_array_error = 0}; + +enum {JSON_array_en_main = 1}; + + +#line 833 "parser.rl" + + +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) +{ + int cs = EVIL; + + if (json->max_nesting && current_nesting > json->max_nesting) { + rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); + } + long stack_head = json->stack->head; + + +#line 1767 "parser.c" + { + cs = JSON_array_start; + } + +#line 845 "parser.rl" + +#line 1774 "parser.c" + { + short _widec; + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + if ( (*p) == 91 ) + goto st2; + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 13: goto st2; + case 32: goto st2; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st18; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 93: goto tr4; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st2; + goto st0; +tr2: +#line 813 "parser.rl" + { + VALUE v = Qnil; + char *np = JSON_parse_value(json, p, pe, &v, current_nesting); + if (np == NULL) { + p--; {p++; cs = 3; goto _out;} + } else { + {p = (( np))-1;} + } + } + goto st3; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: +#line 1829 "parser.c" + _widec = (*p); + if ( 44 <= (*p) && (*p) <= 44 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 13: goto st3; + case 32: goto st3; + case 47: goto st4; + case 93: goto tr4; + case 300: goto st8; + case 556: goto st13; + } + if ( 9 <= _widec && _widec <= 10 ) + goto st3; + goto st0; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 42: goto st5; + case 47: goto st7; + } + goto st0; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) == 42 ) + goto st6; + goto st5; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 42: goto st6; + case 47: goto st3; + } + goto st5; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) == 10 ) + goto st3; + goto st7; +tr4: +#line 825 "parser.rl" + { p--; {p++; cs = 22; goto _out;} } + goto st22; +st22: + if ( ++p == pe ) + goto _test_eof22; +case 22: +#line 1888 "parser.c" + goto st0; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + case 13: goto st8; + case 32: goto st8; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st9; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st8; + goto st0; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + switch( (*p) ) { + case 42: goto st10; + case 47: goto st12; + } + goto st0; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: + if ( (*p) == 42 ) + goto st11; + goto st10; +st11: + if ( ++p == pe ) + goto _test_eof11; +case 11: + switch( (*p) ) { + case 42: goto st11; + case 47: goto st8; + } + goto st10; +st12: + if ( ++p == pe ) + goto _test_eof12; +case 12: + if ( (*p) == 10 ) + goto st8; + goto st12; +st13: + if ( ++p == pe ) + goto _test_eof13; +case 13: + _widec = (*p); + if ( (*p) < 13 ) { + if ( (*p) > 9 ) { + if ( 10 <= (*p) && (*p) <= 10 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 13 ) { + if ( (*p) > 32 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 32 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 34: goto tr2; + case 45: goto tr2; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 93: goto tr4; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + case 269: goto st8; + case 288: goto st8; + case 303: goto st9; + case 525: goto st13; + case 544: goto st13; + case 559: goto st14; + } + if ( _widec < 265 ) { + if ( 48 <= _widec && _widec <= 57 ) + goto tr2; + } else if ( _widec > 266 ) { + if ( 521 <= _widec && _widec <= 522 ) + goto st13; + } else + goto st8; + goto st0; +st14: + if ( ++p == pe ) + goto _test_eof14; +case 14: + _widec = (*p); + if ( (*p) > 42 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st10; + case 303: goto st12; + case 554: goto st15; + case 559: goto st17; + } + goto st0; +st15: + if ( ++p == pe ) + goto _test_eof15; +case 15: + _widec = (*p); + if ( (*p) < 42 ) { + if ( (*p) <= 41 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 42 ) { + if ( 43 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st11; + case 554: goto st16; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st15; + } else if ( _widec >= 128 ) + goto st10; + goto st0; +st16: + if ( ++p == pe ) + goto _test_eof16; +case 16: + _widec = (*p); + if ( (*p) < 43 ) { + if ( (*p) > 41 ) { + if ( 42 <= (*p) && (*p) <= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 46 ) { + if ( (*p) > 47 ) { + if ( 48 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st11; + case 303: goto st8; + case 554: goto st16; + case 559: goto st13; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st15; + } else if ( _widec >= 128 ) + goto st10; + goto st0; +st17: + if ( ++p == pe ) + goto _test_eof17; +case 17: + _widec = (*p); + if ( (*p) < 10 ) { + if ( (*p) <= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 10 ) { + if ( 11 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 823 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 266: goto st8; + case 522: goto st13; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st17; + } else if ( _widec >= 128 ) + goto st12; + goto st0; +st18: + if ( ++p == pe ) + goto _test_eof18; +case 18: + switch( (*p) ) { + case 42: goto st19; + case 47: goto st21; + } + goto st0; +st19: + if ( ++p == pe ) + goto _test_eof19; +case 19: + if ( (*p) == 42 ) + goto st20; + goto st19; +st20: + if ( ++p == pe ) + goto _test_eof20; +case 20: + switch( (*p) ) { + case 42: goto st20; + case 47: goto st2; + } + goto st19; +st21: + if ( ++p == pe ) + goto _test_eof21; +case 21: + if ( (*p) == 10 ) + goto st2; + goto st21; + } + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof22: cs = 22; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof11: cs = 11; goto _test_eof; + _test_eof12: cs = 12; goto _test_eof; + _test_eof13: cs = 13; goto _test_eof; + _test_eof14: cs = 14; goto _test_eof; + _test_eof15: cs = 15; goto _test_eof; + _test_eof16: cs = 16; goto _test_eof; + _test_eof17: cs = 17; goto _test_eof; + _test_eof18: cs = 18; goto _test_eof; + _test_eof19: cs = 19; goto _test_eof; + _test_eof20: cs = 20; goto _test_eof; + _test_eof21: cs = 21; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 846 "parser.rl" + + if(cs >= JSON_array_first_final) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->array_class)) { + VALUE array = rb_class_new_instance(0, 0, json->array_class); + VALUE *items = rvalue_stack_peek(json->stack, count); + long index; + for (index = 0; index < count; index++) { + rb_funcall(array, i_leftshift, 1, items[index]); + } + *result = array; + } else { + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count)); + *result = array; + } + rvalue_stack_pop(json->stack, count); + + return p + 1; + } else { + raise_parse_error("unexpected token at '%s'", p); + return NULL; + } +} + +static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize) +{ + if (symbolize) { + intern = true; + } + VALUE result; +# ifdef HAVE_RB_ENC_INTERNED_STR + if (intern) { + result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); + } else { + result = rb_utf8_str_new(start, (long)(end - start)); + } +# else + result = rb_utf8_str_new(start, (long)(end - start)); + if (intern) { + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + } +# endif + + if (symbolize) { + result = rb_str_intern(result); + } + + return result; +} + +static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) +{ + size_t bufferSize = stringEnd - string; + + if (is_name && json->in_array) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + + return build_string(string, stringEnd, intern, symbolize); +} + +static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) +{ + size_t bufferSize = stringEnd - string; + char *p = string, *pe = string, *unescape, *bufferStart, *buffer; + int unescape_len; + char buf[4]; + + if (is_name && json->in_array) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + + pe = memchr(p, '\\', bufferSize); + if (RB_UNLIKELY(pe == NULL)) { + return build_string(string, stringEnd, intern, symbolize); + } + + VALUE result = rb_str_buf_new(bufferSize); + rb_enc_associate_index(result, utf8_encindex); + buffer = bufferStart = RSTRING_PTR(result); + + while (pe < stringEnd) { + if (*pe == '\\') { + unescape = (char *) "?"; + unescape_len = 1; + if (pe > p) { + MEMCPY(buffer, p, char, pe - p); + buffer += pe - p; + } + switch (*++pe) { + case 'n': + unescape = (char *) "\n"; + break; + case 'r': + unescape = (char *) "\r"; + break; + case 't': + unescape = (char *) "\t"; + break; + case '"': + unescape = (char *) "\""; + break; + case '\\': + unescape = (char *) "\\"; + break; + case 'b': + unescape = (char *) "\b"; + break; + case 'f': + unescape = (char *) "\f"; + break; + case 'u': + if (pe > stringEnd - 4) { + raise_parse_error("incomplete unicode character escape sequence at '%s'", p); + } else { + uint32_t ch = unescape_unicode((unsigned char *) ++pe); + pe += 3; + /* To handle values above U+FFFF, we take a sequence of + * \uXXXX escapes in the U+D800..U+DBFF then + * U+DC00..U+DFFF ranges, take the low 10 bits from each + * to make a 20-bit number, then add 0x10000 to get the + * final codepoint. + * + * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling + * Surrogate Pairs in UTF-16", and 23.6 "Surrogates + * Area". + */ + if ((ch & 0xFC00) == 0xD800) { + pe++; + if (pe > stringEnd - 6) { + raise_parse_error("incomplete surrogate pair at '%s'", p); + } + if (pe[0] == '\\' && pe[1] == 'u') { + uint32_t sur = unescape_unicode((unsigned char *) pe + 2); + ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) + | (sur & 0x3FF)); + pe += 5; + } else { + unescape = (char *) "?"; + break; + } + } + unescape_len = convert_UTF32_to_UTF8(buf, ch); + unescape = buf; + } + break; + default: + p = pe; + continue; + } + MEMCPY(buffer, unescape, char, unescape_len); + buffer += unescape_len; + p = ++pe; + } else { + pe++; + } + } + + if (pe > p) { + MEMCPY(buffer, p, char, pe - p); + buffer += pe - p; + } + rb_str_set_len(result, buffer - bufferStart); + + if (symbolize) { + result = rb_str_intern(result); + } else if (intern) { + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + } + + return result; +} + + +#line 2411 "parser.c" +enum {JSON_string_start = 1}; +enum {JSON_string_first_final = 9}; +enum {JSON_string_error = 0}; + +enum {JSON_string_en_main = 1}; + + +#line 1069 "parser.rl" + + +static int +match_i(VALUE regexp, VALUE klass, VALUE memo) +{ + if (regexp == Qundef) return ST_STOP; + if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) && + RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) { + rb_ary_push(memo, klass); + return ST_STOP; + } + return ST_CONTINUE; +} + +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) +{ + int cs = EVIL; + VALUE match_string; + + +#line 2440 "parser.c" + { + cs = JSON_string_start; + } + +#line 1089 "parser.rl" + json->memo = p; + +#line 2448 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + if ( (*p) == 34 ) + goto st2; + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 34: goto tr2; + case 92: goto st3; + } + if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) + goto st0; + goto st2; +tr2: +#line 1051 "parser.rl" + { + *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + {p = (( p + 1))-1;} + p--; + {p++; cs = 9; goto _out;} + } +#line 1044 "parser.rl" + { + *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + {p = (( p + 1))-1;} + p--; + {p++; cs = 9; goto _out;} + } + goto st9; +tr6: +#line 1044 "parser.rl" + { + *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + {p = (( p + 1))-1;} + p--; + {p++; cs = 9; goto _out;} + } + goto st9; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: +#line 2501 "parser.c" + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + if ( (*p) == 117 ) + goto st5; + if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) + goto st0; + goto st4; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 34: goto tr6; + case 92: goto st3; + } + if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) + goto st0; + goto st4; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st6; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st6; + } else + goto st6; + goto st0; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st7; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st7; + } else + goto st7; + goto st0; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st8; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st8; + } else + goto st8; + goto st0; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st4; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st4; + } else + goto st4; + goto st0; + } + _test_eof2: cs = 2; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 1091 "parser.rl" + + if (json->create_additions && RTEST(match_string = json->match_string)) { + VALUE klass; + VALUE memo = rb_ary_new2(2); + rb_ary_push(memo, *result); + rb_hash_foreach(match_string, match_i, memo); + klass = rb_ary_entry(memo, 1); + if (RTEST(klass)) { + *result = rb_funcall(klass, i_json_create, 1, *result); + } + } + + if (cs >= JSON_string_first_final) { + return p + 1; + } else { + return NULL; + } +} + +/* + * Document-class: JSON::Ext::Parser + * + * This is the JSON parser implemented as a C extension. It can be configured + * to be used by setting + * + * JSON.parser = JSON::Ext::Parser + * + * with the method parser= in JSON. + * + */ + +static VALUE convert_encoding(VALUE source) +{ + int encindex = RB_ENCODING_GET(source); + + if (RB_LIKELY(encindex == utf8_encindex)) { + return source; + } + + if (encindex == binary_encindex) { + // For historical reason, we silently reinterpret binary strings as UTF-8 + return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + } + + return rb_funcall(source, i_encode, 1, Encoding_UTF_8); +} + +static int configure_parser_i(VALUE key, VALUE val, VALUE data) +{ + JSON_Parser *json = (JSON_Parser *)data; + + if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } + else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { json->freeze = RTEST(val); } + else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } + else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } + else if (key == sym_create_additions) { + if (NIL_P(val)) { + json->create_additions = true; + json->deprecated_create_additions = true; + } else { + json->create_additions = RTEST(val); + json->deprecated_create_additions = false; + } + } + + return ST_CONTINUE; +} + +static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) +{ + if (json->Vsource) { + rb_raise(rb_eTypeError, "already initialized instance"); + } + + json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; + json->max_nesting = 100; + + if (!NIL_P(opts)) { + Check_Type(opts, T_HASH); + if (RHASH_SIZE(opts) > 0) { + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. + rb_hash_foreach(opts, configure_parser_i, (VALUE)json); + + if (json->symbolize_names && json->create_additions) { + rb_raise(rb_eArgError, + "options :symbolize_names and :create_additions cannot be " + " used in conjunction"); + } + + if (json->create_additions && !json->create_id) { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } + } + + } + source = convert_encoding(StringValue(source)); + StringValue(source); + json->len = RSTRING_LEN(source); + json->source = RSTRING_PTR(source); + json->Vsource = source; +} + +/* + * call-seq: new(source, opts => {}) + * + * Creates a new JSON::Ext::Parser instance for the string _source_. + * + * It will be configured by the _opts_ hash. _opts_ can have the following + * keys: + * + * _opts_ can have the following keys: + * * *max_nesting*: The maximum depth of nesting allowed in the parsed data + * structures. Disable depth checking with :max_nesting => false|nil|0, it + * defaults to 100. + * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in + * defiance of RFC 4627 to be parsed by the Parser. This option defaults to + * false. + * * *symbolize_names*: If set to true, returns symbols for the names + * (keys) in a JSON object. Otherwise strings are returned, which is + * also the default. It's not possible to use this option in + * conjunction with the *create_additions* option. + * * *create_additions*: If set to false, the Parser doesn't create + * additions even if a matching class and create_id was found. This option + * defaults to false. + * * *object_class*: Defaults to Hash. If another type is provided, it will be used + * instead of Hash to represent JSON objects. The type must respond to + * +new+ without arguments, and return an object that respond to +[]=+. + * * *array_class*: Defaults to Array If another type is provided, it will be used + * instead of Hash to represent JSON arrays. The type must respond to + * +new+ without arguments, and return an object that respond to +<<+. + * * *decimal_class*: Specifies which class to use instead of the default + * (Float) when parsing decimal numbers. This class must accept a single + * string argument in its constructor. + */ +static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) +{ + GET_PARSER_INIT; + + rb_check_arity(argc, 1, 2); + + parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil); + return self; +} + + +#line 2742 "parser.c" +enum {JSON_start = 1}; +enum {JSON_first_final = 10}; +enum {JSON_error = 0}; + +enum {JSON_en_main = 1}; + + +#line 1257 "parser.rl" + + +/* + * call-seq: parse() + * + * Parses the current JSON text _source_ and returns the complete data + * structure as a result. + * It raises JSON::ParserError if fail to parse. + */ +static VALUE cParser_parse(VALUE self) +{ + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; + GET_PARSER; + + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; + + +#line 2779 "parser.c" + { + cs = JSON_start; + } + +#line 1285 "parser.rl" + p = json->source; + pe = p + json->len; + +#line 2788 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +st1: + if ( ++p == pe ) + goto _test_eof1; +case 1: + switch( (*p) ) { + case 13: goto st1; + case 32: goto st1; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st6; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st1; + goto st0; +st0: +cs = 0; + goto _out; +tr2: +#line 1249 "parser.rl" + { + char *np = JSON_parse_value(json, p, pe, &result, 0); + if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} + } + goto st10; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: +#line 2832 "parser.c" + switch( (*p) ) { + case 13: goto st10; + case 32: goto st10; + case 47: goto st2; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st10; + goto st0; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 42: goto st3; + case 47: goto st5; + } + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + if ( (*p) == 42 ) + goto st4; + goto st3; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 42: goto st4; + case 47: goto st10; + } + goto st3; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) == 10 ) + goto st10; + goto st5; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 42: goto st7; + case 47: goto st9; + } + goto st0; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) == 42 ) + goto st8; + goto st7; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + case 42: goto st8; + case 47: goto st1; + } + goto st7; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + if ( (*p) == 10 ) + goto st1; + goto st9; + } + _test_eof1: cs = 1; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 1288 "parser.rl" + + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } + + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + raise_parse_error("unexpected token at '%s'", p); + return Qnil; + } +} + +static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) +{ + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; + + JSON_Parser _parser = {0}; + JSON_Parser *json = &_parser; + parser_init(json, source, opts); + + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; + + +#line 2957 "parser.c" + { + cs = JSON_start; + } + +#line 1323 "parser.rl" + p = json->source; + pe = p + json->len; + +#line 2966 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +st1: + if ( ++p == pe ) + goto _test_eof1; +case 1: + switch( (*p) ) { + case 13: goto st1; + case 32: goto st1; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st6; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st1; + goto st0; +st0: +cs = 0; + goto _out; +tr2: +#line 1249 "parser.rl" + { + char *np = JSON_parse_value(json, p, pe, &result, 0); + if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} + } + goto st10; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: +#line 3010 "parser.c" + switch( (*p) ) { + case 13: goto st10; + case 32: goto st10; + case 47: goto st2; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st10; + goto st0; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 42: goto st3; + case 47: goto st5; + } + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + if ( (*p) == 42 ) + goto st4; + goto st3; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 42: goto st4; + case 47: goto st10; + } + goto st3; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) == 10 ) + goto st10; + goto st5; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 42: goto st7; + case 47: goto st9; + } + goto st0; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) == 42 ) + goto st8; + goto st7; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + case 42: goto st8; + case 47: goto st1; + } + goto st7; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + if ( (*p) == 10 ) + goto st1; + goto st9; + } + _test_eof1: cs = 1; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 1326 "parser.rl" + + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } + + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + raise_parse_error("unexpected token at '%s'", p); + return Qnil; + } +} + +static void JSON_mark(void *ptr) +{ + JSON_Parser *json = ptr; + rb_gc_mark(json->Vsource); + rb_gc_mark(json->create_id); + rb_gc_mark(json->object_class); + rb_gc_mark(json->array_class); + rb_gc_mark(json->decimal_class); + rb_gc_mark(json->match_string); + rb_gc_mark(json->stack_handle); + + long index; + for (index = 0; index < json->name_cache.length; index++) { + rb_gc_mark(json->name_cache.entries[index]); + } +} + +static void JSON_free(void *ptr) +{ + JSON_Parser *json = ptr; + fbuffer_free(&json->fbuffer); + ruby_xfree(json); +} + +static size_t JSON_memsize(const void *ptr) +{ + const JSON_Parser *json = ptr; + return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer); +} + +static const rb_data_type_t JSON_Parser_type = { + "JSON/Parser", + {JSON_mark, JSON_free, JSON_memsize,}, + 0, 0, + RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static VALUE cJSON_parser_s_allocate(VALUE klass) +{ + JSON_Parser *json; + VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); + fbuffer_stack_init(&json->fbuffer, 0, NULL, 0); + return obj; +} + +/* + * call-seq: source() + * + * Returns a copy of the current _source_ string, that was used to construct + * this Parser. + */ +static VALUE cParser_source(VALUE self) +{ + GET_PARSER; + return rb_str_dup(json->Vsource); +} + +void Init_parser(void) +{ +#ifdef HAVE_RB_EXT_RACTOR_SAFE + rb_ext_ractor_safe(true); +#endif + +#undef rb_intern + rb_require("json/common"); + mJSON = rb_define_module("JSON"); + mExt = rb_define_module_under(mJSON, "Ext"); + cParser = rb_define_class_under(mExt, "Parser", rb_cObject); + eNestingError = rb_path2class("JSON::NestingError"); + rb_gc_register_mark_object(eNestingError); + rb_define_alloc_func(cParser, cJSON_parser_s_allocate); + rb_define_method(cParser, "initialize", cParser_initialize, -1); + rb_define_method(cParser, "parse", cParser_parse, 0); + rb_define_method(cParser, "source", cParser_source, 0); + + rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); + + CNaN = rb_const_get(mJSON, rb_intern("NaN")); + rb_gc_register_mark_object(CNaN); + + CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); + rb_gc_register_mark_object(CInfinity); + + CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); + rb_gc_register_mark_object(CMinusInfinity); + + rb_global_variable(&Encoding_UTF_8); + Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + + sym_max_nesting = ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); + sym_freeze = ID2SYM(rb_intern("freeze")); + sym_create_additions = ID2SYM(rb_intern("create_additions")); + sym_create_id = ID2SYM(rb_intern("create_id")); + sym_object_class = ID2SYM(rb_intern("object_class")); + sym_array_class = ID2SYM(rb_intern("array_class")); + sym_decimal_class = ID2SYM(rb_intern("decimal_class")); + sym_match_string = ID2SYM(rb_intern("match_string")); + + i_create_id = rb_intern("create_id"); + i_json_creatable_p = rb_intern("json_creatable?"); + i_json_create = rb_intern("json_create"); + i_chr = rb_intern("chr"); + i_match = rb_intern("match"); + i_deep_const_get = rb_intern("deep_const_get"); + i_aset = rb_intern("[]="); + i_aref = rb_intern("[]"); + i_leftshift = rb_intern("<<"); + i_new = rb_intern("new"); + i_try_convert = rb_intern("try_convert"); + i_uminus = rb_intern("-@"); + i_encode = rb_intern("encode"); + + binary_encindex = rb_ascii8bit_encindex(); + utf8_encindex = rb_utf8_encindex(); + enc_utf8 = rb_utf8_encoding(); +} + +/* + * Local variables: + * mode: c + * c-file-style: ruby + * indent-tabs-mode: nil + * End: + */ |
