diff options
Diffstat (limited to 'ext/json/parser/parser.rl')
| -rw-r--r-- | ext/json/parser/parser.rl | 1465 |
1 files changed, 0 insertions, 1465 deletions
diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl deleted file mode 100644 index 9856a73808..0000000000 --- a/ext/json/parser/parser.rl +++ /dev/null @@ -1,1465 +0,0 @@ -#include "ruby.h" -#include "../fbuffer/fbuffer.h" - -static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, - i_chr, i_deep_const_get, i_match, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_uminus, i_encode; - -static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, - sym_create_additions, sym_create_id, sym_object_class, sym_array_class, - sym_decimal_class, sym_match_string; - -static int binary_encindex; -static int utf8_encindex; - -#ifdef HAVE_RB_CATEGORY_WARN -# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) -#else -# define json_deprecated(message) rb_warn(message) -#endif - -static const char deprecated_create_additions_warning[] = - "JSON.load implicit support for `create_additions: true` is deprecated " - "and will be removed in 3.0, use JSON.unsafe_load or explicitly " - "pass `create_additions: true`"; - -#ifndef HAVE_RB_HASH_BULK_INSERT -// For TruffleRuby -void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) -{ - long index = 0; - while (index < count) { - VALUE name = pairs[index++]; - VALUE value = pairs[index++]; - rb_hash_aset(hash, name, value); - } - RB_GC_GUARD(hash); -} -#endif - -/* name cache */ - -#include <string.h> -#include <ctype.h> - -// Object names are likely to be repeated, and are frozen. -// As such we can re-use them if we keep a cache of the ones we've seen so far, -// and save much more expensive lookups into the global fstring table. -// This cache implementation is deliberately simple, as we're optimizing for compactness, -// to be able to fit safely on the stack. -// As such, binary search into a sorted array gives a good tradeoff between compactness and -// performance. -#define JSON_RVALUE_CACHE_CAPA 63 -typedef struct rvalue_cache_struct { - int length; - VALUE entries[JSON_RVALUE_CACHE_CAPA]; -} rvalue_cache; - -static rb_encoding *enc_utf8; - -#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 - -static inline VALUE build_interned_string(const char *str, const long length) -{ -# ifdef HAVE_RB_ENC_INTERNED_STR - return rb_enc_interned_str(str, length, enc_utf8); -# else - VALUE rstring = rb_utf8_str_new(str, length); - return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); -# endif -} - -static inline VALUE build_symbol(const char *str, const long length) -{ - return rb_str_intern(build_interned_string(str, length)); -} - -static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) -{ - MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); - cache->length++; - cache->entries[index] = rstring; -} - -static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) -{ - long rstring_length = RSTRING_LEN(rstring); - if (length == rstring_length) { - return memcmp(str, RSTRING_PTR(rstring), length); - } else { - return (int)(length - rstring_length); - } -} - -static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) -{ - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - - int low = 0; - int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; - - while (low <= high) { - mid = (high + low) >> 1; - VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, entry); - - if (last_cmp == 0) { - return entry; - } else if (last_cmp > 0) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - - VALUE rstring = build_interned_string(str, length); - - if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rstring); - } - return rstring; -} - -static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) -{ - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - - int low = 0; - int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; - - while (low <= high) { - mid = (high + low) >> 1; - VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); - - if (last_cmp == 0) { - return entry; - } else if (last_cmp > 0) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - - VALUE rsymbol = build_symbol(str, length); - - if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rsymbol); - } - return rsymbol; -} - -/* rvalue stack */ - -#define RVALUE_STACK_INITIAL_CAPA 128 - -enum rvalue_stack_type { - RVALUE_STACK_HEAP_ALLOCATED = 0, - RVALUE_STACK_STACK_ALLOCATED = 1, -}; - -typedef struct rvalue_stack_struct { - enum rvalue_stack_type type; - long capa; - long head; - VALUE *ptr; -} rvalue_stack; - -static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); - -static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) -{ - long required = stack->capa * 2; - - if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { - stack = rvalue_stack_spill(stack, handle, stack_ref); - } else { - REALLOC_N(stack->ptr, VALUE, required); - stack->capa = required; - } - return stack; -} - -static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) -{ - if (RB_UNLIKELY(stack->head >= stack->capa)) { - stack = rvalue_stack_grow(stack, handle, stack_ref); - } - stack->ptr[stack->head] = value; - stack->head++; -} - -static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) -{ - return stack->ptr + (stack->head - count); -} - -static inline void rvalue_stack_pop(rvalue_stack *stack, long count) -{ - stack->head -= count; -} - -static void rvalue_stack_mark(void *ptr) -{ - rvalue_stack *stack = (rvalue_stack *)ptr; - long index; - for (index = 0; index < stack->head; index++) { - rb_gc_mark(stack->ptr[index]); - } -} - -static void rvalue_stack_free(void *ptr) -{ - rvalue_stack *stack = (rvalue_stack *)ptr; - if (stack) { - ruby_xfree(stack->ptr); - ruby_xfree(stack); - } -} - -static size_t rvalue_stack_memsize(const void *ptr) -{ - const rvalue_stack *stack = (const rvalue_stack *)ptr; - return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; -} - -static const rb_data_type_t JSON_Parser_rvalue_stack_type = { - "JSON::Ext::Parser/rvalue_stack", - { - .dmark = rvalue_stack_mark, - .dfree = rvalue_stack_free, - .dsize = rvalue_stack_memsize, - }, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -}; - -static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) -{ - rvalue_stack *stack; - *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - *stack_ref = stack; - MEMCPY(stack, old_stack, rvalue_stack, 1); - - stack->capa = old_stack->capa << 1; - stack->ptr = ALLOC_N(VALUE, stack->capa); - stack->type = RVALUE_STACK_HEAP_ALLOCATED; - MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); - return stack; -} - -static void rvalue_stack_eagerly_release(VALUE handle) -{ - rvalue_stack *stack; - TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - RTYPEDDATA_DATA(handle) = NULL; - rvalue_stack_free(stack); -} - -/* unicode */ - -static const signed char digit_values[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, - -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1 -}; - -static uint32_t unescape_unicode(const unsigned char *p) -{ - const uint32_t replacement_char = 0xFFFD; - - signed char b; - uint32_t result = 0; - b = digit_values[p[0]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[1]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[2]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[3]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - return result; -} - -static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) -{ - int len = 1; - if (ch <= 0x7F) { - buf[0] = (char) ch; - } else if (ch <= 0x07FF) { - buf[0] = (char) ((ch >> 6) | 0xC0); - buf[1] = (char) ((ch & 0x3F) | 0x80); - len++; - } else if (ch <= 0xFFFF) { - buf[0] = (char) ((ch >> 12) | 0xE0); - buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); - buf[2] = (char) ((ch & 0x3F) | 0x80); - len += 2; - } else if (ch <= 0x1fffff) { - buf[0] =(char) ((ch >> 18) | 0xF0); - buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); - buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); - buf[3] =(char) ((ch & 0x3F) | 0x80); - len += 3; - } else { - buf[0] = '?'; - } - return len; -} - -typedef struct JSON_ParserStruct { - VALUE Vsource; - char *source; - long len; - char *memo; - VALUE create_id; - VALUE object_class; - VALUE array_class; - VALUE decimal_class; - VALUE match_string; - FBuffer fbuffer; - int in_array; - int max_nesting; - bool allow_nan; - bool allow_trailing_comma; - bool parsing_name; - bool symbolize_names; - bool freeze; - bool create_additions; - bool deprecated_create_additions; - rvalue_cache name_cache; - rvalue_stack *stack; - VALUE stack_handle; -} JSON_Parser; - -#define GET_PARSER \ - GET_PARSER_INIT; \ - if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") - -#define GET_PARSER_INIT \ - JSON_Parser *json; \ - TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) - -#define MinusInfinity "-Infinity" -#define EVIL 0x666 - -static const rb_data_type_t JSON_Parser_type; -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); - - -#ifndef HAVE_STRNLEN -static size_t strnlen(const char *s, size_t maxlen) -{ - char *p; - return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen); -} -#endif - -#define PARSE_ERROR_FRAGMENT_LEN 32 -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error(const char *format, const char *start) -{ - char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; - - size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN); - const char *ptr = start; - - if (len == PARSE_ERROR_FRAGMENT_LEN) { - MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); - buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0'; - ptr = buffer; - } - - rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); -} - - -%%{ - machine JSON_common; - - cr = '\n'; - cr_neg = [^\n]; - ws = [ \t\r\n]; - c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/'; - cpp_comment = '//' cr_neg* cr; - comment = c_comment | cpp_comment; - ignore = ws | comment; - name_separator = ':'; - value_separator = ','; - Vnull = 'null'; - Vfalse = 'false'; - Vtrue = 'true'; - VNaN = 'NaN'; - VInfinity = 'Infinity'; - VMinusInfinity = '-Infinity'; - begin_value = [nft\"\-\[\{NI] | digit; - begin_object = '{'; - end_object = '}'; - begin_array = '['; - end_array = ']'; - begin_string = '"'; - begin_name = begin_string; - begin_number = digit | '-'; -}%% - -%%{ - machine JSON_object; - include JSON_common; - - write data; - - action parse_value { - char *np = JSON_parse_value(json, fpc, pe, result, current_nesting); - if (np == NULL) { - fhold; fbreak; - } else { - fexec np; - } - } - - action allow_trailing_comma { json->allow_trailing_comma } - - action parse_name { - char *np; - json->parsing_name = true; - np = JSON_parse_string(json, fpc, pe, result); - json->parsing_name = false; - if (np == NULL) { fhold; fbreak; } else { - PUSH(*result); - fexec np; - } - } - - action exit { fhold; fbreak; } - - pair = ignore* begin_name >parse_name ignore* name_separator ignore* begin_value >parse_value; - next_pair = ignore* value_separator pair; - - main := ( - begin_object - (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore* - end_object - ) @exit; -}%% - -#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) - -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - - long stack_head = json->stack->head; - - %% write init; - %% write exec; - - if (cs >= JSON_object_first_final) { - long count = json->stack->head - stack_head; - - if (RB_UNLIKELY(json->object_class)) { - VALUE object = rb_class_new_instance(0, 0, json->object_class); - long index = 0; - VALUE *items = rvalue_stack_peek(json->stack, count); - while (index < count) { - VALUE name = items[index++]; - VALUE value = items[index++]; - rb_funcall(object, i_aset, 2, name, value); - } - *result = object; - } else { - VALUE hash; -#ifdef HAVE_RB_HASH_NEW_CAPA - hash = rb_hash_new_capa(count >> 1); -#else - hash = rb_hash_new(); -#endif - rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash); - *result = hash; - } - rvalue_stack_pop(json->stack, count); - - if (RB_UNLIKELY(json->create_additions)) { - VALUE klassname; - if (json->object_class) { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); - } else { - klassname = rb_hash_aref(*result, json->create_id); - } - if (!NIL_P(klassname)) { - VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { - if (json->deprecated_create_additions) { - json_deprecated(deprecated_create_additions_warning); - } - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - } - return p + 1; - } else { - return NULL; - } -} - -%%{ - machine JSON_value; - include JSON_common; - - write data; - - action parse_null { - *result = Qnil; - } - action parse_false { - *result = Qfalse; - } - action parse_true { - *result = Qtrue; - } - action parse_nan { - if (json->allow_nan) { - *result = CNaN; - } else { - raise_parse_error("unexpected token at '%s'", p - 2); - } - } - action parse_infinity { - if (json->allow_nan) { - *result = CInfinity; - } else { - raise_parse_error("unexpected token at '%s'", p - 7); - } - } - action parse_string { - char *np = JSON_parse_string(json, fpc, pe, result); - if (np == NULL) { - fhold; - fbreak; - } else { - fexec np; - } - } - - action parse_number { - char *np; - if(pe > fpc + 8 && !strncmp(MinusInfinity, fpc, 9)) { - if (json->allow_nan) { - *result = CMinusInfinity; - fexec p + 10; - fhold; fbreak; - } else { - raise_parse_error("unexpected token at '%s'", p); - } - } - np = JSON_parse_number(json, fpc, pe, result); - if (np != NULL) { - fexec np; - } - fhold; fbreak; - } - - action parse_array { - char *np; - json->in_array++; - np = JSON_parse_array(json, fpc, pe, result, current_nesting + 1); - json->in_array--; - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action parse_object { - char *np; - np = JSON_parse_object(json, fpc, pe, result, current_nesting + 1); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action exit { fhold; fbreak; } - -main := ignore* ( - Vnull @parse_null | - Vfalse @parse_false | - Vtrue @parse_true | - VNaN @parse_nan | - VInfinity @parse_infinity | - begin_number @parse_number | - begin_string @parse_string | - begin_array @parse_array | - begin_object @parse_object - ) ignore* %*exit; -}%% - -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - %% write init; - %% write exec; - - if (json->freeze) { - OBJ_FREEZE(*result); - } - - if (cs >= JSON_value_first_final) { - PUSH(*result); - return p; - } else { - return NULL; - } -} - -%%{ - machine JSON_integer; - - write data; - - action exit { fhold; fbreak; } - - main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit); -}%% - -#define MAX_FAST_INTEGER_SIZE 18 -static inline VALUE fast_parse_integer(char *p, char *pe) -{ - bool negative = false; - if (*p == '-') { - negative = true; - p++; - } - - long long memo = 0; - while (p < pe) { - memo *= 10; - memo += *p - '0'; - p++; - } - - if (negative) { - memo = -memo; - } - return LL2NUM(memo); -} - -static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) -{ - long len = p - json->memo; - if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { - *result = fast_parse_integer(json->memo, p); - } else { - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); - } - return p + 1; -} - -%%{ - machine JSON_float; - include JSON_common; - - write data; - - action exit { fhold; fbreak; } - action isFloat { is_float = true; } - - main := '-'? ( - (('0' | [1-9][0-9]*) - ((('.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) | - ([Ee] [+\-]?[0-9]+)) > isFloat)? - ) (^[0-9Ee.\-]? @exit )); -}%% - -static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - bool is_float = false; - - %% write init; - json->memo = p; - %% write exec; - - if (cs >= JSON_float_first_final) { - if (!is_float) { - return JSON_decode_integer(json, p, result); - } - VALUE mod = Qnil; - ID method_id = 0; - if (json->decimal_class) { - if (rb_respond_to(json->decimal_class, i_try_convert)) { - mod = json->decimal_class; - method_id = i_try_convert; - } else if (rb_respond_to(json->decimal_class, i_new)) { - mod = json->decimal_class; - method_id = i_new; - } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) { - VALUE name = rb_class_name(json->decimal_class); - const char *name_cstr = RSTRING_PTR(name); - const char *last_colon = strrchr(name_cstr, ':'); - if (last_colon) { - const char *mod_path_end = last_colon - 1; - VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); - mod = rb_path_to_class(mod_path); - - const char *method_name_beg = last_colon + 1; - long before_len = method_name_beg - name_cstr; - long len = RSTRING_LEN(name) - before_len; - VALUE method_name = rb_str_substr(name, before_len, len); - method_id = SYM2ID(rb_str_intern(method_name)); - } else { - mod = rb_mKernel; - method_id = SYM2ID(rb_str_intern(name)); - } - } - } - - long len = p - json->memo; - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - - if (method_id) { - VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer)); - *result = rb_funcallv(mod, method_id, 1, &text); - } else { - *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1)); - } - - return p + 1; - } else { - return NULL; - } -} - - -%%{ - machine JSON_array; - include JSON_common; - - write data; - - action parse_value { - VALUE v = Qnil; - char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting); - if (np == NULL) { - fhold; fbreak; - } else { - fexec np; - } - } - - action allow_trailing_comma { json->allow_trailing_comma } - - action exit { fhold; fbreak; } - - next_element = value_separator ignore* begin_value >parse_value; - - main := begin_array ignore* - ((begin_value >parse_value ignore*) - (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)? - end_array @exit; -}%% - -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - long stack_head = json->stack->head; - - %% write init; - %% write exec; - - if(cs >= JSON_array_first_final) { - long count = json->stack->head - stack_head; - - if (RB_UNLIKELY(json->array_class)) { - VALUE array = rb_class_new_instance(0, 0, json->array_class); - VALUE *items = rvalue_stack_peek(json->stack, count); - long index; - for (index = 0; index < count; index++) { - rb_funcall(array, i_leftshift, 1, items[index]); - } - *result = array; - } else { - VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count)); - *result = array; - } - rvalue_stack_pop(json->stack, count); - - return p + 1; - } else { - raise_parse_error("unexpected token at '%s'", p); - return NULL; - } -} - -static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize) -{ - if (symbolize) { - intern = true; - } - VALUE result; -# ifdef HAVE_RB_ENC_INTERNED_STR - if (intern) { - result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); - } else { - result = rb_utf8_str_new(start, (long)(end - start)); - } -# else - result = rb_utf8_str_new(start, (long)(end - start)); - if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - } -# endif - - if (symbolize) { - result = rb_str_intern(result); - } - - return result; -} - -static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - - if (is_name && json->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; - } - } - - return build_string(string, stringEnd, intern, symbolize); -} - -static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - char *p = string, *pe = string, *unescape, *bufferStart, *buffer; - int unescape_len; - char buf[4]; - - if (is_name && json->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; - } - } - - pe = memchr(p, '\\', bufferSize); - if (RB_UNLIKELY(pe == NULL)) { - return build_string(string, stringEnd, intern, symbolize); - } - - VALUE result = rb_str_buf_new(bufferSize); - rb_enc_associate_index(result, utf8_encindex); - buffer = bufferStart = RSTRING_PTR(result); - - while (pe < stringEnd) { - if (*pe == '\\') { - unescape = (char *) "?"; - unescape_len = 1; - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } - switch (*++pe) { - case 'n': - unescape = (char *) "\n"; - break; - case 'r': - unescape = (char *) "\r"; - break; - case 't': - unescape = (char *) "\t"; - break; - case '"': - unescape = (char *) "\""; - break; - case '\\': - unescape = (char *) "\\"; - break; - case 'b': - unescape = (char *) "\b"; - break; - case 'f': - unescape = (char *) "\f"; - break; - case 'u': - if (pe > stringEnd - 4) { - raise_parse_error("incomplete unicode character escape sequence at '%s'", p); - } else { - uint32_t ch = unescape_unicode((unsigned char *) ++pe); - pe += 3; - /* To handle values above U+FFFF, we take a sequence of - * \uXXXX escapes in the U+D800..U+DBFF then - * U+DC00..U+DFFF ranges, take the low 10 bits from each - * to make a 20-bit number, then add 0x10000 to get the - * final codepoint. - * - * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling - * Surrogate Pairs in UTF-16", and 23.6 "Surrogates - * Area". - */ - if ((ch & 0xFC00) == 0xD800) { - pe++; - if (pe > stringEnd - 6) { - raise_parse_error("incomplete surrogate pair at '%s'", p); - } - if (pe[0] == '\\' && pe[1] == 'u') { - uint32_t sur = unescape_unicode((unsigned char *) pe + 2); - ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) - | (sur & 0x3FF)); - pe += 5; - } else { - unescape = (char *) "?"; - break; - } - } - unescape_len = convert_UTF32_to_UTF8(buf, ch); - unescape = buf; - } - break; - default: - p = pe; - continue; - } - MEMCPY(buffer, unescape, char, unescape_len); - buffer += unescape_len; - p = ++pe; - } else { - pe++; - } - } - - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } - rb_str_set_len(result, buffer - bufferStart); - - if (symbolize) { - result = rb_str_intern(result); - } else if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - } - - return result; -} - -%%{ - machine JSON_string; - include JSON_common; - - write data; - - action parse_complex_string { - *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - fexec p + 1; - fhold; - fbreak; - } - - action parse_simple_string { - *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - fexec p + 1; - fhold; - fbreak; - } - - double_quote = '"'; - escape = '\\'; - control = 0..0x1f; - simple = any - escape - double_quote - control; - - main := double_quote ( - (simple*)( - (double_quote) @parse_simple_string | - ((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string - ) - ); -}%% - -static int -match_i(VALUE regexp, VALUE klass, VALUE memo) -{ - if (regexp == Qundef) return ST_STOP; - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) && - RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) { - rb_ary_push(memo, klass); - return ST_STOP; - } - return ST_CONTINUE; -} - -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - VALUE match_string; - - %% write init; - json->memo = p; - %% write exec; - - if (json->create_additions && RTEST(match_string = json->match_string)) { - VALUE klass; - VALUE memo = rb_ary_new2(2); - rb_ary_push(memo, *result); - rb_hash_foreach(match_string, match_i, memo); - klass = rb_ary_entry(memo, 1); - if (RTEST(klass)) { - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - - if (cs >= JSON_string_first_final) { - return p + 1; - } else { - return NULL; - } -} - -/* - * Document-class: JSON::Ext::Parser - * - * This is the JSON parser implemented as a C extension. It can be configured - * to be used by setting - * - * JSON.parser = JSON::Ext::Parser - * - * with the method parser= in JSON. - * - */ - -static VALUE convert_encoding(VALUE source) -{ - int encindex = RB_ENCODING_GET(source); - - if (RB_LIKELY(encindex == utf8_encindex)) { - return source; - } - - if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); - } - - return rb_funcall(source, i_encode, 1, Encoding_UTF_8); -} - -static int configure_parser_i(VALUE key, VALUE val, VALUE data) -{ - JSON_Parser *json = (JSON_Parser *)data; - - if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } - else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } - else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { json->freeze = RTEST(val); } - else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } - else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } - else if (key == sym_create_additions) { - if (NIL_P(val)) { - json->create_additions = true; - json->deprecated_create_additions = true; - } else { - json->create_additions = RTEST(val); - json->deprecated_create_additions = false; - } - } - - return ST_CONTINUE; -} - -static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) -{ - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } - - json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; - json->max_nesting = 100; - - if (!NIL_P(opts)) { - Check_Type(opts, T_HASH); - if (RHASH_SIZE(opts) > 0) { - // We assume in most cases few keys are set so it's faster to go over - // the provided keys than to check all possible keys. - rb_hash_foreach(opts, configure_parser_i, (VALUE)json); - - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - - if (json->create_additions && !json->create_id) { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - } - - } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source); - json->Vsource = source; -} - -/* - * call-seq: new(source, opts => {}) - * - * Creates a new JSON::Ext::Parser instance for the string _source_. - * - * It will be configured by the _opts_ hash. _opts_ can have the following - * keys: - * - * _opts_ can have the following keys: - * * *max_nesting*: The maximum depth of nesting allowed in the parsed data - * structures. Disable depth checking with :max_nesting => false|nil|0, it - * defaults to 100. - * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in - * defiance of RFC 4627 to be parsed by the Parser. This option defaults to - * false. - * * *symbolize_names*: If set to true, returns symbols for the names - * (keys) in a JSON object. Otherwise strings are returned, which is - * also the default. It's not possible to use this option in - * conjunction with the *create_additions* option. - * * *create_additions*: If set to false, the Parser doesn't create - * additions even if a matching class and create_id was found. This option - * defaults to false. - * * *object_class*: Defaults to Hash. If another type is provided, it will be used - * instead of Hash to represent JSON objects. The type must respond to - * +new+ without arguments, and return an object that respond to +[]=+. - * * *array_class*: Defaults to Array If another type is provided, it will be used - * instead of Hash to represent JSON arrays. The type must respond to - * +new+ without arguments, and return an object that respond to +<<+. - * * *decimal_class*: Specifies which class to use instead of the default - * (Float) when parsing decimal numbers. This class must accept a single - * string argument in its constructor. - */ -static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) -{ - GET_PARSER_INIT; - - rb_check_arity(argc, 1, 2); - - parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil); - return self; -} - -%%{ - machine JSON; - - write data; - - include JSON_common; - - action parse_value { - char *np = JSON_parse_value(json, fpc, pe, &result, 0); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - main := ignore* ( - begin_value >parse_value - ) ignore*; -}%% - -/* - * call-seq: parse() - * - * Parses the current JSON text _source_ and returns the complete data - * structure as a result. - * It raises JSON::ParserError if fail to parse. - */ -static VALUE cParser_parse(VALUE self) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; - - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); - - VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { - .type = RVALUE_STACK_STACK_ALLOCATED, - .ptr = rvalue_stack_buffer, - .capa = RVALUE_STACK_INITIAL_CAPA, - }; - json->stack = &stack; - - %% write init; - p = json->source; - pe = p + json->len; - %% write exec; - - if (json->stack_handle) { - rvalue_stack_eagerly_release(json->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; - } -} - -static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - - JSON_Parser _parser = {0}; - JSON_Parser *json = &_parser; - parser_init(json, source, opts); - - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); - - VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { - .type = RVALUE_STACK_STACK_ALLOCATED, - .ptr = rvalue_stack_buffer, - .capa = RVALUE_STACK_INITIAL_CAPA, - }; - json->stack = &stack; - - %% write init; - p = json->source; - pe = p + json->len; - %% write exec; - - if (json->stack_handle) { - rvalue_stack_eagerly_release(json->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; - } -} - -static void JSON_mark(void *ptr) -{ - JSON_Parser *json = ptr; - rb_gc_mark(json->Vsource); - rb_gc_mark(json->create_id); - rb_gc_mark(json->object_class); - rb_gc_mark(json->array_class); - rb_gc_mark(json->decimal_class); - rb_gc_mark(json->match_string); - rb_gc_mark(json->stack_handle); - - long index; - for (index = 0; index < json->name_cache.length; index++) { - rb_gc_mark(json->name_cache.entries[index]); - } -} - -static void JSON_free(void *ptr) -{ - JSON_Parser *json = ptr; - fbuffer_free(&json->fbuffer); - ruby_xfree(json); -} - -static size_t JSON_memsize(const void *ptr) -{ - const JSON_Parser *json = ptr; - return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer); -} - -static const rb_data_type_t JSON_Parser_type = { - "JSON/Parser", - {JSON_mark, JSON_free, JSON_memsize,}, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -}; - -static VALUE cJSON_parser_s_allocate(VALUE klass) -{ - JSON_Parser *json; - VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - fbuffer_stack_init(&json->fbuffer, 0, NULL, 0); - return obj; -} - -/* - * call-seq: source() - * - * Returns a copy of the current _source_ string, that was used to construct - * this Parser. - */ -static VALUE cParser_source(VALUE self) -{ - GET_PARSER; - return rb_str_dup(json->Vsource); -} - -void Init_parser(void) -{ -#ifdef HAVE_RB_EXT_RACTOR_SAFE - rb_ext_ractor_safe(true); -#endif - -#undef rb_intern - rb_require("json/common"); - mJSON = rb_define_module("JSON"); - mExt = rb_define_module_under(mJSON, "Ext"); - cParser = rb_define_class_under(mExt, "Parser", rb_cObject); - eNestingError = rb_path2class("JSON::NestingError"); - rb_gc_register_mark_object(eNestingError); - rb_define_alloc_func(cParser, cJSON_parser_s_allocate); - rb_define_method(cParser, "initialize", cParser_initialize, -1); - rb_define_method(cParser, "parse", cParser_parse, 0); - rb_define_method(cParser, "source", cParser_source, 0); - - rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); - - CNaN = rb_const_get(mJSON, rb_intern("NaN")); - rb_gc_register_mark_object(CNaN); - - CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); - rb_gc_register_mark_object(CInfinity); - - CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); - rb_gc_register_mark_object(CMinusInfinity); - - rb_global_variable(&Encoding_UTF_8); - Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); - - sym_max_nesting = ID2SYM(rb_intern("max_nesting")); - sym_allow_nan = ID2SYM(rb_intern("allow_nan")); - sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); - sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); - sym_freeze = ID2SYM(rb_intern("freeze")); - sym_create_additions = ID2SYM(rb_intern("create_additions")); - sym_create_id = ID2SYM(rb_intern("create_id")); - sym_object_class = ID2SYM(rb_intern("object_class")); - sym_array_class = ID2SYM(rb_intern("array_class")); - sym_decimal_class = ID2SYM(rb_intern("decimal_class")); - sym_match_string = ID2SYM(rb_intern("match_string")); - - i_create_id = rb_intern("create_id"); - i_json_creatable_p = rb_intern("json_creatable?"); - i_json_create = rb_intern("json_create"); - i_chr = rb_intern("chr"); - i_match = rb_intern("match"); - i_deep_const_get = rb_intern("deep_const_get"); - i_aset = rb_intern("[]="); - i_aref = rb_intern("[]"); - i_leftshift = rb_intern("<<"); - i_new = rb_intern("new"); - i_try_convert = rb_intern("try_convert"); - i_uminus = rb_intern("-@"); - i_encode = rb_intern("encode"); - - binary_encindex = rb_ascii8bit_encindex(); - utf8_encindex = rb_utf8_encindex(); - enc_utf8 = rb_utf8_encoding(); -} - -/* - * Local variables: - * mode: c - * c-file-style: ruby - * indent-tabs-mode: nil - * End: - */ |
