diff options
Diffstat (limited to 'ext/json/parser')
| -rw-r--r-- | ext/json/parser/depend | 165 | ||||
| -rw-r--r-- | ext/json/parser/extconf.rb | 10 | ||||
| -rw-r--r-- | ext/json/parser/parser.c | 2373 | ||||
| -rw-r--r-- | ext/json/parser/parser.h | 91 | ||||
| -rw-r--r-- | ext/json/parser/parser.rl | 1166 | ||||
| -rw-r--r-- | ext/json/parser/prereq.mk | 5 |
6 files changed, 1005 insertions, 2805 deletions
diff --git a/ext/json/parser/depend b/ext/json/parser/depend index 4cdf69a749..2ffd904475 100644 --- a/ext/json/parser/depend +++ b/ext/json/parser/depend @@ -1,179 +1,22 @@ $(OBJS): $(ruby_headers) -parser.o: parser.c $(srcdir)/../fbuffer/fbuffer.h +parser.o: parser.c parser.h $(srcdir)/../fbuffer/fbuffer.h # AUTOGENERATED DEPENDENCIES START parser.o: $(RUBY_EXTCONF_H) parser.o: $(arch_hdrdir)/ruby/config.h -parser.o: $(hdrdir)/ruby.h -parser.o: $(hdrdir)/ruby/assert.h parser.o: $(hdrdir)/ruby/backward.h -parser.o: $(hdrdir)/ruby/backward/2/assume.h -parser.o: $(hdrdir)/ruby/backward/2/attributes.h -parser.o: $(hdrdir)/ruby/backward/2/bool.h -parser.o: $(hdrdir)/ruby/backward/2/inttypes.h -parser.o: $(hdrdir)/ruby/backward/2/limits.h -parser.o: $(hdrdir)/ruby/backward/2/long_long.h -parser.o: $(hdrdir)/ruby/backward/2/stdalign.h -parser.o: $(hdrdir)/ruby/backward/2/stdarg.h parser.o: $(hdrdir)/ruby/defines.h parser.o: $(hdrdir)/ruby/encoding.h parser.o: $(hdrdir)/ruby/intern.h -parser.o: $(hdrdir)/ruby/internal/abi.h -parser.o: $(hdrdir)/ruby/internal/anyargs.h -parser.o: $(hdrdir)/ruby/internal/arithmetic.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/char.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/double.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/int.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/long.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/short.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h -parser.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h -parser.o: $(hdrdir)/ruby/internal/assume.h -parser.o: $(hdrdir)/ruby/internal/attr/alloc_size.h -parser.o: $(hdrdir)/ruby/internal/attr/artificial.h -parser.o: $(hdrdir)/ruby/internal/attr/cold.h -parser.o: $(hdrdir)/ruby/internal/attr/const.h -parser.o: $(hdrdir)/ruby/internal/attr/constexpr.h -parser.o: $(hdrdir)/ruby/internal/attr/deprecated.h -parser.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h -parser.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h -parser.o: $(hdrdir)/ruby/internal/attr/error.h -parser.o: $(hdrdir)/ruby/internal/attr/flag_enum.h -parser.o: $(hdrdir)/ruby/internal/attr/forceinline.h -parser.o: $(hdrdir)/ruby/internal/attr/format.h -parser.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h -parser.o: $(hdrdir)/ruby/internal/attr/noalias.h -parser.o: $(hdrdir)/ruby/internal/attr/nodiscard.h -parser.o: $(hdrdir)/ruby/internal/attr/noexcept.h -parser.o: $(hdrdir)/ruby/internal/attr/noinline.h -parser.o: $(hdrdir)/ruby/internal/attr/nonnull.h -parser.o: $(hdrdir)/ruby/internal/attr/noreturn.h -parser.o: $(hdrdir)/ruby/internal/attr/packed_struct.h -parser.o: $(hdrdir)/ruby/internal/attr/pure.h -parser.o: $(hdrdir)/ruby/internal/attr/restrict.h -parser.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h -parser.o: $(hdrdir)/ruby/internal/attr/warning.h -parser.o: $(hdrdir)/ruby/internal/attr/weakref.h -parser.o: $(hdrdir)/ruby/internal/cast.h -parser.o: $(hdrdir)/ruby/internal/compiler_is.h -parser.o: $(hdrdir)/ruby/internal/compiler_is/apple.h -parser.o: $(hdrdir)/ruby/internal/compiler_is/clang.h -parser.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h -parser.o: $(hdrdir)/ruby/internal/compiler_is/intel.h -parser.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h -parser.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h -parser.o: $(hdrdir)/ruby/internal/compiler_since.h -parser.o: $(hdrdir)/ruby/internal/config.h -parser.o: $(hdrdir)/ruby/internal/constant_p.h -parser.o: $(hdrdir)/ruby/internal/core.h -parser.o: $(hdrdir)/ruby/internal/core/rarray.h -parser.o: $(hdrdir)/ruby/internal/core/rbasic.h -parser.o: $(hdrdir)/ruby/internal/core/rbignum.h -parser.o: $(hdrdir)/ruby/internal/core/rclass.h -parser.o: $(hdrdir)/ruby/internal/core/rdata.h -parser.o: $(hdrdir)/ruby/internal/core/rfile.h -parser.o: $(hdrdir)/ruby/internal/core/rhash.h -parser.o: $(hdrdir)/ruby/internal/core/robject.h -parser.o: $(hdrdir)/ruby/internal/core/rregexp.h -parser.o: $(hdrdir)/ruby/internal/core/rstring.h -parser.o: $(hdrdir)/ruby/internal/core/rstruct.h -parser.o: $(hdrdir)/ruby/internal/core/rtypeddata.h -parser.o: $(hdrdir)/ruby/internal/ctype.h -parser.o: $(hdrdir)/ruby/internal/dllexport.h -parser.o: $(hdrdir)/ruby/internal/dosish.h -parser.o: $(hdrdir)/ruby/internal/encoding/coderange.h -parser.o: $(hdrdir)/ruby/internal/encoding/ctype.h -parser.o: $(hdrdir)/ruby/internal/encoding/encoding.h -parser.o: $(hdrdir)/ruby/internal/encoding/pathname.h -parser.o: $(hdrdir)/ruby/internal/encoding/re.h -parser.o: $(hdrdir)/ruby/internal/encoding/sprintf.h -parser.o: $(hdrdir)/ruby/internal/encoding/string.h -parser.o: $(hdrdir)/ruby/internal/encoding/symbol.h -parser.o: $(hdrdir)/ruby/internal/encoding/transcode.h -parser.o: $(hdrdir)/ruby/internal/error.h -parser.o: $(hdrdir)/ruby/internal/eval.h -parser.o: $(hdrdir)/ruby/internal/event.h -parser.o: $(hdrdir)/ruby/internal/fl_type.h -parser.o: $(hdrdir)/ruby/internal/gc.h -parser.o: $(hdrdir)/ruby/internal/glob.h -parser.o: $(hdrdir)/ruby/internal/globals.h -parser.o: $(hdrdir)/ruby/internal/has/attribute.h -parser.o: $(hdrdir)/ruby/internal/has/builtin.h -parser.o: $(hdrdir)/ruby/internal/has/c_attribute.h -parser.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h -parser.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h -parser.o: $(hdrdir)/ruby/internal/has/extension.h -parser.o: $(hdrdir)/ruby/internal/has/feature.h -parser.o: $(hdrdir)/ruby/internal/has/warning.h -parser.o: $(hdrdir)/ruby/internal/intern/array.h -parser.o: $(hdrdir)/ruby/internal/intern/bignum.h -parser.o: $(hdrdir)/ruby/internal/intern/class.h -parser.o: $(hdrdir)/ruby/internal/intern/compar.h -parser.o: $(hdrdir)/ruby/internal/intern/complex.h -parser.o: $(hdrdir)/ruby/internal/intern/cont.h -parser.o: $(hdrdir)/ruby/internal/intern/dir.h -parser.o: $(hdrdir)/ruby/internal/intern/enum.h -parser.o: $(hdrdir)/ruby/internal/intern/enumerator.h -parser.o: $(hdrdir)/ruby/internal/intern/error.h -parser.o: $(hdrdir)/ruby/internal/intern/eval.h -parser.o: $(hdrdir)/ruby/internal/intern/file.h -parser.o: $(hdrdir)/ruby/internal/intern/hash.h -parser.o: $(hdrdir)/ruby/internal/intern/io.h -parser.o: $(hdrdir)/ruby/internal/intern/load.h -parser.o: $(hdrdir)/ruby/internal/intern/marshal.h -parser.o: $(hdrdir)/ruby/internal/intern/numeric.h -parser.o: $(hdrdir)/ruby/internal/intern/object.h -parser.o: $(hdrdir)/ruby/internal/intern/parse.h -parser.o: $(hdrdir)/ruby/internal/intern/proc.h -parser.o: $(hdrdir)/ruby/internal/intern/process.h -parser.o: $(hdrdir)/ruby/internal/intern/random.h -parser.o: $(hdrdir)/ruby/internal/intern/range.h -parser.o: $(hdrdir)/ruby/internal/intern/rational.h -parser.o: $(hdrdir)/ruby/internal/intern/re.h -parser.o: $(hdrdir)/ruby/internal/intern/ruby.h -parser.o: $(hdrdir)/ruby/internal/intern/select.h -parser.o: $(hdrdir)/ruby/internal/intern/select/largesize.h -parser.o: $(hdrdir)/ruby/internal/intern/signal.h -parser.o: $(hdrdir)/ruby/internal/intern/sprintf.h -parser.o: $(hdrdir)/ruby/internal/intern/string.h -parser.o: $(hdrdir)/ruby/internal/intern/struct.h -parser.o: $(hdrdir)/ruby/internal/intern/thread.h -parser.o: $(hdrdir)/ruby/internal/intern/time.h -parser.o: $(hdrdir)/ruby/internal/intern/variable.h -parser.o: $(hdrdir)/ruby/internal/intern/vm.h -parser.o: $(hdrdir)/ruby/internal/interpreter.h -parser.o: $(hdrdir)/ruby/internal/iterator.h -parser.o: $(hdrdir)/ruby/internal/memory.h -parser.o: $(hdrdir)/ruby/internal/method.h -parser.o: $(hdrdir)/ruby/internal/module.h -parser.o: $(hdrdir)/ruby/internal/newobj.h -parser.o: $(hdrdir)/ruby/internal/scan_args.h -parser.o: $(hdrdir)/ruby/internal/special_consts.h -parser.o: $(hdrdir)/ruby/internal/static_assert.h -parser.o: $(hdrdir)/ruby/internal/stdalign.h -parser.o: $(hdrdir)/ruby/internal/stdbool.h -parser.o: $(hdrdir)/ruby/internal/stdckdint.h -parser.o: $(hdrdir)/ruby/internal/symbol.h -parser.o: $(hdrdir)/ruby/internal/value.h -parser.o: $(hdrdir)/ruby/internal/value_type.h -parser.o: $(hdrdir)/ruby/internal/variable.h -parser.o: $(hdrdir)/ruby/internal/warning_push.h -parser.o: $(hdrdir)/ruby/internal/xmalloc.h parser.o: $(hdrdir)/ruby/missing.h parser.o: $(hdrdir)/ruby/onigmo.h parser.o: $(hdrdir)/ruby/oniguruma.h parser.o: $(hdrdir)/ruby/ruby.h parser.o: $(hdrdir)/ruby/st.h parser.o: $(hdrdir)/ruby/subst.h -parser.o: $(srcdir)/../fbuffer/fbuffer.h +parser.o: $(top_srcdir)/ext/json/fbuffer/fbuffer.h +parser.o: $(top_srcdir)/include/ruby.h parser.o: parser.c +parser.o: parser.h parser.o: parser.rl # AUTOGENERATED DEPENDENCIES END diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb index a8e21aed4b..f7360d46b2 100644 --- a/ext/json/parser/extconf.rb +++ b/ext/json/parser/extconf.rb @@ -1,12 +1,6 @@ -# frozen_string_literal: true +# frozen_string_literal: false require 'mkmf' -have_func("rb_enc_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 -have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 -have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby -have_func("rb_category_warn", "ruby.h") # Missing on TruffleRuby -have_func("strnlen", "string.h") # Missing on Solaris 10 - -append_cflags("-std=c99") +have_func("rb_enc_raise", "ruby.h") create_makefile 'json/ext/parser' diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 83ed9f2508..ae90b2e8fd 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1,315 +1,33 @@ -/* This file is automatically generated from parser.rl by using ragel */ + #line 1 "parser.rl" -#include "ruby.h" #include "../fbuffer/fbuffer.h" +#include "parser.h" -static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, - i_chr, i_deep_const_get, i_match, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_uminus, i_encode; - -static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, - sym_create_additions, sym_create_id, sym_object_class, sym_array_class, - sym_decimal_class, sym_match_string; - -static int binary_encindex; -static int utf8_encindex; - -#ifdef HAVE_RB_CATEGORY_WARN -# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) -#else -# define json_deprecated(message) rb_warn(message) -#endif - -static const char deprecated_create_additions_warning[] = - "JSON.load implicit support for `create_additions: true` is deprecated " - "and will be removed in 3.0, use JSON.unsafe_load or explicitly " - "pass `create_additions: true`"; - -#ifndef HAVE_RB_HASH_BULK_INSERT -// For TruffleRuby -void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) -{ - long index = 0; - while (index < count) { - VALUE name = pairs[index++]; - VALUE value = pairs[index++]; - rb_hash_aset(hash, name, value); - } - RB_GC_GUARD(hash); -} -#endif - -/* name cache */ - -#include <string.h> -#include <ctype.h> - -// Object names are likely to be repeated, and are frozen. -// As such we can re-use them if we keep a cache of the ones we've seen so far, -// and save much more expensive lookups into the global fstring table. -// This cache implementation is deliberately simple, as we're optimizing for compactness, -// to be able to fit safely on the stack. -// As such, binary search into a sorted array gives a good tradeoff between compactness and -// performance. -#define JSON_RVALUE_CACHE_CAPA 63 -typedef struct rvalue_cache_struct { - int length; - VALUE entries[JSON_RVALUE_CACHE_CAPA]; -} rvalue_cache; - -static rb_encoding *enc_utf8; - -#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 - -static inline VALUE build_interned_string(const char *str, const long length) +#if defined HAVE_RUBY_ENCODING_H +# define EXC_ENCODING rb_utf8_encoding(), +# ifndef HAVE_RB_ENC_RAISE +static void +enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...) { -# ifdef HAVE_RB_ENC_INTERNED_STR - return rb_enc_interned_str(str, length, enc_utf8); -# else - VALUE rstring = rb_utf8_str_new(str, length); - return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); -# endif -} + va_list args; + VALUE mesg; -static inline VALUE build_symbol(const char *str, const long length) -{ - return rb_str_intern(build_interned_string(str, length)); -} - -static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) -{ - MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); - cache->length++; - cache->entries[index] = rstring; -} + va_start(args, fmt); + mesg = rb_enc_vsprintf(enc, fmt, args); + va_end(args); -static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) -{ - long rstring_length = RSTRING_LEN(rstring); - if (length == rstring_length) { - return memcmp(str, RSTRING_PTR(rstring), length); - } else { - return (int)(length - rstring_length); - } -} - -static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) -{ - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - - int low = 0; - int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; - - while (low <= high) { - mid = (high + low) >> 1; - VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, entry); - - if (last_cmp == 0) { - return entry; - } else if (last_cmp > 0) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - - VALUE rstring = build_interned_string(str, length); - - if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rstring); - } - return rstring; -} - -static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) -{ - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - - int low = 0; - int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; - - while (low <= high) { - mid = (high + low) >> 1; - VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); - - if (last_cmp == 0) { - return entry; - } else if (last_cmp > 0) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - - VALUE rsymbol = build_symbol(str, length); - - if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rsymbol); - } - return rsymbol; -} - -/* rvalue stack */ - -#define RVALUE_STACK_INITIAL_CAPA 128 - -enum rvalue_stack_type { - RVALUE_STACK_HEAP_ALLOCATED = 0, - RVALUE_STACK_STACK_ALLOCATED = 1, -}; - -typedef struct rvalue_stack_struct { - enum rvalue_stack_type type; - long capa; - long head; - VALUE *ptr; -} rvalue_stack; - -static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); - -static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) -{ - long required = stack->capa * 2; - - if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { - stack = rvalue_stack_spill(stack, handle, stack_ref); - } else { - REALLOC_N(stack->ptr, VALUE, required); - stack->capa = required; - } - return stack; -} - -static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) -{ - if (RB_UNLIKELY(stack->head >= stack->capa)) { - stack = rvalue_stack_grow(stack, handle, stack_ref); - } - stack->ptr[stack->head] = value; - stack->head++; -} - -static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) -{ - return stack->ptr + (stack->head - count); -} - -static inline void rvalue_stack_pop(rvalue_stack *stack, long count) -{ - stack->head -= count; -} - -static void rvalue_stack_mark(void *ptr) -{ - rvalue_stack *stack = (rvalue_stack *)ptr; - long index; - for (index = 0; index < stack->head; index++) { - rb_gc_mark(stack->ptr[index]); - } -} - -static void rvalue_stack_free(void *ptr) -{ - rvalue_stack *stack = (rvalue_stack *)ptr; - if (stack) { - ruby_xfree(stack->ptr); - ruby_xfree(stack); - } -} - -static size_t rvalue_stack_memsize(const void *ptr) -{ - const rvalue_stack *stack = (const rvalue_stack *)ptr; - return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; -} - -static const rb_data_type_t JSON_Parser_rvalue_stack_type = { - "JSON::Ext::Parser/rvalue_stack", - { - .dmark = rvalue_stack_mark, - .dfree = rvalue_stack_free, - .dsize = rvalue_stack_memsize, - }, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -}; - -static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) -{ - rvalue_stack *stack; - *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - *stack_ref = stack; - MEMCPY(stack, old_stack, rvalue_stack, 1); - - stack->capa = old_stack->capa << 1; - stack->ptr = ALLOC_N(VALUE, stack->capa); - stack->type = RVALUE_STACK_HEAP_ALLOCATED; - MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); - return stack; -} - -static void rvalue_stack_eagerly_release(VALUE handle) -{ - rvalue_stack *stack; - TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - RTYPEDDATA_DATA(handle) = NULL; - rvalue_stack_free(stack); + rb_exc_raise(rb_exc_new3(exc, mesg)); } +# define rb_enc_raise enc_raise +# endif +#else +# define EXC_ENCODING /* nothing */ +# define rb_enc_raise rb_raise +#endif /* unicode */ -static const signed char digit_values[256] = { +static const char digit_values[256] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, @@ -326,28 +44,26 @@ static const signed char digit_values[256] = { -1, -1, -1, -1, -1, -1, -1 }; -static uint32_t unescape_unicode(const unsigned char *p) +static UTF32 unescape_unicode(const unsigned char *p) { - const uint32_t replacement_char = 0xFFFD; - - signed char b; - uint32_t result = 0; + char b; + UTF32 result = 0; b = digit_values[p[0]]; - if (b < 0) return replacement_char; + if (b < 0) return UNI_REPLACEMENT_CHAR; result = (result << 4) | (unsigned char)b; b = digit_values[p[1]]; - if (b < 0) return replacement_char; + if (b < 0) return UNI_REPLACEMENT_CHAR; result = (result << 4) | (unsigned char)b; b = digit_values[p[2]]; - if (b < 0) return replacement_char; + if (b < 0) return UNI_REPLACEMENT_CHAR; result = (result << 4) | (unsigned char)b; b = digit_values[p[3]]; - if (b < 0) return replacement_char; + if (b < 0) return UNI_REPLACEMENT_CHAR; result = (result << 4) | (unsigned char)b; return result; } -static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) +static int convert_UTF32_to_UTF8(char *buf, UTF32 ch) { int len = 1; if (ch <= 0x7F) { @@ -373,118 +89,53 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) return len; } -typedef struct JSON_ParserStruct { - VALUE Vsource; - char *source; - long len; - char *memo; - VALUE create_id; - VALUE object_class; - VALUE array_class; - VALUE decimal_class; - VALUE match_string; - FBuffer fbuffer; - int in_array; - int max_nesting; - bool allow_nan; - bool allow_trailing_comma; - bool parsing_name; - bool symbolize_names; - bool freeze; - bool create_additions; - bool deprecated_create_additions; - rvalue_cache name_cache; - rvalue_stack *stack; - VALUE stack_handle; -} JSON_Parser; - -#define GET_PARSER \ - GET_PARSER_INIT; \ - if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") - -#define GET_PARSER_INIT \ - JSON_Parser *json; \ - TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) - -#define MinusInfinity "-Infinity" -#define EVIL 0x666 - -static const rb_data_type_t JSON_Parser_type; -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); - - -#ifndef HAVE_STRNLEN -static size_t strnlen(const char *s, size_t maxlen) -{ - char *p; - return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen); -} -#endif - -#define PARSE_ERROR_FRAGMENT_LEN 32 -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error(const char *format, const char *start) -{ - char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; - - size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN); - const char *ptr = start; - - if (len == PARSE_ERROR_FRAGMENT_LEN) { - MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); - buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0'; - ptr = buffer; - } - - rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); -} +static VALUE mJSON, mExt, cParser, eParserError, eNestingError; +static VALUE CNaN, CInfinity, CMinusInfinity; +static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, + i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, + i_object_class, i_array_class, i_decimal_class, i_key_p, + i_deep_const_get, i_match, i_match_string, i_aset, i_aref, + i_leftshift, i_new; -#line 473 "parser.rl" +#line 125 "parser.rl" -#line 455 "parser.c" +#line 107 "parser.c" enum {JSON_object_start = 1}; -enum {JSON_object_first_final = 32}; +enum {JSON_object_first_final = 27}; enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; -#line 513 "parser.rl" - +#line 166 "parser.rl" -#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; + VALUE last_name = Qnil; + VALUE object_class = json->object_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - long stack_head = json->stack->head; + *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); -#line 479 "parser.c" +#line 131 "parser.c" { cs = JSON_object_start; } -#line 528 "parser.rl" +#line 181 "parser.rl" -#line 486 "parser.c" +#line 138 "parser.c" { - short _widec; if ( p == pe ) goto _test_eof; switch ( cs ) @@ -504,30 +155,27 @@ case 2: case 13: goto st2; case 32: goto st2; case 34: goto tr2; - case 47: goto st28; + case 47: goto st23; case 125: goto tr4; } if ( 9 <= (*p) && (*p) <= 10 ) goto st2; goto st0; tr2: -#line 492 "parser.rl" +#line 148 "parser.rl" { char *np; - json->parsing_name = true; - np = JSON_parse_string(json, p, pe, result); - json->parsing_name = false; - if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { - PUSH(*result); - {p = (( np))-1;} - } + json->parsing_name = 1; + np = JSON_parse_string(json, p, pe, &last_name); + json->parsing_name = 0; + if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;} } goto st3; st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 531 "parser.c" +#line 179 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -578,7 +226,7 @@ case 8: case 32: goto st8; case 34: goto tr11; case 45: goto tr11; - case 47: goto st24; + case 47: goto st19; case 73: goto tr11; case 78: goto tr11; case 91: goto tr11; @@ -594,12 +242,18 @@ case 8: goto st8; goto st0; tr11: -#line 481 "parser.rl" +#line 133 "parser.rl" { - char *np = JSON_parse_value(json, p, pe, result, current_nesting); + VALUE v = Qnil; + char *np = JSON_parse_value(json, p, pe, &v, current_nesting); if (np == NULL) { p--; {p++; cs = 9; goto _out;} } else { + if (NIL_P(json->object_class)) { + rb_hash_aset(*result, last_name, v); + } else { + rb_funcall(*result, i_aset, 2, last_name, v); + } {p = (( np))-1;} } } @@ -608,75 +262,16 @@ st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 612 "parser.c" - _widec = (*p); - if ( (*p) < 13 ) { - if ( (*p) > 9 ) { - if ( 10 <= (*p) && (*p) <= 10 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 13 ) { - if ( (*p) < 44 ) { - if ( 32 <= (*p) && (*p) <= 32 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 44 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { +#line 266 "parser.c" + switch( (*p) ) { + case 13: goto st9; + case 32: goto st9; + case 44: goto st10; + case 47: goto st15; case 125: goto tr4; - case 269: goto st10; - case 288: goto st10; - case 300: goto st11; - case 303: goto st16; - case 525: goto st9; - case 544: goto st9; - case 556: goto st2; - case 559: goto st20; - } - if ( _widec > 266 ) { - if ( 521 <= _widec && _widec <= 522 ) - goto st9; - } else if ( _widec >= 265 ) - goto st10; - goto st0; -tr4: -#line 503 "parser.rl" - { p--; {p++; cs = 32; goto _out;} } - goto st32; -st32: - if ( ++p == pe ) - goto _test_eof32; -case 32: -#line 680 "parser.c" + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st9; goto st0; st10: if ( ++p == pe ) @@ -685,9 +280,8 @@ case 10: switch( (*p) ) { case 13: goto st10; case 32: goto st10; - case 44: goto st11; - case 47: goto st16; - case 125: goto tr4; + case 34: goto tr2; + case 47: goto st11; } if ( 9 <= (*p) && (*p) <= 10 ) goto st10; @@ -697,288 +291,139 @@ st11: goto _test_eof11; case 11: switch( (*p) ) { - case 13: goto st11; - case 32: goto st11; - case 34: goto tr2; - case 47: goto st12; + case 42: goto st12; + case 47: goto st14; } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st11; goto st0; st12: if ( ++p == pe ) goto _test_eof12; case 12: - switch( (*p) ) { - case 42: goto st13; - case 47: goto st15; - } - goto st0; + if ( (*p) == 42 ) + goto st13; + goto st12; st13: if ( ++p == pe ) goto _test_eof13; case 13: - if ( (*p) == 42 ) - goto st14; - goto st13; + switch( (*p) ) { + case 42: goto st13; + case 47: goto st10; + } + goto st12; st14: if ( ++p == pe ) goto _test_eof14; case 14: - switch( (*p) ) { - case 42: goto st14; - case 47: goto st11; - } - goto st13; + if ( (*p) == 10 ) + goto st10; + goto st14; st15: if ( ++p == pe ) goto _test_eof15; case 15: - if ( (*p) == 10 ) - goto st11; - goto st15; + switch( (*p) ) { + case 42: goto st16; + case 47: goto st18; + } + goto st0; st16: if ( ++p == pe ) goto _test_eof16; case 16: - switch( (*p) ) { - case 42: goto st17; - case 47: goto st19; - } - goto st0; + if ( (*p) == 42 ) + goto st17; + goto st16; st17: if ( ++p == pe ) goto _test_eof17; case 17: - if ( (*p) == 42 ) - goto st18; - goto st17; + switch( (*p) ) { + case 42: goto st17; + case 47: goto st9; + } + goto st16; st18: if ( ++p == pe ) goto _test_eof18; case 18: - switch( (*p) ) { - case 42: goto st18; - case 47: goto st10; - } - goto st17; + if ( (*p) == 10 ) + goto st9; + goto st18; +tr4: +#line 156 "parser.rl" + { p--; {p++; cs = 27; goto _out;} } + goto st27; +st27: + if ( ++p == pe ) + goto _test_eof27; +case 27: +#line 362 "parser.c" + goto st0; st19: if ( ++p == pe ) goto _test_eof19; case 19: - if ( (*p) == 10 ) - goto st10; - goto st19; + switch( (*p) ) { + case 42: goto st20; + case 47: goto st22; + } + goto st0; st20: if ( ++p == pe ) goto _test_eof20; case 20: - _widec = (*p); - if ( (*p) > 42 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st17; - case 303: goto st19; - case 554: goto st21; - case 559: goto st23; - } - goto st0; + if ( (*p) == 42 ) + goto st21; + goto st20; st21: if ( ++p == pe ) goto _test_eof21; case 21: - _widec = (*p); - if ( (*p) < 42 ) { - if ( (*p) <= 41 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 42 ) { - if ( 43 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st18; - case 554: goto st22; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st21; - } else if ( _widec >= 128 ) - goto st17; - goto st0; + switch( (*p) ) { + case 42: goto st21; + case 47: goto st8; + } + goto st20; st22: if ( ++p == pe ) goto _test_eof22; case 22: - _widec = (*p); - if ( (*p) < 43 ) { - if ( (*p) > 41 ) { - if ( 42 <= (*p) && (*p) <= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 46 ) { - if ( (*p) > 47 ) { - if ( 48 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st18; - case 303: goto st10; - case 554: goto st22; - case 559: goto st9; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st21; - } else if ( _widec >= 128 ) - goto st17; - goto st0; + if ( (*p) == 10 ) + goto st8; + goto st22; st23: if ( ++p == pe ) goto _test_eof23; case 23: - _widec = (*p); - if ( (*p) < 10 ) { - if ( (*p) <= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 10 ) { - if ( 11 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 490 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 266: goto st10; - case 522: goto st9; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st23; - } else if ( _widec >= 128 ) - goto st19; + switch( (*p) ) { + case 42: goto st24; + case 47: goto st26; + } goto st0; st24: if ( ++p == pe ) goto _test_eof24; case 24: - switch( (*p) ) { - case 42: goto st25; - case 47: goto st27; - } - goto st0; + if ( (*p) == 42 ) + goto st25; + goto st24; st25: if ( ++p == pe ) goto _test_eof25; case 25: - if ( (*p) == 42 ) - goto st26; - goto st25; -st26: - if ( ++p == pe ) - goto _test_eof26; -case 26: - switch( (*p) ) { - case 42: goto st26; - case 47: goto st8; - } - goto st25; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: - if ( (*p) == 10 ) - goto st8; - goto st27; -st28: - if ( ++p == pe ) - goto _test_eof28; -case 28: switch( (*p) ) { - case 42: goto st29; - case 47: goto st31; - } - goto st0; -st29: - if ( ++p == pe ) - goto _test_eof29; -case 29: - if ( (*p) == 42 ) - goto st30; - goto st29; -st30: - if ( ++p == pe ) - goto _test_eof30; -case 30: - switch( (*p) ) { - case 42: goto st30; + case 42: goto st25; case 47: goto st2; } - goto st29; -st31: + goto st24; +st26: if ( ++p == pe ) - goto _test_eof31; -case 31: + goto _test_eof26; +case 26: if ( (*p) == 10 ) goto st2; - goto st31; + goto st26; } _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; @@ -988,7 +433,6 @@ case 31: _test_eof7: cs = 7; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; _test_eof9: cs = 9; goto _test_eof; - _test_eof32: cs = 32; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; _test_eof11: cs = 11; goto _test_eof; _test_eof12: cs = 12; goto _test_eof; @@ -998,6 +442,7 @@ case 31: _test_eof16: cs = 16; goto _test_eof; _test_eof17: cs = 17; goto _test_eof; _test_eof18: cs = 18; goto _test_eof; + _test_eof27: cs = 27; goto _test_eof; _test_eof19: cs = 19; goto _test_eof; _test_eof20: cs = 20; goto _test_eof; _test_eof21: cs = 21; goto _test_eof; @@ -1006,56 +451,24 @@ case 31: _test_eof24: cs = 24; goto _test_eof; _test_eof25: cs = 25; goto _test_eof; _test_eof26: cs = 26; goto _test_eof; - _test_eof27: cs = 27; goto _test_eof; - _test_eof28: cs = 28; goto _test_eof; - _test_eof29: cs = 29; goto _test_eof; - _test_eof30: cs = 30; goto _test_eof; - _test_eof31: cs = 31; goto _test_eof; _test_eof: {} _out: {} } -#line 529 "parser.rl" +#line 182 "parser.rl" if (cs >= JSON_object_first_final) { - long count = json->stack->head - stack_head; - - if (RB_UNLIKELY(json->object_class)) { - VALUE object = rb_class_new_instance(0, 0, json->object_class); - long index = 0; - VALUE *items = rvalue_stack_peek(json->stack, count); - while (index < count) { - VALUE name = items[index++]; - VALUE value = items[index++]; - rb_funcall(object, i_aset, 2, name, value); - } - *result = object; - } else { - VALUE hash; -#ifdef HAVE_RB_HASH_NEW_CAPA - hash = rb_hash_new_capa(count >> 1); -#else - hash = rb_hash_new(); -#endif - rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash); - *result = hash; - } - rvalue_stack_pop(json->stack, count); - - if (RB_UNLIKELY(json->create_additions)) { + if (json->create_additions) { VALUE klassname; - if (json->object_class) { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); + if (NIL_P(json->object_class)) { + klassname = rb_hash_aref(*result, json->create_id); } else { - klassname = rb_hash_aref(*result, json->create_id); + klassname = rb_funcall(*result, i_aref, 1, json->create_id); } if (!NIL_P(klassname)) { VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { - if (json->deprecated_create_additions) { - json_deprecated(deprecated_create_additions_warning); - } *result = rb_funcall(klass, i_json_create, 1, *result); } } @@ -1067,7 +480,8 @@ case 31: } -#line 1071 "parser.c" + +#line 485 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ -1075,7 +489,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 662 "parser.rl" +#line 282 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1083,14 +497,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1087 "parser.c" +#line 501 "parser.c" { cs = JSON_value_start; } -#line 669 "parser.rl" +#line 289 "parser.rl" -#line 1094 "parser.c" +#line 508 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1124,19 +538,14 @@ st0: cs = 0; goto _out; tr2: -#line 607 "parser.rl" +#line 234 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); - if (np == NULL) { - p--; - {p++; cs = 29; goto _out;} - } else { - {p = (( np))-1;} - } + if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} } goto st29; tr3: -#line 617 "parser.rl" +#line 239 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -1145,28 +554,26 @@ tr3: {p = (( p + 10))-1;} p--; {p++; cs = 29; goto _out;} } else { - raise_parse_error("unexpected token at '%s'", p); + rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); } } - np = JSON_parse_number(json, p, pe, result); - if (np != NULL) { - {p = (( np))-1;} - } + np = JSON_parse_float(json, p, pe, result); + if (np != NULL) {p = (( np))-1;} + np = JSON_parse_integer(json, p, pe, result); + if (np != NULL) {p = (( np))-1;} p--; {p++; cs = 29; goto _out;} } goto st29; tr7: -#line 635 "parser.rl" +#line 257 "parser.rl" { char *np; - json->in_array++; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); - json->in_array--; if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} } goto st29; tr11: -#line 643 "parser.rl" +#line 263 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -1174,39 +581,39 @@ tr11: } goto st29; tr25: -#line 600 "parser.rl" +#line 227 "parser.rl" { if (json->allow_nan) { *result = CInfinity; } else { - raise_parse_error("unexpected token at '%s'", p - 7); + rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8); } } goto st29; tr27: -#line 593 "parser.rl" +#line 220 "parser.rl" { if (json->allow_nan) { *result = CNaN; } else { - raise_parse_error("unexpected token at '%s'", p - 2); + rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2); } } goto st29; tr31: -#line 587 "parser.rl" +#line 214 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 584 "parser.rl" +#line 211 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 590 "parser.rl" +#line 217 "parser.rl" { *result = Qtrue; } @@ -1215,9 +622,9 @@ st29: if ( ++p == pe ) goto _test_eof29; case 29: -#line 649 "parser.rl" +#line 269 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 1221 "parser.c" +#line 628 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -1458,14 +865,9 @@ case 28: _out: {} } -#line 670 "parser.rl" - - if (json->freeze) { - OBJ_FREEZE(*result); - } +#line 290 "parser.rl" if (cs >= JSON_value_first_final) { - PUSH(*result); return p; } else { return NULL; @@ -1473,7 +875,7 @@ case 28: } -#line 1477 "parser.c" +#line 879 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -1481,72 +883,122 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 691 "parser.rl" +#line 306 "parser.rl" -#define MAX_FAST_INTEGER_SIZE 18 -static inline VALUE fast_parse_integer(char *p, char *pe) +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) { - bool negative = false; - if (*p == '-') { - negative = true; - p++; - } + int cs = EVIL; - long long memo = 0; - while (p < pe) { - memo *= 10; - memo += *p - '0'; - p++; - } - if (negative) { - memo = -memo; - } - return LL2NUM(memo); -} +#line 895 "parser.c" + { + cs = JSON_integer_start; + } -static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) -{ +#line 313 "parser.rl" + json->memo = p; + +#line 903 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +case 1: + switch( (*p) ) { + case 45: goto st2; + case 48: goto st3; + } + if ( 49 <= (*p) && (*p) <= 57 ) + goto st5; + goto st0; +st0: +cs = 0; + goto _out; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + if ( (*p) == 48 ) + goto st3; + if ( 49 <= (*p) && (*p) <= 57 ) + goto st5; + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st0; + goto tr4; +tr4: +#line 303 "parser.rl" + { p--; {p++; cs = 4; goto _out;} } + goto st4; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: +#line 944 "parser.c" + goto st0; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st5; + goto tr4; + } + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 315 "parser.rl" + + if (cs >= JSON_integer_first_final) { long len = p - json->memo; - if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { - *result = fast_parse_integer(json->memo, p); - } else { - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); - } + fbuffer_clear(json->fbuffer); + fbuffer_append(json->fbuffer, json->memo, len); + fbuffer_append_char(json->fbuffer, '\0'); + *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10); return p + 1; + } else { + return NULL; + } } -#line 1525 "parser.c" +#line 978 "parser.c" enum {JSON_float_start = 1}; -enum {JSON_float_first_final = 6}; +enum {JSON_float_first_final = 8}; enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 743 "parser.rl" +#line 340 "parser.rl" -static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; - bool is_float = false; -#line 1542 "parser.c" +#line 994 "parser.c" { cs = JSON_float_start; } -#line 751 "parser.rl" +#line 347 "parser.rl" json->memo = p; -#line 1550 "parser.c" +#line 1002 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1555,10 +1007,10 @@ static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *resu case 1: switch( (*p) ) { case 45: goto st2; - case 48: goto st6; + case 48: goto st3; } if ( 49 <= (*p) && (*p) <= 57 ) - goto st10; + goto st7; goto st0; st0: cs = 0; @@ -1568,42 +1020,24 @@ st2: goto _test_eof2; case 2: if ( (*p) == 48 ) - goto st6; + goto st3; if ( 49 <= (*p) && (*p) <= 57 ) - goto st10; + goto st7; goto st0; -st6: +st3: if ( ++p == pe ) - goto _test_eof6; -case 6: + goto _test_eof3; +case 3: switch( (*p) ) { - case 45: goto st0; - case 46: goto tr8; - case 69: goto tr9; - case 101: goto tr9; + case 46: goto st4; + case 69: goto st5; + case 101: goto st5; } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st0; - goto tr7; -tr7: -#line 735 "parser.rl" - { p--; {p++; cs = 7; goto _out;} } - goto st7; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: -#line 1597 "parser.c" goto st0; -tr8: -#line 736 "parser.rl" - { is_float = true; } - goto st3; -st3: +st4: if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 1607 "parser.c" + goto _test_eof4; +case 4: if ( 48 <= (*p) && (*p) <= 57 ) goto st8; goto st0; @@ -1612,128 +1046,98 @@ st8: goto _test_eof8; case 8: switch( (*p) ) { - case 69: goto st4; - case 101: goto st4; + case 69: goto st5; + case 101: goto st5; } if ( (*p) > 46 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st8; } else if ( (*p) >= 45 ) goto st0; - goto tr7; + goto tr9; tr9: -#line 736 "parser.rl" - { is_float = true; } - goto st4; -st4: +#line 334 "parser.rl" + { p--; {p++; cs = 9; goto _out;} } + goto st9; +st9: if ( ++p == pe ) - goto _test_eof4; -case 4: -#line 1633 "parser.c" - switch( (*p) ) { - case 43: goto st5; - case 45: goto st5; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st9; + goto _test_eof9; +case 9: +#line 1067 "parser.c" goto st0; st5: if ( ++p == pe ) goto _test_eof5; case 5: + switch( (*p) ) { + case 43: goto st6; + case 45: goto st6; + } if ( 48 <= (*p) && (*p) <= 57 ) - goto st9; + goto st10; goto st0; -st9: +st6: if ( ++p == pe ) - goto _test_eof9; -case 9: + goto _test_eof6; +case 6: + if ( 48 <= (*p) && (*p) <= 57 ) + goto st10; + goto st0; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: switch( (*p) ) { case 69: goto st0; case 101: goto st0; } if ( (*p) > 46 ) { if ( 48 <= (*p) && (*p) <= 57 ) - goto st9; + goto st10; } else if ( (*p) >= 45 ) goto st0; - goto tr7; -st10: + goto tr9; +st7: if ( ++p == pe ) - goto _test_eof10; -case 10: + goto _test_eof7; +case 7: switch( (*p) ) { - case 45: goto st0; - case 46: goto tr8; - case 69: goto tr9; - case 101: goto tr9; + case 46: goto st4; + case 69: goto st5; + case 101: goto st5; } if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; - goto tr7; + goto st7; + goto st0; } _test_eof2: cs = 2; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; _test_eof9: cs = 9; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; _test_eof: {} _out: {} } -#line 753 "parser.rl" +#line 349 "parser.rl" if (cs >= JSON_float_first_final) { - if (!is_float) { - return JSON_decode_integer(json, p, result); - } - VALUE mod = Qnil; - ID method_id = 0; - if (json->decimal_class) { - if (rb_respond_to(json->decimal_class, i_try_convert)) { - mod = json->decimal_class; - method_id = i_try_convert; - } else if (rb_respond_to(json->decimal_class, i_new)) { - mod = json->decimal_class; - method_id = i_new; - } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) { - VALUE name = rb_class_name(json->decimal_class); - const char *name_cstr = RSTRING_PTR(name); - const char *last_colon = strrchr(name_cstr, ':'); - if (last_colon) { - const char *mod_path_end = last_colon - 1; - VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); - mod = rb_path_to_class(mod_path); - - const char *method_name_beg = last_colon + 1; - long before_len = method_name_beg - name_cstr; - long len = RSTRING_LEN(name) - before_len; - VALUE method_name = rb_str_substr(name, before_len, len); - method_id = SYM2ID(rb_str_intern(method_name)); - } else { - mod = rb_mKernel; - method_id = SYM2ID(rb_str_intern(name)); - } - } - } - long len = p - json->memo; - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - - if (method_id) { - VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer)); - *result = rb_funcallv(mod, method_id, 1, &text); + fbuffer_clear(json->fbuffer); + fbuffer_append(json->fbuffer, json->memo, len); + fbuffer_append_char(json->fbuffer, '\0'); + if (NIL_P(json->decimal_class)) { + *result = rb_float_new(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1)); } else { - *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1)); + VALUE text; + text = rb_str_new2(FBUFFER_PTR(json->fbuffer)); + *result = rb_funcall(json->decimal_class, i_new, 1, text); } - return p + 1; } else { return NULL; @@ -1742,37 +1146,37 @@ case 10: -#line 1746 "parser.c" +#line 1150 "parser.c" enum {JSON_array_start = 1}; -enum {JSON_array_first_final = 22}; +enum {JSON_array_first_final = 17}; enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 833 "parser.rl" +#line 398 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; + VALUE array_class = json->array_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - long stack_head = json->stack->head; + *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); -#line 1767 "parser.c" +#line 1172 "parser.c" { cs = JSON_array_start; } -#line 845 "parser.rl" +#line 411 "parser.rl" -#line 1774 "parser.c" +#line 1179 "parser.c" { - short _widec; if ( p == pe ) goto _test_eof; switch ( cs ) @@ -1793,7 +1197,7 @@ case 2: case 32: goto st2; case 34: goto tr2; case 45: goto tr2; - case 47: goto st18; + case 47: goto st13; case 73: goto tr2; case 78: goto tr2; case 91: goto tr2; @@ -1810,13 +1214,18 @@ case 2: goto st2; goto st0; tr2: -#line 813 "parser.rl" +#line 375 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { + if (NIL_P(json->array_class)) { + rb_ary_push(*result, v); + } else { + rb_funcall(*result, i_leftshift, 1, v); + } {p = (( np))-1;} } } @@ -1825,23 +1234,15 @@ st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1829 "parser.c" - _widec = (*p); - if ( 44 <= (*p) && (*p) <= 44 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { +#line 1238 "parser.c" + switch( (*p) ) { case 13: goto st3; case 32: goto st3; - case 47: goto st4; + case 44: goto st4; + case 47: goto st9; case 93: goto tr4; - case 300: goto st8; - case 556: goto st13; } - if ( 9 <= _widec && _widec <= 10 ) + if ( 9 <= (*p) && (*p) <= 10 ) goto st3; goto st0; st4: @@ -1849,67 +1250,57 @@ st4: goto _test_eof4; case 4: switch( (*p) ) { - case 42: goto st5; - case 47: goto st7; + case 13: goto st4; + case 32: goto st4; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st5; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st4; goto st0; st5: if ( ++p == pe ) goto _test_eof5; case 5: - if ( (*p) == 42 ) - goto st6; - goto st5; + switch( (*p) ) { + case 42: goto st6; + case 47: goto st8; + } + goto st0; st6: if ( ++p == pe ) goto _test_eof6; case 6: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st3; - } - goto st5; + if ( (*p) == 42 ) + goto st7; + goto st6; st7: if ( ++p == pe ) goto _test_eof7; case 7: - if ( (*p) == 10 ) - goto st3; - goto st7; -tr4: -#line 825 "parser.rl" - { p--; {p++; cs = 22; goto _out;} } - goto st22; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: -#line 1888 "parser.c" - goto st0; + switch( (*p) ) { + case 42: goto st7; + case 47: goto st4; + } + goto st6; st8: if ( ++p == pe ) goto _test_eof8; case 8: - switch( (*p) ) { - case 13: goto st8; - case 32: goto st8; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st9; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st8; - goto st0; + if ( (*p) == 10 ) + goto st4; + goto st8; st9: if ( ++p == pe ) goto _test_eof9; @@ -1932,7 +1323,7 @@ st11: case 11: switch( (*p) ) { case 42: goto st11; - case 47: goto st8; + case 47: goto st3; } goto st10; st12: @@ -1940,252 +1331,50 @@ st12: goto _test_eof12; case 12: if ( (*p) == 10 ) - goto st8; + goto st3; goto st12; +tr4: +#line 390 "parser.rl" + { p--; {p++; cs = 17; goto _out;} } + goto st17; +st17: + if ( ++p == pe ) + goto _test_eof17; +case 17: +#line 1345 "parser.c" + goto st0; st13: if ( ++p == pe ) goto _test_eof13; case 13: - _widec = (*p); - if ( (*p) < 13 ) { - if ( (*p) > 9 ) { - if ( 10 <= (*p) && (*p) <= 10 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 13 ) { - if ( (*p) > 32 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 32 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 34: goto tr2; - case 45: goto tr2; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 93: goto tr4; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - case 269: goto st8; - case 288: goto st8; - case 303: goto st9; - case 525: goto st13; - case 544: goto st13; - case 559: goto st14; - } - if ( _widec < 265 ) { - if ( 48 <= _widec && _widec <= 57 ) - goto tr2; - } else if ( _widec > 266 ) { - if ( 521 <= _widec && _widec <= 522 ) - goto st13; - } else - goto st8; + switch( (*p) ) { + case 42: goto st14; + case 47: goto st16; + } goto st0; st14: if ( ++p == pe ) goto _test_eof14; case 14: - _widec = (*p); - if ( (*p) > 42 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st10; - case 303: goto st12; - case 554: goto st15; - case 559: goto st17; - } - goto st0; + if ( (*p) == 42 ) + goto st15; + goto st14; st15: if ( ++p == pe ) goto _test_eof15; case 15: - _widec = (*p); - if ( (*p) < 42 ) { - if ( (*p) <= 41 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 42 ) { - if ( 43 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st11; - case 554: goto st16; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st15; - } else if ( _widec >= 128 ) - goto st10; - goto st0; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - _widec = (*p); - if ( (*p) < 43 ) { - if ( (*p) > 41 ) { - if ( 42 <= (*p) && (*p) <= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 46 ) { - if ( (*p) > 47 ) { - if ( 48 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st11; - case 303: goto st8; - case 554: goto st16; - case 559: goto st13; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st15; - } else if ( _widec >= 128 ) - goto st10; - goto st0; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - _widec = (*p); - if ( (*p) < 10 ) { - if ( (*p) <= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 10 ) { - if ( 11 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 823 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 266: goto st8; - case 522: goto st13; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st17; - } else if ( _widec >= 128 ) - goto st12; - goto st0; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: switch( (*p) ) { - case 42: goto st19; - case 47: goto st21; - } - goto st0; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - if ( (*p) == 42 ) - goto st20; - goto st19; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - switch( (*p) ) { - case 42: goto st20; + case 42: goto st15; case 47: goto st2; } - goto st19; -st21: + goto st14; +st16: if ( ++p == pe ) - goto _test_eof21; -case 21: + goto _test_eof16; +case 16: if ( (*p) == 10 ) goto st2; - goto st21; + goto st16; } _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; @@ -2193,135 +1382,42 @@ case 21: _test_eof5: cs = 5; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; _test_eof9: cs = 9; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; _test_eof11: cs = 11; goto _test_eof; _test_eof12: cs = 12; goto _test_eof; + _test_eof17: cs = 17; goto _test_eof; _test_eof13: cs = 13; goto _test_eof; _test_eof14: cs = 14; goto _test_eof; _test_eof15: cs = 15; goto _test_eof; _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; _test_eof: {} _out: {} } -#line 846 "parser.rl" +#line 412 "parser.rl" if(cs >= JSON_array_first_final) { - long count = json->stack->head - stack_head; - - if (RB_UNLIKELY(json->array_class)) { - VALUE array = rb_class_new_instance(0, 0, json->array_class); - VALUE *items = rvalue_stack_peek(json->stack, count); - long index; - for (index = 0; index < count; index++) { - rb_funcall(array, i_leftshift, 1, items[index]); - } - *result = array; - } else { - VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count)); - *result = array; - } - rvalue_stack_pop(json->stack, count); - return p + 1; } else { - raise_parse_error("unexpected token at '%s'", p); + rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); return NULL; } } -static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize) -{ - if (symbolize) { - intern = true; - } - VALUE result; -# ifdef HAVE_RB_ENC_INTERNED_STR - if (intern) { - result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); - } else { - result = rb_utf8_str_new(start, (long)(end - start)); - } -# else - result = rb_utf8_str_new(start, (long)(end - start)); - if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - } -# endif - - if (symbolize) { - result = rb_str_intern(result); - } - - return result; -} - -static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - - if (is_name && json->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; - } - } - - return build_string(string, stringEnd, intern, symbolize); -} - -static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) +static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd) { - size_t bufferSize = stringEnd - string; - char *p = string, *pe = string, *unescape, *bufferStart, *buffer; + char *p = string, *pe = string, *unescape; int unescape_len; char buf[4]; - if (is_name && json->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; - } - } - - pe = memchr(p, '\\', bufferSize); - if (RB_UNLIKELY(pe == NULL)) { - return build_string(string, stringEnd, intern, symbolize); - } - - VALUE result = rb_str_buf_new(bufferSize); - rb_enc_associate_index(result, utf8_encindex); - buffer = bufferStart = RSTRING_PTR(result); - while (pe < stringEnd) { if (*pe == '\\') { unescape = (char *) "?"; unescape_len = 1; - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } + if (pe > p) rb_str_buf_cat(result, p, pe - p); switch (*++pe) { case 'n': unescape = (char *) "\n"; @@ -2346,27 +1442,23 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE break; case 'u': if (pe > stringEnd - 4) { - raise_parse_error("incomplete unicode character escape sequence at '%s'", p); + rb_enc_raise( + EXC_ENCODING eParserError, + "%u: incomplete unicode character escape sequence at '%s'", __LINE__, p + ); } else { - uint32_t ch = unescape_unicode((unsigned char *) ++pe); + UTF32 ch = unescape_unicode((unsigned char *) ++pe); pe += 3; - /* To handle values above U+FFFF, we take a sequence of - * \uXXXX escapes in the U+D800..U+DBFF then - * U+DC00..U+DFFF ranges, take the low 10 bits from each - * to make a 20-bit number, then add 0x10000 to get the - * final codepoint. - * - * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling - * Surrogate Pairs in UTF-16", and 23.6 "Surrogates - * Area". - */ - if ((ch & 0xFC00) == 0xD800) { + if (UNI_SUR_HIGH_START == (ch & 0xFC00)) { pe++; if (pe > stringEnd - 6) { - raise_parse_error("incomplete surrogate pair at '%s'", p); + rb_enc_raise( + EXC_ENCODING eParserError, + "%u: incomplete surrogate pair at '%s'", __LINE__, p + ); } if (pe[0] == '\\' && pe[1] == 'u') { - uint32_t sur = unescape_unicode((unsigned char *) pe + 2); + UTF32 sur = unescape_unicode((unsigned char *) pe + 2); ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF)); pe += 5; @@ -2383,39 +1475,26 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE p = pe; continue; } - MEMCPY(buffer, unescape, char, unescape_len); - buffer += unescape_len; + rb_str_buf_cat(result, unescape, unescape_len); p = ++pe; } else { pe++; } } - - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } - rb_str_set_len(result, buffer - bufferStart); - - if (symbolize) { - result = rb_str_intern(result); - } else if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - } - + rb_str_buf_cat(result, p, pe - p); return result; } -#line 2411 "parser.c" +#line 1490 "parser.c" enum {JSON_string_start = 1}; -enum {JSON_string_first_final = 9}; +enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 1069 "parser.rl" +#line 519 "parser.rl" static int @@ -2435,16 +1514,17 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu int cs = EVIL; VALUE match_string; + *result = rb_str_buf_new(0); -#line 2440 "parser.c" +#line 1520 "parser.c" { cs = JSON_string_start; } -#line 1089 "parser.rl" +#line 540 "parser.rl" json->memo = p; -#line 2448 "parser.c" +#line 1528 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2465,60 +1545,52 @@ case 2: case 34: goto tr2; case 92: goto st3; } - if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) + if ( 0 <= (*p) && (*p) <= 31 ) goto st0; goto st2; tr2: -#line 1051 "parser.rl" - { - *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - {p = (( p + 1))-1;} - p--; - {p++; cs = 9; goto _out;} - } -#line 1044 "parser.rl" +#line 505 "parser.rl" { - *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - {p = (( p + 1))-1;} - p--; - {p++; cs = 9; goto _out;} - } - goto st9; -tr6: -#line 1044 "parser.rl" - { - *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - {p = (( p + 1))-1;} - p--; - {p++; cs = 9; goto _out;} + *result = json_string_unescape(*result, json->memo + 1, p); + if (NIL_P(*result)) { + p--; + {p++; cs = 8; goto _out;} + } else { + FORCE_UTF8(*result); + {p = (( p + 1))-1;} + } } - goto st9; -st9: +#line 516 "parser.rl" + { p--; {p++; cs = 8; goto _out;} } + goto st8; +st8: if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 2501 "parser.c" + goto _test_eof8; +case 8: +#line 1571 "parser.c" goto st0; st3: if ( ++p == pe ) goto _test_eof3; case 3: if ( (*p) == 117 ) - goto st5; - if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) + goto st4; + if ( 0 <= (*p) && (*p) <= 31 ) goto st0; - goto st4; + goto st2; st4: if ( ++p == pe ) goto _test_eof4; case 4: - switch( (*p) ) { - case 34: goto tr6; - case 92: goto st3; - } - if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) - goto st0; - goto st4; + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st5; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st5; + } else + goto st5; + goto st0; st5: if ( ++p == pe ) goto _test_eof5; @@ -2551,41 +1623,27 @@ st7: case 7: if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st8; - } else - goto st8; - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st4; + goto st2; } else if ( (*p) > 70 ) { if ( 97 <= (*p) && (*p) <= 102 ) - goto st4; + goto st2; } else - goto st4; + goto st2; goto st0; } _test_eof2: cs = 2; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; _test_eof4: cs = 4; goto _test_eof; _test_eof5: cs = 5; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; _test_eof: {} _out: {} } -#line 1091 "parser.rl" +#line 542 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -2598,6 +1656,11 @@ case 8: } } + if (json->symbolize_names && json->parsing_name) { + *result = rb_str_intern(*result); + } else { + rb_str_resize(*result, RSTRING_LEN(*result)); + } if (cs >= JSON_string_first_final) { return p + 1; } else { @@ -2619,80 +1682,18 @@ case 8: static VALUE convert_encoding(VALUE source) { - int encindex = RB_ENCODING_GET(source); - - if (RB_LIKELY(encindex == utf8_encindex)) { - return source; +#ifdef HAVE_RUBY_ENCODING_H + rb_encoding *enc = rb_enc_get(source); + if (enc == rb_ascii8bit_encoding()) { + if (OBJ_FROZEN(source)) { + source = rb_str_dup(source); + } + FORCE_UTF8(source); + } else { + source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding()); } - - if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); - } - - return rb_funcall(source, i_encode, 1, Encoding_UTF_8); -} - -static int configure_parser_i(VALUE key, VALUE val, VALUE data) -{ - JSON_Parser *json = (JSON_Parser *)data; - - if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } - else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } - else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { json->freeze = RTEST(val); } - else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } - else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } - else if (key == sym_create_additions) { - if (NIL_P(val)) { - json->create_additions = true; - json->deprecated_create_additions = true; - } else { - json->create_additions = RTEST(val); - json->deprecated_create_additions = false; - } - } - - return ST_CONTINUE; -} - -static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) -{ - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } - - json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; - json->max_nesting = 100; - - if (!NIL_P(opts)) { - Check_Type(opts, T_HASH); - if (RHASH_SIZE(opts) > 0) { - // We assume in most cases few keys are set so it's faster to go over - // the provided keys than to check all possible keys. - rb_hash_foreach(opts, configure_parser_i, (VALUE)json); - - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - - if (json->create_additions && !json->create_id) { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - } - - } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source); - json->Vsource = source; +#endif + return source; } /* @@ -2700,6 +1701,8 @@ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) * * Creates a new JSON::Ext::Parser instance for the string _source_. * + * Creates a new JSON::Ext::Parser instance for the string _source_. + * * It will be configured by the _opts_ hash. _opts_ can have the following * keys: * @@ -2717,28 +1720,117 @@ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) * * *create_additions*: If set to false, the Parser doesn't create * additions even if a matching class and create_id was found. This option * defaults to false. - * * *object_class*: Defaults to Hash. If another type is provided, it will be used - * instead of Hash to represent JSON objects. The type must respond to - * +new+ without arguments, and return an object that respond to +[]=+. - * * *array_class*: Defaults to Array If another type is provided, it will be used - * instead of Hash to represent JSON arrays. The type must respond to - * +new+ without arguments, and return an object that respond to +<<+. - * * *decimal_class*: Specifies which class to use instead of the default - * (Float) when parsing decimal numbers. This class must accept a single - * string argument in its constructor. + * * *object_class*: Defaults to Hash + * * *array_class*: Defaults to Array */ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) { + VALUE source, opts; GET_PARSER_INIT; - rb_check_arity(argc, 1, 2); - - parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil); + if (json->Vsource) { + rb_raise(rb_eTypeError, "already initialized instance"); + } +#ifdef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH + rb_scan_args(argc, argv, "1:", &source, &opts); +#else + rb_scan_args(argc, argv, "11", &source, &opts); +#endif + if (!NIL_P(opts)) { +#ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH + opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); + if (NIL_P(opts)) { + rb_raise(rb_eArgError, "opts needs to be like a hash"); + } else { +#endif + VALUE tmp = ID2SYM(i_max_nesting); + if (option_given_p(opts, tmp)) { + VALUE max_nesting = rb_hash_aref(opts, tmp); + if (RTEST(max_nesting)) { + Check_Type(max_nesting, T_FIXNUM); + json->max_nesting = FIX2INT(max_nesting); + } else { + json->max_nesting = 0; + } + } else { + json->max_nesting = 100; + } + tmp = ID2SYM(i_allow_nan); + if (option_given_p(opts, tmp)) { + json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; + } else { + json->allow_nan = 0; + } + tmp = ID2SYM(i_symbolize_names); + if (option_given_p(opts, tmp)) { + json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; + } else { + json->symbolize_names = 0; + } + tmp = ID2SYM(i_create_additions); + if (option_given_p(opts, tmp)) { + json->create_additions = RTEST(rb_hash_aref(opts, tmp)); + } else { + json->create_additions = 0; + } + if (json->symbolize_names && json->create_additions) { + rb_raise(rb_eArgError, + "options :symbolize_names and :create_additions cannot be " + " used in conjunction"); + } + tmp = ID2SYM(i_create_id); + if (option_given_p(opts, tmp)) { + json->create_id = rb_hash_aref(opts, tmp); + } else { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } + tmp = ID2SYM(i_object_class); + if (option_given_p(opts, tmp)) { + json->object_class = rb_hash_aref(opts, tmp); + } else { + json->object_class = Qnil; + } + tmp = ID2SYM(i_array_class); + if (option_given_p(opts, tmp)) { + json->array_class = rb_hash_aref(opts, tmp); + } else { + json->array_class = Qnil; + } + tmp = ID2SYM(i_decimal_class); + if (option_given_p(opts, tmp)) { + json->decimal_class = rb_hash_aref(opts, tmp); + } else { + json->decimal_class = Qnil; + } + tmp = ID2SYM(i_match_string); + if (option_given_p(opts, tmp)) { + VALUE match_string = rb_hash_aref(opts, tmp); + json->match_string = RTEST(match_string) ? match_string : Qnil; + } else { + json->match_string = Qnil; + } +#ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH + } +#endif + } else { + json->max_nesting = 100; + json->allow_nan = 0; + json->create_additions = 0; + json->create_id = rb_funcall(mJSON, i_create_id, 0); + json->object_class = Qnil; + json->array_class = Qnil; + json->decimal_class = Qnil; + } + source = convert_encoding(StringValue(source)); + StringValue(source); + json->len = RSTRING_LEN(source); + json->source = RSTRING_PTR(source);; + json->Vsource = source; return self; } -#line 2742 "parser.c" +#line 1834 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -2746,7 +1838,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 1257 "parser.rl" +#line 742 "parser.rl" /* @@ -2754,37 +1846,25 @@ enum {JSON_en_main = 1}; * * Parses the current JSON text _source_ and returns the complete data * structure as a result. - * It raises JSON::ParserError if fail to parse. */ static VALUE cParser_parse(VALUE self) { - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; - - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); - - VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { - .type = RVALUE_STACK_STACK_ALLOCATED, - .ptr = rvalue_stack_buffer, - .capa = RVALUE_STACK_INITIAL_CAPA, - }; - json->stack = &stack; + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; + GET_PARSER; -#line 2779 "parser.c" +#line 1859 "parser.c" { cs = JSON_start; } -#line 1285 "parser.rl" - p = json->source; - pe = p + json->len; +#line 758 "parser.rl" + p = json->source; + pe = p + json->len; -#line 2788 "parser.c" +#line 1868 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2818,7 +1898,7 @@ st0: cs = 0; goto _out; tr2: -#line 1249 "parser.rl" +#line 734 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2828,7 +1908,7 @@ st10: if ( ++p == pe ) goto _test_eof10; case 10: -#line 2832 "parser.c" +#line 1912 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2917,240 +1997,56 @@ case 9: _out: {} } -#line 1288 "parser.rl" +#line 761 "parser.rl" - if (json->stack_handle) { - rvalue_stack_eagerly_release(json->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; - } -} - -static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - - JSON_Parser _parser = {0}; - JSON_Parser *json = &_parser; - parser_init(json, source, opts); - - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); - - VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { - .type = RVALUE_STACK_STACK_ALLOCATED, - .ptr = rvalue_stack_buffer, - .capa = RVALUE_STACK_INITIAL_CAPA, - }; - json->stack = &stack; - - -#line 2957 "parser.c" - { - cs = JSON_start; - } - -#line 1323 "parser.rl" - p = json->source; - pe = p + json->len; - -#line 2966 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 13: goto st1; - case 32: goto st1; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st6; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st1; - goto st0; -st0: -cs = 0; - goto _out; -tr2: -#line 1249 "parser.rl" - { - char *np = JSON_parse_value(json, p, pe, &result, 0); - if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} - } - goto st10; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: -#line 3010 "parser.c" - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 47: goto st2; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 42: goto st3; - case 47: goto st5; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 42 ) - goto st4; - goto st3; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st4; - case 47: goto st10; - } - goto st3; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 10 ) - goto st10; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st9; - } - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 42 ) - goto st8; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 42: goto st8; - case 47: goto st1; - } - goto st7; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 10 ) - goto st1; - goto st9; - } - _test_eof1: cs = 1; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 1326 "parser.rl" - - if (json->stack_handle) { - rvalue_stack_eagerly_release(json->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; - } + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); + return Qnil; + } } static void JSON_mark(void *ptr) { JSON_Parser *json = ptr; - rb_gc_mark(json->Vsource); - rb_gc_mark(json->create_id); - rb_gc_mark(json->object_class); - rb_gc_mark(json->array_class); - rb_gc_mark(json->decimal_class); - rb_gc_mark(json->match_string); - rb_gc_mark(json->stack_handle); - - long index; - for (index = 0; index < json->name_cache.length; index++) { - rb_gc_mark(json->name_cache.entries[index]); - } + rb_gc_mark_maybe(json->Vsource); + rb_gc_mark_maybe(json->create_id); + rb_gc_mark_maybe(json->object_class); + rb_gc_mark_maybe(json->array_class); + rb_gc_mark_maybe(json->decimal_class); + rb_gc_mark_maybe(json->match_string); } static void JSON_free(void *ptr) { JSON_Parser *json = ptr; - fbuffer_free(&json->fbuffer); + fbuffer_free(json->fbuffer); ruby_xfree(json); } static size_t JSON_memsize(const void *ptr) { const JSON_Parser *json = ptr; - return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer); + return sizeof(*json) + FBUFFER_CAPA(json->fbuffer); } +#ifdef NEW_TYPEDDATA_WRAPPER static const rb_data_type_t JSON_Parser_type = { "JSON/Parser", {JSON_mark, JSON_free, JSON_memsize,}, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY 0, 0, RUBY_TYPED_FREE_IMMEDIATELY, +#endif }; +#endif static VALUE cJSON_parser_s_allocate(VALUE klass) { JSON_Parser *json; VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - fbuffer_stack_init(&json->fbuffer, 0, NULL, 0); + json->fbuffer = fbuffer_alloc(0); return obj; } @@ -3168,65 +2064,40 @@ static VALUE cParser_source(VALUE self) void Init_parser(void) { -#ifdef HAVE_RB_EXT_RACTOR_SAFE - rb_ext_ractor_safe(true); -#endif - -#undef rb_intern rb_require("json/common"); mJSON = rb_define_module("JSON"); mExt = rb_define_module_under(mJSON, "Ext"); cParser = rb_define_class_under(mExt, "Parser", rb_cObject); + eParserError = rb_path2class("JSON::ParserError"); eNestingError = rb_path2class("JSON::NestingError"); - rb_gc_register_mark_object(eNestingError); rb_define_alloc_func(cParser, cJSON_parser_s_allocate); rb_define_method(cParser, "initialize", cParser_initialize, -1); rb_define_method(cParser, "parse", cParser_parse, 0); rb_define_method(cParser, "source", cParser_source, 0); - rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); - CNaN = rb_const_get(mJSON, rb_intern("NaN")); - rb_gc_register_mark_object(CNaN); - CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); - rb_gc_register_mark_object(CInfinity); - CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); - rb_gc_register_mark_object(CMinusInfinity); - - rb_global_variable(&Encoding_UTF_8); - Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); - - sym_max_nesting = ID2SYM(rb_intern("max_nesting")); - sym_allow_nan = ID2SYM(rb_intern("allow_nan")); - sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); - sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); - sym_freeze = ID2SYM(rb_intern("freeze")); - sym_create_additions = ID2SYM(rb_intern("create_additions")); - sym_create_id = ID2SYM(rb_intern("create_id")); - sym_object_class = ID2SYM(rb_intern("object_class")); - sym_array_class = ID2SYM(rb_intern("array_class")); - sym_decimal_class = ID2SYM(rb_intern("decimal_class")); - sym_match_string = ID2SYM(rb_intern("match_string")); - i_create_id = rb_intern("create_id"); i_json_creatable_p = rb_intern("json_creatable?"); i_json_create = rb_intern("json_create"); + i_create_id = rb_intern("create_id"); + i_create_additions = rb_intern("create_additions"); i_chr = rb_intern("chr"); + i_max_nesting = rb_intern("max_nesting"); + i_allow_nan = rb_intern("allow_nan"); + i_symbolize_names = rb_intern("symbolize_names"); + i_object_class = rb_intern("object_class"); + i_array_class = rb_intern("array_class"); + i_decimal_class = rb_intern("decimal_class"); i_match = rb_intern("match"); + i_match_string = rb_intern("match_string"); + i_key_p = rb_intern("key?"); i_deep_const_get = rb_intern("deep_const_get"); i_aset = rb_intern("[]="); i_aref = rb_intern("[]"); i_leftshift = rb_intern("<<"); i_new = rb_intern("new"); - i_try_convert = rb_intern("try_convert"); - i_uminus = rb_intern("-@"); - i_encode = rb_intern("encode"); - - binary_encindex = rb_ascii8bit_encindex(); - utf8_encindex = rb_utf8_encindex(); - enc_utf8 = rb_utf8_encoding(); } /* diff --git a/ext/json/parser/parser.h b/ext/json/parser/parser.h new file mode 100644 index 0000000000..e6cf779024 --- /dev/null +++ b/ext/json/parser/parser.h @@ -0,0 +1,91 @@ +#ifndef _PARSER_H_ +#define _PARSER_H_ + +#include "ruby.h" + +#ifndef HAVE_RUBY_RE_H +#include "re.h" +#endif + +#ifdef HAVE_RUBY_ST_H +#include "ruby/st.h" +#else +#include "st.h" +#endif + +#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key)) + +/* unicode */ + +typedef unsigned long UTF32; /* at least 32 bits */ +typedef unsigned short UTF16; /* at least 16 bits */ +typedef unsigned char UTF8; /* typically 8 bits */ + +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF + +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + int max_nesting; + int allow_nan; + int parsing_name; + int symbolize_names; + VALUE object_class; + VALUE array_class; + VALUE decimal_class; + int create_additions; + VALUE match_string; + FBuffer *fbuffer; +} JSON_Parser; + +#define GET_PARSER \ + GET_PARSER_INIT; \ + if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") +#define GET_PARSER_INIT \ + JSON_Parser *json; \ + TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) + +#define MinusInfinity "-Infinity" +#define EVIL 0x666 + +static UTF32 unescape_unicode(const unsigned char *p); +static int convert_UTF32_to_UTF8(char *buf, UTF32 ch); +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd); +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static VALUE convert_encoding(VALUE source); +static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self); +static VALUE cParser_parse(VALUE self); +static void JSON_mark(void *json); +static void JSON_free(void *json); +static VALUE cJSON_parser_s_allocate(VALUE klass); +static VALUE cParser_source(VALUE self); +#ifndef ZALLOC +#define ZALLOC(type) ((type *)ruby_zalloc(sizeof(type))) +static inline void *ruby_zalloc(size_t n) +{ + void *p = ruby_xmalloc(n); + memset(p, 0, n); + return p; +} +#endif +#ifdef TypedData_Make_Struct +static const rb_data_type_t JSON_Parser_type; +#define NEW_TYPEDDATA_WRAPPER 1 +#else +#define TypedData_Make_Struct(klass, type, ignore, json) Data_Make_Struct(klass, type, NULL, JSON_free, json) +#define TypedData_Get_Struct(self, JSON_Parser, ignore, json) Data_Get_Struct(self, JSON_Parser, json) +#endif + +#endif diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl index 9856a73808..f7dbcffb5f 100644 --- a/ext/json/parser/parser.rl +++ b/ext/json/parser/parser.rl @@ -1,313 +1,31 @@ -#include "ruby.h" #include "../fbuffer/fbuffer.h" +#include "parser.h" -static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, - i_chr, i_deep_const_get, i_match, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_uminus, i_encode; - -static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, - sym_create_additions, sym_create_id, sym_object_class, sym_array_class, - sym_decimal_class, sym_match_string; - -static int binary_encindex; -static int utf8_encindex; - -#ifdef HAVE_RB_CATEGORY_WARN -# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) -#else -# define json_deprecated(message) rb_warn(message) -#endif - -static const char deprecated_create_additions_warning[] = - "JSON.load implicit support for `create_additions: true` is deprecated " - "and will be removed in 3.0, use JSON.unsafe_load or explicitly " - "pass `create_additions: true`"; - -#ifndef HAVE_RB_HASH_BULK_INSERT -// For TruffleRuby -void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) -{ - long index = 0; - while (index < count) { - VALUE name = pairs[index++]; - VALUE value = pairs[index++]; - rb_hash_aset(hash, name, value); - } - RB_GC_GUARD(hash); -} -#endif - -/* name cache */ - -#include <string.h> -#include <ctype.h> - -// Object names are likely to be repeated, and are frozen. -// As such we can re-use them if we keep a cache of the ones we've seen so far, -// and save much more expensive lookups into the global fstring table. -// This cache implementation is deliberately simple, as we're optimizing for compactness, -// to be able to fit safely on the stack. -// As such, binary search into a sorted array gives a good tradeoff between compactness and -// performance. -#define JSON_RVALUE_CACHE_CAPA 63 -typedef struct rvalue_cache_struct { - int length; - VALUE entries[JSON_RVALUE_CACHE_CAPA]; -} rvalue_cache; - -static rb_encoding *enc_utf8; - -#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 - -static inline VALUE build_interned_string(const char *str, const long length) -{ -# ifdef HAVE_RB_ENC_INTERNED_STR - return rb_enc_interned_str(str, length, enc_utf8); -# else - VALUE rstring = rb_utf8_str_new(str, length); - return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); -# endif -} - -static inline VALUE build_symbol(const char *str, const long length) -{ - return rb_str_intern(build_interned_string(str, length)); -} - -static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) -{ - MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); - cache->length++; - cache->entries[index] = rstring; -} - -static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) -{ - long rstring_length = RSTRING_LEN(rstring); - if (length == rstring_length) { - return memcmp(str, RSTRING_PTR(rstring), length); - } else { - return (int)(length - rstring_length); - } -} - -static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) -{ - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - - int low = 0; - int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; - - while (low <= high) { - mid = (high + low) >> 1; - VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, entry); - - if (last_cmp == 0) { - return entry; - } else if (last_cmp > 0) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - - VALUE rstring = build_interned_string(str, length); - - if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rstring); - } - return rstring; -} - -static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) -{ - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - - int low = 0; - int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; - - while (low <= high) { - mid = (high + low) >> 1; - VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); - - if (last_cmp == 0) { - return entry; - } else if (last_cmp > 0) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - - VALUE rsymbol = build_symbol(str, length); - - if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rsymbol); - } - return rsymbol; -} - -/* rvalue stack */ - -#define RVALUE_STACK_INITIAL_CAPA 128 - -enum rvalue_stack_type { - RVALUE_STACK_HEAP_ALLOCATED = 0, - RVALUE_STACK_STACK_ALLOCATED = 1, -}; - -typedef struct rvalue_stack_struct { - enum rvalue_stack_type type; - long capa; - long head; - VALUE *ptr; -} rvalue_stack; - -static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); - -static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) -{ - long required = stack->capa * 2; - - if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { - stack = rvalue_stack_spill(stack, handle, stack_ref); - } else { - REALLOC_N(stack->ptr, VALUE, required); - stack->capa = required; - } - return stack; -} - -static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) -{ - if (RB_UNLIKELY(stack->head >= stack->capa)) { - stack = rvalue_stack_grow(stack, handle, stack_ref); - } - stack->ptr[stack->head] = value; - stack->head++; -} - -static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) -{ - return stack->ptr + (stack->head - count); -} - -static inline void rvalue_stack_pop(rvalue_stack *stack, long count) -{ - stack->head -= count; -} - -static void rvalue_stack_mark(void *ptr) -{ - rvalue_stack *stack = (rvalue_stack *)ptr; - long index; - for (index = 0; index < stack->head; index++) { - rb_gc_mark(stack->ptr[index]); - } -} - -static void rvalue_stack_free(void *ptr) +#if defined HAVE_RUBY_ENCODING_H +# define EXC_ENCODING rb_utf8_encoding(), +# ifndef HAVE_RB_ENC_RAISE +static void +enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...) { - rvalue_stack *stack = (rvalue_stack *)ptr; - if (stack) { - ruby_xfree(stack->ptr); - ruby_xfree(stack); - } -} - -static size_t rvalue_stack_memsize(const void *ptr) -{ - const rvalue_stack *stack = (const rvalue_stack *)ptr; - return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; -} + va_list args; + VALUE mesg; -static const rb_data_type_t JSON_Parser_rvalue_stack_type = { - "JSON::Ext::Parser/rvalue_stack", - { - .dmark = rvalue_stack_mark, - .dfree = rvalue_stack_free, - .dsize = rvalue_stack_memsize, - }, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -}; - -static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) -{ - rvalue_stack *stack; - *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - *stack_ref = stack; - MEMCPY(stack, old_stack, rvalue_stack, 1); - - stack->capa = old_stack->capa << 1; - stack->ptr = ALLOC_N(VALUE, stack->capa); - stack->type = RVALUE_STACK_HEAP_ALLOCATED; - MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); - return stack; -} + va_start(args, fmt); + mesg = rb_enc_vsprintf(enc, fmt, args); + va_end(args); -static void rvalue_stack_eagerly_release(VALUE handle) -{ - rvalue_stack *stack; - TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - RTYPEDDATA_DATA(handle) = NULL; - rvalue_stack_free(stack); + rb_exc_raise(rb_exc_new3(exc, mesg)); } +# define rb_enc_raise enc_raise +# endif +#else +# define EXC_ENCODING /* nothing */ +# define rb_enc_raise rb_raise +#endif /* unicode */ -static const signed char digit_values[256] = { +static const char digit_values[256] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, @@ -324,28 +42,26 @@ static const signed char digit_values[256] = { -1, -1, -1, -1, -1, -1, -1 }; -static uint32_t unescape_unicode(const unsigned char *p) +static UTF32 unescape_unicode(const unsigned char *p) { - const uint32_t replacement_char = 0xFFFD; - - signed char b; - uint32_t result = 0; + char b; + UTF32 result = 0; b = digit_values[p[0]]; - if (b < 0) return replacement_char; + if (b < 0) return UNI_REPLACEMENT_CHAR; result = (result << 4) | (unsigned char)b; b = digit_values[p[1]]; - if (b < 0) return replacement_char; + if (b < 0) return UNI_REPLACEMENT_CHAR; result = (result << 4) | (unsigned char)b; b = digit_values[p[2]]; - if (b < 0) return replacement_char; + if (b < 0) return UNI_REPLACEMENT_CHAR; result = (result << 4) | (unsigned char)b; b = digit_values[p[3]]; - if (b < 0) return replacement_char; + if (b < 0) return UNI_REPLACEMENT_CHAR; result = (result << 4) | (unsigned char)b; return result; } -static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) +static int convert_UTF32_to_UTF8(char *buf, UTF32 ch) { int len = 1; if (ch <= 0x7F) { @@ -371,78 +87,14 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) return len; } -typedef struct JSON_ParserStruct { - VALUE Vsource; - char *source; - long len; - char *memo; - VALUE create_id; - VALUE object_class; - VALUE array_class; - VALUE decimal_class; - VALUE match_string; - FBuffer fbuffer; - int in_array; - int max_nesting; - bool allow_nan; - bool allow_trailing_comma; - bool parsing_name; - bool symbolize_names; - bool freeze; - bool create_additions; - bool deprecated_create_additions; - rvalue_cache name_cache; - rvalue_stack *stack; - VALUE stack_handle; -} JSON_Parser; - -#define GET_PARSER \ - GET_PARSER_INIT; \ - if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") - -#define GET_PARSER_INIT \ - JSON_Parser *json; \ - TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) - -#define MinusInfinity "-Infinity" -#define EVIL 0x666 - -static const rb_data_type_t JSON_Parser_type; -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); - - -#ifndef HAVE_STRNLEN -static size_t strnlen(const char *s, size_t maxlen) -{ - char *p; - return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen); -} -#endif - -#define PARSE_ERROR_FRAGMENT_LEN 32 -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error(const char *format, const char *start) -{ - char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; - - size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN); - const char *ptr = start; - - if (len == PARSE_ERROR_FRAGMENT_LEN) { - MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); - buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0'; - ptr = buffer; - } - - rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); -} +static VALUE mJSON, mExt, cParser, eParserError, eNestingError; +static VALUE CNaN, CInfinity, CMinusInfinity; +static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, + i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, + i_object_class, i_array_class, i_decimal_class, i_key_p, + i_deep_const_get, i_match, i_match_string, i_aset, i_aref, + i_leftshift, i_new; %%{ machine JSON_common; @@ -479,25 +131,26 @@ static void raise_parse_error(const char *format, const char *start) write data; action parse_value { - char *np = JSON_parse_value(json, fpc, pe, result, current_nesting); + VALUE v = Qnil; + char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting); if (np == NULL) { fhold; fbreak; } else { + if (NIL_P(json->object_class)) { + rb_hash_aset(*result, last_name, v); + } else { + rb_funcall(*result, i_aset, 2, last_name, v); + } fexec np; } } - action allow_trailing_comma { json->allow_trailing_comma } - action parse_name { char *np; - json->parsing_name = true; - np = JSON_parse_string(json, fpc, pe, result); - json->parsing_name = false; - if (np == NULL) { fhold; fbreak; } else { - PUSH(*result); - fexec np; - } + json->parsing_name = 1; + np = JSON_parse_string(json, fpc, pe, &last_name); + json->parsing_name = 0; + if (np == NULL) { fhold; fbreak; } else fexec np; } action exit { fhold; fbreak; } @@ -507,64 +160,37 @@ static void raise_parse_error(const char *format, const char *start) main := ( begin_object - (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore* + (pair (next_pair)*)? ignore* end_object ) @exit; }%% -#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) - static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; + VALUE last_name = Qnil; + VALUE object_class = json->object_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - long stack_head = json->stack->head; + *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); %% write init; %% write exec; if (cs >= JSON_object_first_final) { - long count = json->stack->head - stack_head; - - if (RB_UNLIKELY(json->object_class)) { - VALUE object = rb_class_new_instance(0, 0, json->object_class); - long index = 0; - VALUE *items = rvalue_stack_peek(json->stack, count); - while (index < count) { - VALUE name = items[index++]; - VALUE value = items[index++]; - rb_funcall(object, i_aset, 2, name, value); - } - *result = object; - } else { - VALUE hash; -#ifdef HAVE_RB_HASH_NEW_CAPA - hash = rb_hash_new_capa(count >> 1); -#else - hash = rb_hash_new(); -#endif - rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash); - *result = hash; - } - rvalue_stack_pop(json->stack, count); - - if (RB_UNLIKELY(json->create_additions)) { + if (json->create_additions) { VALUE klassname; - if (json->object_class) { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); + if (NIL_P(json->object_class)) { + klassname = rb_hash_aref(*result, json->create_id); } else { - klassname = rb_hash_aref(*result, json->create_id); + klassname = rb_funcall(*result, i_aref, 1, json->create_id); } if (!NIL_P(klassname)) { VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { - if (json->deprecated_create_additions) { - json_deprecated(deprecated_create_additions_warning); - } *result = rb_funcall(klass, i_json_create, 1, *result); } } @@ -575,6 +201,7 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu } } + %%{ machine JSON_value; include JSON_common; @@ -594,24 +221,19 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu if (json->allow_nan) { *result = CNaN; } else { - raise_parse_error("unexpected token at '%s'", p - 2); + rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2); } } action parse_infinity { if (json->allow_nan) { *result = CInfinity; } else { - raise_parse_error("unexpected token at '%s'", p - 7); + rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8); } } action parse_string { char *np = JSON_parse_string(json, fpc, pe, result); - if (np == NULL) { - fhold; - fbreak; - } else { - fexec np; - } + if (np == NULL) { fhold; fbreak; } else fexec np; } action parse_number { @@ -622,21 +244,19 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu fexec p + 10; fhold; fbreak; } else { - raise_parse_error("unexpected token at '%s'", p); + rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); } } - np = JSON_parse_number(json, fpc, pe, result); - if (np != NULL) { - fexec np; - } + np = JSON_parse_float(json, fpc, pe, result); + if (np != NULL) fexec np; + np = JSON_parse_integer(json, fpc, pe, result); + if (np != NULL) fexec np; fhold; fbreak; } action parse_array { char *np; - json->in_array++; np = JSON_parse_array(json, fpc, pe, result, current_nesting + 1); - json->in_array--; if (np == NULL) { fhold; fbreak; } else fexec np; } @@ -654,10 +274,10 @@ main := ignore* ( Vtrue @parse_true | VNaN @parse_nan | VInfinity @parse_infinity | - begin_number @parse_number | - begin_string @parse_string | - begin_array @parse_array | - begin_object @parse_object + begin_number >parse_number | + begin_string >parse_string | + begin_array >parse_array | + begin_object >parse_object ) ignore* %*exit; }%% @@ -668,12 +288,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul %% write init; %% write exec; - if (json->freeze) { - OBJ_FREEZE(*result); - } - if (cs >= JSON_value_first_final) { - PUSH(*result); return p; } else { return NULL; @@ -690,40 +305,24 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit); }%% -#define MAX_FAST_INTEGER_SIZE 18 -static inline VALUE fast_parse_integer(char *p, char *pe) +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) { - bool negative = false; - if (*p == '-') { - negative = true; - p++; - } - - long long memo = 0; - while (p < pe) { - memo *= 10; - memo += *p - '0'; - p++; - } + int cs = EVIL; - if (negative) { - memo = -memo; - } - return LL2NUM(memo); -} + %% write init; + json->memo = p; + %% write exec; -static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) -{ + if (cs >= JSON_integer_first_final) { long len = p - json->memo; - if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { - *result = fast_parse_integer(json->memo, p); - } else { - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); - } + fbuffer_clear(json->fbuffer); + fbuffer_append(json->fbuffer, json->memo, len); + fbuffer_append_char(json->fbuffer, '\0'); + *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10); return p + 1; + } else { + return NULL; + } } %%{ @@ -733,70 +332,33 @@ static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) write data; action exit { fhold; fbreak; } - action isFloat { is_float = true; } main := '-'? ( - (('0' | [1-9][0-9]*) - ((('.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) | - ([Ee] [+\-]?[0-9]+)) > isFloat)? - ) (^[0-9Ee.\-]? @exit )); + (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) + | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+)) + ) (^[0-9Ee.\-]? @exit ); }%% -static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; - bool is_float = false; %% write init; json->memo = p; %% write exec; if (cs >= JSON_float_first_final) { - if (!is_float) { - return JSON_decode_integer(json, p, result); - } - VALUE mod = Qnil; - ID method_id = 0; - if (json->decimal_class) { - if (rb_respond_to(json->decimal_class, i_try_convert)) { - mod = json->decimal_class; - method_id = i_try_convert; - } else if (rb_respond_to(json->decimal_class, i_new)) { - mod = json->decimal_class; - method_id = i_new; - } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) { - VALUE name = rb_class_name(json->decimal_class); - const char *name_cstr = RSTRING_PTR(name); - const char *last_colon = strrchr(name_cstr, ':'); - if (last_colon) { - const char *mod_path_end = last_colon - 1; - VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); - mod = rb_path_to_class(mod_path); - - const char *method_name_beg = last_colon + 1; - long before_len = method_name_beg - name_cstr; - long len = RSTRING_LEN(name) - before_len; - VALUE method_name = rb_str_substr(name, before_len, len); - method_id = SYM2ID(rb_str_intern(method_name)); - } else { - mod = rb_mKernel; - method_id = SYM2ID(rb_str_intern(name)); - } - } - } - long len = p - json->memo; - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - - if (method_id) { - VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer)); - *result = rb_funcallv(mod, method_id, 1, &text); + fbuffer_clear(json->fbuffer); + fbuffer_append(json->fbuffer, json->memo, len); + fbuffer_append_char(json->fbuffer, '\0'); + if (NIL_P(json->decimal_class)) { + *result = rb_float_new(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1)); } else { - *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1)); + VALUE text; + text = rb_str_new2(FBUFFER_PTR(json->fbuffer)); + *result = rb_funcall(json->decimal_class, i_new, 1, text); } - return p + 1; } else { return NULL; @@ -816,141 +378,57 @@ static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *resu if (np == NULL) { fhold; fbreak; } else { + if (NIL_P(json->array_class)) { + rb_ary_push(*result, v); + } else { + rb_funcall(*result, i_leftshift, 1, v); + } fexec np; } } - action allow_trailing_comma { json->allow_trailing_comma } - action exit { fhold; fbreak; } next_element = value_separator ignore* begin_value >parse_value; main := begin_array ignore* ((begin_value >parse_value ignore*) - (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)? + (ignore* next_element ignore*)*)? end_array @exit; }%% static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; + VALUE array_class = json->array_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - long stack_head = json->stack->head; + *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); %% write init; %% write exec; if(cs >= JSON_array_first_final) { - long count = json->stack->head - stack_head; - - if (RB_UNLIKELY(json->array_class)) { - VALUE array = rb_class_new_instance(0, 0, json->array_class); - VALUE *items = rvalue_stack_peek(json->stack, count); - long index; - for (index = 0; index < count; index++) { - rb_funcall(array, i_leftshift, 1, items[index]); - } - *result = array; - } else { - VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count)); - *result = array; - } - rvalue_stack_pop(json->stack, count); - return p + 1; } else { - raise_parse_error("unexpected token at '%s'", p); + rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); return NULL; } } -static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize) -{ - if (symbolize) { - intern = true; - } - VALUE result; -# ifdef HAVE_RB_ENC_INTERNED_STR - if (intern) { - result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); - } else { - result = rb_utf8_str_new(start, (long)(end - start)); - } -# else - result = rb_utf8_str_new(start, (long)(end - start)); - if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - } -# endif - - if (symbolize) { - result = rb_str_intern(result); - } - - return result; -} - -static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - - if (is_name && json->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; - } - } - - return build_string(string, stringEnd, intern, symbolize); -} - -static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) +static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd) { - size_t bufferSize = stringEnd - string; - char *p = string, *pe = string, *unescape, *bufferStart, *buffer; + char *p = string, *pe = string, *unescape; int unescape_len; char buf[4]; - if (is_name && json->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; - } - } - - pe = memchr(p, '\\', bufferSize); - if (RB_UNLIKELY(pe == NULL)) { - return build_string(string, stringEnd, intern, symbolize); - } - - VALUE result = rb_str_buf_new(bufferSize); - rb_enc_associate_index(result, utf8_encindex); - buffer = bufferStart = RSTRING_PTR(result); - while (pe < stringEnd) { if (*pe == '\\') { unescape = (char *) "?"; unescape_len = 1; - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } + if (pe > p) rb_str_buf_cat(result, p, pe - p); switch (*++pe) { case 'n': unescape = (char *) "\n"; @@ -975,27 +453,23 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE break; case 'u': if (pe > stringEnd - 4) { - raise_parse_error("incomplete unicode character escape sequence at '%s'", p); + rb_enc_raise( + EXC_ENCODING eParserError, + "%u: incomplete unicode character escape sequence at '%s'", __LINE__, p + ); } else { - uint32_t ch = unescape_unicode((unsigned char *) ++pe); + UTF32 ch = unescape_unicode((unsigned char *) ++pe); pe += 3; - /* To handle values above U+FFFF, we take a sequence of - * \uXXXX escapes in the U+D800..U+DBFF then - * U+DC00..U+DFFF ranges, take the low 10 bits from each - * to make a 20-bit number, then add 0x10000 to get the - * final codepoint. - * - * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling - * Surrogate Pairs in UTF-16", and 23.6 "Surrogates - * Area". - */ - if ((ch & 0xFC00) == 0xD800) { + if (UNI_SUR_HIGH_START == (ch & 0xFC00)) { pe++; if (pe > stringEnd - 6) { - raise_parse_error("incomplete surrogate pair at '%s'", p); + rb_enc_raise( + EXC_ENCODING eParserError, + "%u: incomplete surrogate pair at '%s'", __LINE__, p + ); } if (pe[0] == '\\' && pe[1] == 'u') { - uint32_t sur = unescape_unicode((unsigned char *) pe + 2); + UTF32 sur = unescape_unicode((unsigned char *) pe + 2); ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF)); pe += 5; @@ -1012,26 +486,13 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE p = pe; continue; } - MEMCPY(buffer, unescape, char, unescape_len); - buffer += unescape_len; + rb_str_buf_cat(result, unescape, unescape_len); p = ++pe; } else { pe++; } } - - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } - rb_str_set_len(result, buffer - bufferStart); - - if (symbolize) { - result = rb_str_intern(result); - } else if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - } - + rb_str_buf_cat(result, p, pe - p); return result; } @@ -1041,31 +502,20 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE write data; - action parse_complex_string { - *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - fexec p + 1; - fhold; - fbreak; + action parse_string { + *result = json_string_unescape(*result, json->memo + 1, p); + if (NIL_P(*result)) { + fhold; + fbreak; + } else { + FORCE_UTF8(*result); + fexec p + 1; + } } - action parse_simple_string { - *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - fexec p + 1; - fhold; - fbreak; - } + action exit { fhold; fbreak; } - double_quote = '"'; - escape = '\\'; - control = 0..0x1f; - simple = any - escape - double_quote - control; - - main := double_quote ( - (simple*)( - (double_quote) @parse_simple_string | - ((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string - ) - ); + main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit; }%% static int @@ -1085,6 +535,7 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu int cs = EVIL; VALUE match_string; + *result = rb_str_buf_new(0); %% write init; json->memo = p; %% write exec; @@ -1100,6 +551,11 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu } } + if (json->symbolize_names && json->parsing_name) { + *result = rb_str_intern(*result); + } else { + rb_str_resize(*result, RSTRING_LEN(*result)); + } if (cs >= JSON_string_first_final) { return p + 1; } else { @@ -1121,80 +577,18 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu static VALUE convert_encoding(VALUE source) { - int encindex = RB_ENCODING_GET(source); - - if (RB_LIKELY(encindex == utf8_encindex)) { - return source; - } - - if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); - } - - return rb_funcall(source, i_encode, 1, Encoding_UTF_8); -} - -static int configure_parser_i(VALUE key, VALUE val, VALUE data) -{ - JSON_Parser *json = (JSON_Parser *)data; - - if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } - else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } - else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { json->freeze = RTEST(val); } - else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } - else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } - else if (key == sym_create_additions) { - if (NIL_P(val)) { - json->create_additions = true; - json->deprecated_create_additions = true; - } else { - json->create_additions = RTEST(val); - json->deprecated_create_additions = false; - } - } - - return ST_CONTINUE; -} - -static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) -{ - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } - - json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; - json->max_nesting = 100; - - if (!NIL_P(opts)) { - Check_Type(opts, T_HASH); - if (RHASH_SIZE(opts) > 0) { - // We assume in most cases few keys are set so it's faster to go over - // the provided keys than to check all possible keys. - rb_hash_foreach(opts, configure_parser_i, (VALUE)json); - - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - - if (json->create_additions && !json->create_id) { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - } - +#ifdef HAVE_RUBY_ENCODING_H + rb_encoding *enc = rb_enc_get(source); + if (enc == rb_ascii8bit_encoding()) { + if (OBJ_FROZEN(source)) { + source = rb_str_dup(source); } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source); - json->Vsource = source; + FORCE_UTF8(source); + } else { + source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding()); + } +#endif + return source; } /* @@ -1202,6 +596,8 @@ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) * * Creates a new JSON::Ext::Parser instance for the string _source_. * + * Creates a new JSON::Ext::Parser instance for the string _source_. + * * It will be configured by the _opts_ hash. _opts_ can have the following * keys: * @@ -1219,23 +615,112 @@ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) * * *create_additions*: If set to false, the Parser doesn't create * additions even if a matching class and create_id was found. This option * defaults to false. - * * *object_class*: Defaults to Hash. If another type is provided, it will be used - * instead of Hash to represent JSON objects. The type must respond to - * +new+ without arguments, and return an object that respond to +[]=+. - * * *array_class*: Defaults to Array If another type is provided, it will be used - * instead of Hash to represent JSON arrays. The type must respond to - * +new+ without arguments, and return an object that respond to +<<+. - * * *decimal_class*: Specifies which class to use instead of the default - * (Float) when parsing decimal numbers. This class must accept a single - * string argument in its constructor. + * * *object_class*: Defaults to Hash + * * *array_class*: Defaults to Array */ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) { + VALUE source, opts; GET_PARSER_INIT; - rb_check_arity(argc, 1, 2); - - parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil); + if (json->Vsource) { + rb_raise(rb_eTypeError, "already initialized instance"); + } +#ifdef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH + rb_scan_args(argc, argv, "1:", &source, &opts); +#else + rb_scan_args(argc, argv, "11", &source, &opts); +#endif + if (!NIL_P(opts)) { +#ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH + opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); + if (NIL_P(opts)) { + rb_raise(rb_eArgError, "opts needs to be like a hash"); + } else { +#endif + VALUE tmp = ID2SYM(i_max_nesting); + if (option_given_p(opts, tmp)) { + VALUE max_nesting = rb_hash_aref(opts, tmp); + if (RTEST(max_nesting)) { + Check_Type(max_nesting, T_FIXNUM); + json->max_nesting = FIX2INT(max_nesting); + } else { + json->max_nesting = 0; + } + } else { + json->max_nesting = 100; + } + tmp = ID2SYM(i_allow_nan); + if (option_given_p(opts, tmp)) { + json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; + } else { + json->allow_nan = 0; + } + tmp = ID2SYM(i_symbolize_names); + if (option_given_p(opts, tmp)) { + json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; + } else { + json->symbolize_names = 0; + } + tmp = ID2SYM(i_create_additions); + if (option_given_p(opts, tmp)) { + json->create_additions = RTEST(rb_hash_aref(opts, tmp)); + } else { + json->create_additions = 0; + } + if (json->symbolize_names && json->create_additions) { + rb_raise(rb_eArgError, + "options :symbolize_names and :create_additions cannot be " + " used in conjunction"); + } + tmp = ID2SYM(i_create_id); + if (option_given_p(opts, tmp)) { + json->create_id = rb_hash_aref(opts, tmp); + } else { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } + tmp = ID2SYM(i_object_class); + if (option_given_p(opts, tmp)) { + json->object_class = rb_hash_aref(opts, tmp); + } else { + json->object_class = Qnil; + } + tmp = ID2SYM(i_array_class); + if (option_given_p(opts, tmp)) { + json->array_class = rb_hash_aref(opts, tmp); + } else { + json->array_class = Qnil; + } + tmp = ID2SYM(i_decimal_class); + if (option_given_p(opts, tmp)) { + json->decimal_class = rb_hash_aref(opts, tmp); + } else { + json->decimal_class = Qnil; + } + tmp = ID2SYM(i_match_string); + if (option_given_p(opts, tmp)) { + VALUE match_string = rb_hash_aref(opts, tmp); + json->match_string = RTEST(match_string) ? match_string : Qnil; + } else { + json->match_string = Qnil; + } +#ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH + } +#endif + } else { + json->max_nesting = 100; + json->allow_nan = 0; + json->create_additions = 0; + json->create_id = rb_funcall(mJSON, i_create_id, 0); + json->object_class = Qnil; + json->array_class = Qnil; + json->decimal_class = Qnil; + } + source = convert_encoding(StringValue(source)); + StringValue(source); + json->len = RSTRING_LEN(source); + json->source = RSTRING_PTR(source);; + json->Vsource = source; return self; } @@ -1261,123 +746,67 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) * * Parses the current JSON text _source_ and returns the complete data * structure as a result. - * It raises JSON::ParserError if fail to parse. */ static VALUE cParser_parse(VALUE self) { - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; - - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); - - VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { - .type = RVALUE_STACK_STACK_ALLOCATED, - .ptr = rvalue_stack_buffer, - .capa = RVALUE_STACK_INITIAL_CAPA, - }; - json->stack = &stack; - - %% write init; - p = json->source; - pe = p + json->len; - %% write exec; - - if (json->stack_handle) { - rvalue_stack_eagerly_release(json->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; - } -} - -static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - - JSON_Parser _parser = {0}; - JSON_Parser *json = &_parser; - parser_init(json, source, opts); + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; + GET_PARSER; - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + %% write init; + p = json->source; + pe = p + json->len; + %% write exec; - VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { - .type = RVALUE_STACK_STACK_ALLOCATED, - .ptr = rvalue_stack_buffer, - .capa = RVALUE_STACK_INITIAL_CAPA, - }; - json->stack = &stack; - - %% write init; - p = json->source; - pe = p + json->len; - %% write exec; - - if (json->stack_handle) { - rvalue_stack_eagerly_release(json->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; - } + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); + return Qnil; + } } static void JSON_mark(void *ptr) { JSON_Parser *json = ptr; - rb_gc_mark(json->Vsource); - rb_gc_mark(json->create_id); - rb_gc_mark(json->object_class); - rb_gc_mark(json->array_class); - rb_gc_mark(json->decimal_class); - rb_gc_mark(json->match_string); - rb_gc_mark(json->stack_handle); - - long index; - for (index = 0; index < json->name_cache.length; index++) { - rb_gc_mark(json->name_cache.entries[index]); - } + rb_gc_mark_maybe(json->Vsource); + rb_gc_mark_maybe(json->create_id); + rb_gc_mark_maybe(json->object_class); + rb_gc_mark_maybe(json->array_class); + rb_gc_mark_maybe(json->decimal_class); + rb_gc_mark_maybe(json->match_string); } static void JSON_free(void *ptr) { JSON_Parser *json = ptr; - fbuffer_free(&json->fbuffer); + fbuffer_free(json->fbuffer); ruby_xfree(json); } static size_t JSON_memsize(const void *ptr) { const JSON_Parser *json = ptr; - return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer); + return sizeof(*json) + FBUFFER_CAPA(json->fbuffer); } +#ifdef NEW_TYPEDDATA_WRAPPER static const rb_data_type_t JSON_Parser_type = { "JSON/Parser", {JSON_mark, JSON_free, JSON_memsize,}, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY 0, 0, RUBY_TYPED_FREE_IMMEDIATELY, +#endif }; +#endif static VALUE cJSON_parser_s_allocate(VALUE klass) { JSON_Parser *json; VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - fbuffer_stack_init(&json->fbuffer, 0, NULL, 0); + json->fbuffer = fbuffer_alloc(0); return obj; } @@ -1395,65 +824,40 @@ static VALUE cParser_source(VALUE self) void Init_parser(void) { -#ifdef HAVE_RB_EXT_RACTOR_SAFE - rb_ext_ractor_safe(true); -#endif - -#undef rb_intern rb_require("json/common"); mJSON = rb_define_module("JSON"); mExt = rb_define_module_under(mJSON, "Ext"); cParser = rb_define_class_under(mExt, "Parser", rb_cObject); + eParserError = rb_path2class("JSON::ParserError"); eNestingError = rb_path2class("JSON::NestingError"); - rb_gc_register_mark_object(eNestingError); rb_define_alloc_func(cParser, cJSON_parser_s_allocate); rb_define_method(cParser, "initialize", cParser_initialize, -1); rb_define_method(cParser, "parse", cParser_parse, 0); rb_define_method(cParser, "source", cParser_source, 0); - rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); - CNaN = rb_const_get(mJSON, rb_intern("NaN")); - rb_gc_register_mark_object(CNaN); - CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); - rb_gc_register_mark_object(CInfinity); - CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); - rb_gc_register_mark_object(CMinusInfinity); - - rb_global_variable(&Encoding_UTF_8); - Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); - - sym_max_nesting = ID2SYM(rb_intern("max_nesting")); - sym_allow_nan = ID2SYM(rb_intern("allow_nan")); - sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); - sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); - sym_freeze = ID2SYM(rb_intern("freeze")); - sym_create_additions = ID2SYM(rb_intern("create_additions")); - sym_create_id = ID2SYM(rb_intern("create_id")); - sym_object_class = ID2SYM(rb_intern("object_class")); - sym_array_class = ID2SYM(rb_intern("array_class")); - sym_decimal_class = ID2SYM(rb_intern("decimal_class")); - sym_match_string = ID2SYM(rb_intern("match_string")); - i_create_id = rb_intern("create_id"); i_json_creatable_p = rb_intern("json_creatable?"); i_json_create = rb_intern("json_create"); + i_create_id = rb_intern("create_id"); + i_create_additions = rb_intern("create_additions"); i_chr = rb_intern("chr"); + i_max_nesting = rb_intern("max_nesting"); + i_allow_nan = rb_intern("allow_nan"); + i_symbolize_names = rb_intern("symbolize_names"); + i_object_class = rb_intern("object_class"); + i_array_class = rb_intern("array_class"); + i_decimal_class = rb_intern("decimal_class"); i_match = rb_intern("match"); + i_match_string = rb_intern("match_string"); + i_key_p = rb_intern("key?"); i_deep_const_get = rb_intern("deep_const_get"); i_aset = rb_intern("[]="); i_aref = rb_intern("[]"); i_leftshift = rb_intern("<<"); i_new = rb_intern("new"); - i_try_convert = rb_intern("try_convert"); - i_uminus = rb_intern("-@"); - i_encode = rb_intern("encode"); - - binary_encindex = rb_ascii8bit_encindex(); - utf8_encindex = rb_utf8_encindex(); - enc_utf8 = rb_utf8_encoding(); } /* diff --git a/ext/json/parser/prereq.mk b/ext/json/parser/prereq.mk index fc59169056..be7bcb4319 100644 --- a/ext/json/parser/prereq.mk +++ b/ext/json/parser/prereq.mk @@ -5,9 +5,6 @@ RAGEL = ragel .rl.c: $(RAGEL) -G2 $< $(BASERUBY) -pli -e '$$_.sub!(/[ \t]+$$/, "")' \ - -e '$$_.sub!(/^static const int (JSON_.*=.*);$$/, "enum {\\1};")' \ - -e '$$_.sub!(/^(static const char) (_JSON(?:_\w+)?_nfa_\w+)(?=\[\] =)/, "\\1 MAYBE_UNUSED(\\2)")' \ - -e '$$_.sub!(/0 <= ([\( ]+\*[\( ]*p\)+) && \1 <= 31/, "0 <= (signed char)(*(p)) && (*(p)) <= 31")' \ - -e '$$_ = "/* This file is automatically generated from parser.rl by using ragel */\n" + $$_ if $$. == 1' $@ + -e '$$_.sub!(/^static const int (JSON_.*=.*);$$/, "enum {\\1};")' $@ parser.c: |
