diff options
Diffstat (limited to 'ext/json/parser')
| -rw-r--r-- | ext/json/parser/depend | 182 | ||||
| -rw-r--r-- | ext/json/parser/extconf.rb | 13 | ||||
| -rw-r--r-- | ext/json/parser/parser.c | 3432 | ||||
| -rw-r--r-- | ext/json/parser/parser.h | 77 | ||||
| -rw-r--r-- | ext/json/parser/parser.rl | 927 | ||||
| -rw-r--r-- | ext/json/parser/prereq.mk | 9 |
6 files changed, 1644 insertions, 2996 deletions
diff --git a/ext/json/parser/depend b/ext/json/parser/depend index d188844670..d4737b1dfb 100644 --- a/ext/json/parser/depend +++ b/ext/json/parser/depend @@ -1,2 +1,182 @@ $(OBJS): $(ruby_headers) -parser.o: parser.c parser.h $(srcdir)/../fbuffer/fbuffer.h +parser.o: parser.c $(srcdir)/../fbuffer/fbuffer.h + +# AUTOGENERATED DEPENDENCIES START +parser.o: $(RUBY_EXTCONF_H) +parser.o: $(arch_hdrdir)/ruby/config.h +parser.o: $(hdrdir)/ruby.h +parser.o: $(hdrdir)/ruby/assert.h +parser.o: $(hdrdir)/ruby/backward.h +parser.o: $(hdrdir)/ruby/backward/2/assume.h +parser.o: $(hdrdir)/ruby/backward/2/attributes.h +parser.o: $(hdrdir)/ruby/backward/2/bool.h +parser.o: $(hdrdir)/ruby/backward/2/inttypes.h +parser.o: $(hdrdir)/ruby/backward/2/limits.h +parser.o: $(hdrdir)/ruby/backward/2/long_long.h +parser.o: $(hdrdir)/ruby/backward/2/stdalign.h +parser.o: $(hdrdir)/ruby/backward/2/stdarg.h +parser.o: $(hdrdir)/ruby/defines.h +parser.o: $(hdrdir)/ruby/encoding.h +parser.o: $(hdrdir)/ruby/intern.h +parser.o: $(hdrdir)/ruby/internal/abi.h +parser.o: $(hdrdir)/ruby/internal/anyargs.h +parser.o: $(hdrdir)/ruby/internal/arithmetic.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/char.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/double.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/int.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/long.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/short.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h +parser.o: $(hdrdir)/ruby/internal/assume.h +parser.o: $(hdrdir)/ruby/internal/attr/alloc_size.h +parser.o: $(hdrdir)/ruby/internal/attr/artificial.h +parser.o: $(hdrdir)/ruby/internal/attr/cold.h +parser.o: $(hdrdir)/ruby/internal/attr/const.h +parser.o: $(hdrdir)/ruby/internal/attr/constexpr.h +parser.o: $(hdrdir)/ruby/internal/attr/deprecated.h +parser.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h +parser.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h +parser.o: $(hdrdir)/ruby/internal/attr/error.h +parser.o: $(hdrdir)/ruby/internal/attr/flag_enum.h +parser.o: $(hdrdir)/ruby/internal/attr/forceinline.h +parser.o: $(hdrdir)/ruby/internal/attr/format.h +parser.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h +parser.o: $(hdrdir)/ruby/internal/attr/noalias.h +parser.o: $(hdrdir)/ruby/internal/attr/nodiscard.h +parser.o: $(hdrdir)/ruby/internal/attr/noexcept.h +parser.o: $(hdrdir)/ruby/internal/attr/noinline.h +parser.o: $(hdrdir)/ruby/internal/attr/nonnull.h +parser.o: $(hdrdir)/ruby/internal/attr/noreturn.h +parser.o: $(hdrdir)/ruby/internal/attr/packed_struct.h +parser.o: $(hdrdir)/ruby/internal/attr/pure.h +parser.o: $(hdrdir)/ruby/internal/attr/restrict.h +parser.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h +parser.o: $(hdrdir)/ruby/internal/attr/warning.h +parser.o: $(hdrdir)/ruby/internal/attr/weakref.h +parser.o: $(hdrdir)/ruby/internal/cast.h +parser.o: $(hdrdir)/ruby/internal/compiler_is.h +parser.o: $(hdrdir)/ruby/internal/compiler_is/apple.h +parser.o: $(hdrdir)/ruby/internal/compiler_is/clang.h +parser.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h +parser.o: $(hdrdir)/ruby/internal/compiler_is/intel.h +parser.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h +parser.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h +parser.o: $(hdrdir)/ruby/internal/compiler_since.h +parser.o: $(hdrdir)/ruby/internal/config.h +parser.o: $(hdrdir)/ruby/internal/constant_p.h +parser.o: $(hdrdir)/ruby/internal/core.h +parser.o: $(hdrdir)/ruby/internal/core/rarray.h +parser.o: $(hdrdir)/ruby/internal/core/rbasic.h +parser.o: $(hdrdir)/ruby/internal/core/rbignum.h +parser.o: $(hdrdir)/ruby/internal/core/rclass.h +parser.o: $(hdrdir)/ruby/internal/core/rdata.h +parser.o: $(hdrdir)/ruby/internal/core/rfile.h +parser.o: $(hdrdir)/ruby/internal/core/rhash.h +parser.o: $(hdrdir)/ruby/internal/core/robject.h +parser.o: $(hdrdir)/ruby/internal/core/rregexp.h +parser.o: $(hdrdir)/ruby/internal/core/rstring.h +parser.o: $(hdrdir)/ruby/internal/core/rstruct.h +parser.o: $(hdrdir)/ruby/internal/core/rtypeddata.h +parser.o: $(hdrdir)/ruby/internal/ctype.h +parser.o: $(hdrdir)/ruby/internal/dllexport.h +parser.o: $(hdrdir)/ruby/internal/dosish.h +parser.o: $(hdrdir)/ruby/internal/encoding/coderange.h +parser.o: $(hdrdir)/ruby/internal/encoding/ctype.h +parser.o: $(hdrdir)/ruby/internal/encoding/encoding.h +parser.o: $(hdrdir)/ruby/internal/encoding/pathname.h +parser.o: $(hdrdir)/ruby/internal/encoding/re.h +parser.o: $(hdrdir)/ruby/internal/encoding/sprintf.h +parser.o: $(hdrdir)/ruby/internal/encoding/string.h +parser.o: $(hdrdir)/ruby/internal/encoding/symbol.h +parser.o: $(hdrdir)/ruby/internal/encoding/transcode.h +parser.o: $(hdrdir)/ruby/internal/error.h +parser.o: $(hdrdir)/ruby/internal/eval.h +parser.o: $(hdrdir)/ruby/internal/event.h +parser.o: $(hdrdir)/ruby/internal/fl_type.h +parser.o: $(hdrdir)/ruby/internal/gc.h +parser.o: $(hdrdir)/ruby/internal/glob.h +parser.o: $(hdrdir)/ruby/internal/globals.h +parser.o: $(hdrdir)/ruby/internal/has/attribute.h +parser.o: $(hdrdir)/ruby/internal/has/builtin.h +parser.o: $(hdrdir)/ruby/internal/has/c_attribute.h +parser.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h +parser.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h +parser.o: $(hdrdir)/ruby/internal/has/extension.h +parser.o: $(hdrdir)/ruby/internal/has/feature.h +parser.o: $(hdrdir)/ruby/internal/has/warning.h +parser.o: $(hdrdir)/ruby/internal/intern/array.h +parser.o: $(hdrdir)/ruby/internal/intern/bignum.h +parser.o: $(hdrdir)/ruby/internal/intern/class.h +parser.o: $(hdrdir)/ruby/internal/intern/compar.h +parser.o: $(hdrdir)/ruby/internal/intern/complex.h +parser.o: $(hdrdir)/ruby/internal/intern/cont.h +parser.o: $(hdrdir)/ruby/internal/intern/dir.h +parser.o: $(hdrdir)/ruby/internal/intern/enum.h +parser.o: $(hdrdir)/ruby/internal/intern/enumerator.h +parser.o: $(hdrdir)/ruby/internal/intern/error.h +parser.o: $(hdrdir)/ruby/internal/intern/eval.h +parser.o: $(hdrdir)/ruby/internal/intern/file.h +parser.o: $(hdrdir)/ruby/internal/intern/hash.h +parser.o: $(hdrdir)/ruby/internal/intern/io.h +parser.o: $(hdrdir)/ruby/internal/intern/load.h +parser.o: $(hdrdir)/ruby/internal/intern/marshal.h +parser.o: $(hdrdir)/ruby/internal/intern/numeric.h +parser.o: $(hdrdir)/ruby/internal/intern/object.h +parser.o: $(hdrdir)/ruby/internal/intern/parse.h +parser.o: $(hdrdir)/ruby/internal/intern/proc.h +parser.o: $(hdrdir)/ruby/internal/intern/process.h +parser.o: $(hdrdir)/ruby/internal/intern/random.h +parser.o: $(hdrdir)/ruby/internal/intern/range.h +parser.o: $(hdrdir)/ruby/internal/intern/rational.h +parser.o: $(hdrdir)/ruby/internal/intern/re.h +parser.o: $(hdrdir)/ruby/internal/intern/ruby.h +parser.o: $(hdrdir)/ruby/internal/intern/select.h +parser.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +parser.o: $(hdrdir)/ruby/internal/intern/set.h +parser.o: $(hdrdir)/ruby/internal/intern/signal.h +parser.o: $(hdrdir)/ruby/internal/intern/sprintf.h +parser.o: $(hdrdir)/ruby/internal/intern/string.h +parser.o: $(hdrdir)/ruby/internal/intern/struct.h +parser.o: $(hdrdir)/ruby/internal/intern/thread.h +parser.o: $(hdrdir)/ruby/internal/intern/time.h +parser.o: $(hdrdir)/ruby/internal/intern/variable.h +parser.o: $(hdrdir)/ruby/internal/intern/vm.h +parser.o: $(hdrdir)/ruby/internal/interpreter.h +parser.o: $(hdrdir)/ruby/internal/iterator.h +parser.o: $(hdrdir)/ruby/internal/memory.h +parser.o: $(hdrdir)/ruby/internal/method.h +parser.o: $(hdrdir)/ruby/internal/module.h +parser.o: $(hdrdir)/ruby/internal/newobj.h +parser.o: $(hdrdir)/ruby/internal/scan_args.h +parser.o: $(hdrdir)/ruby/internal/special_consts.h +parser.o: $(hdrdir)/ruby/internal/static_assert.h +parser.o: $(hdrdir)/ruby/internal/stdalign.h +parser.o: $(hdrdir)/ruby/internal/stdbool.h +parser.o: $(hdrdir)/ruby/internal/stdckdint.h +parser.o: $(hdrdir)/ruby/internal/symbol.h +parser.o: $(hdrdir)/ruby/internal/value.h +parser.o: $(hdrdir)/ruby/internal/value_type.h +parser.o: $(hdrdir)/ruby/internal/variable.h +parser.o: $(hdrdir)/ruby/internal/warning_push.h +parser.o: $(hdrdir)/ruby/internal/xmalloc.h +parser.o: $(hdrdir)/ruby/missing.h +parser.o: $(hdrdir)/ruby/onigmo.h +parser.o: $(hdrdir)/ruby/oniguruma.h +parser.o: $(hdrdir)/ruby/ruby.h +parser.o: $(hdrdir)/ruby/st.h +parser.o: $(hdrdir)/ruby/subst.h +parser.o: $(srcdir)/../fbuffer/fbuffer.h +parser.o: $(srcdir)/../json.h +parser.o: $(srcdir)/../simd/simd.h +parser.o: $(srcdir)/../vendor/ryu.h +parser.o: parser.c +# AUTOGENERATED DEPENDENCIES END diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb index ae4f861c79..2440e66d8b 100644 --- a/ext/json/parser/extconf.rb +++ b/ext/json/parser/extconf.rb @@ -1,3 +1,16 @@ +# frozen_string_literal: true require 'mkmf' +$defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0" +have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0 +have_func("rb_str_to_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 +have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 +have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby + +append_cflags("-std=c99") + +if enable_config('parser-use-simd', default=!ENV["JSON_DISABLE_SIMD"]) + load __dir__ + "/../simd/conf.rb" +end + create_makefile 'json/ext/parser' diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index df89f2c58b..f1ea1b6abb 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1,47 +1,301 @@ +#include "../json.h" +#include "../vendor/ryu.h" +#include "../simd/simd.h" -#line 1 "parser.rl" -#include "../fbuffer/fbuffer.h" -#include "parser.h" +static VALUE mJSON, eNestingError, Encoding_UTF_8; +static VALUE CNaN, CInfinity, CMinusInfinity; -/* unicode */ +static ID i_new, i_try_convert, i_uminus, i_encode; -static const char digit_values[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, - -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1 +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, sym_symbolize_names, sym_freeze, + sym_decimal_class, sym_on_load, sym_allow_duplicate_key; + +static int binary_encindex; +static int utf8_encindex; + +#ifndef HAVE_RB_HASH_BULK_INSERT +// For TruffleRuby +static void +rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) +{ + long index = 0; + while (index < count) { + VALUE name = pairs[index++]; + VALUE value = pairs[index++]; + rb_hash_aset(hash, name, value); + } + RB_GC_GUARD(hash); +} +#endif + +#ifndef HAVE_RB_HASH_NEW_CAPA +#define rb_hash_new_capa(n) rb_hash_new() +#endif + +#ifndef HAVE_RB_STR_TO_INTERNED_STR +static VALUE rb_str_to_interned_str(VALUE str) +{ + return rb_funcall(rb_str_freeze(str), i_uminus, 0); +} +#endif + +/* name cache */ + +#include <string.h> +#include <ctype.h> + +// Object names are likely to be repeated, and are frozen. +// As such we can re-use them if we keep a cache of the ones we've seen so far, +// and save much more expensive lookups into the global fstring table. +// This cache implementation is deliberately simple, as we're optimizing for compactness, +// to be able to fit safely on the stack. +// As such, binary search into a sorted array gives a good tradeoff between compactness and +// performance. +#define JSON_RVALUE_CACHE_CAPA 63 +typedef struct rvalue_cache_struct { + int length; + VALUE entries[JSON_RVALUE_CACHE_CAPA]; +} rvalue_cache; + +static rb_encoding *enc_utf8; + +#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 + +static inline VALUE build_interned_string(const char *str, const long length) +{ +# ifdef HAVE_RB_ENC_INTERNED_STR + return rb_enc_interned_str(str, length, enc_utf8); +# else + VALUE rstring = rb_utf8_str_new(str, length); + return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); +# endif +} + +static inline VALUE build_symbol(const char *str, const long length) +{ + return rb_str_intern(build_interned_string(str, length)); +} + +static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) +{ + MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); + cache->length++; + cache->entries[index] = rstring; +} + +#define rstring_cache_memcmp memcmp + +#if JSON_CPU_LITTLE_ENDIAN_64BITS +#if __has_builtin(__builtin_bswap64) +#undef rstring_cache_memcmp +ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length) +{ + // The libc memcmp has numerous complex optimizations, but in this particular case, + // we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to + // inline a simpler memcmp outperforms calling the libc version. + long i = 0; + + for (; i + 8 <= length; i += 8) { + uint64_t a, b; + memcpy(&a, str + i, 8); + memcpy(&b, rptr + i, 8); + if (a != b) { + a = __builtin_bswap64(a); + b = __builtin_bswap64(b); + return (a < b) ? -1 : 1; + } + } + + for (; i < length; i++) { + if (str[i] != rptr[i]) { + return (str[i] < rptr[i]) ? -1 : 1; + } + } + + return 0; +} +#endif +#endif + +ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +{ + const char *rstring_ptr; + long rstring_length; + + RSTRING_GETMEM(rstring, rstring_ptr, rstring_length); + + if (length == rstring_length) { + return rstring_cache_memcmp(str, rstring_ptr, length); + } else { + return (int)(length - rstring_length); + } +} + +ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + int low = 0; + int high = cache->length - 1; + + while (low <= high) { + int mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + int cmp = rstring_cache_cmp(str, length, entry); + + if (cmp == 0) { + return entry; + } else if (cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + VALUE rstring = build_interned_string(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + rvalue_cache_insert_at(cache, low, rstring); + } + return rstring; +} + +static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + int low = 0; + int high = cache->length - 1; + + while (low <= high) { + int mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + + if (cmp == 0) { + return entry; + } else if (cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + VALUE rsymbol = build_symbol(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + rvalue_cache_insert_at(cache, low, rsymbol); + } + return rsymbol; +} + +/* rvalue stack */ + +#define RVALUE_STACK_INITIAL_CAPA 128 + +enum rvalue_stack_type { + RVALUE_STACK_HEAP_ALLOCATED = 0, + RVALUE_STACK_STACK_ALLOCATED = 1, }; -static UTF32 unescape_unicode(const unsigned char *p) +typedef struct rvalue_stack_struct { + enum rvalue_stack_type type; + long capa; + long head; + VALUE *ptr; +} rvalue_stack; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); + +static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) { - char b; - UTF32 result = 0; - b = digit_values[p[0]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | b; - b = digit_values[p[1]]; - result = (result << 4) | b; - if (b < 0) return UNI_REPLACEMENT_CHAR; - b = digit_values[p[2]]; - result = (result << 4) | b; - if (b < 0) return UNI_REPLACEMENT_CHAR; - b = digit_values[p[3]]; - result = (result << 4) | b; - if (b < 0) return UNI_REPLACEMENT_CHAR; - return result; + long required = stack->capa * 2; + + if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { + stack = rvalue_stack_spill(stack, handle, stack_ref); + } else { + REALLOC_N(stack->ptr, VALUE, required); + stack->capa = required; + } + return stack; +} + +static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) +{ + if (RB_UNLIKELY(stack->head >= stack->capa)) { + stack = rvalue_stack_grow(stack, handle, stack_ref); + } + stack->ptr[stack->head] = value; + stack->head++; + return value; +} + +static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) +{ + return stack->ptr + (stack->head - count); } -static int convert_UTF32_to_UTF8(char *buf, UTF32 ch) +static inline void rvalue_stack_pop(rvalue_stack *stack, long count) +{ + stack->head -= count; +} + +static void rvalue_stack_mark(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + long index; + for (index = 0; index < stack->head; index++) { + rb_gc_mark(stack->ptr[index]); + } +} + +static void rvalue_stack_free(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + if (stack) { + ruby_xfree(stack->ptr); + ruby_xfree(stack); + } +} + +static size_t rvalue_stack_memsize(const void *ptr) +{ + const rvalue_stack *stack = (const rvalue_stack *)ptr; + return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; +} + +static const rb_data_type_t JSON_Parser_rvalue_stack_type = { + "JSON::Ext::Parser/rvalue_stack", + { + .dmark = rvalue_stack_mark, + .dfree = rvalue_stack_free, + .dsize = rvalue_stack_memsize, + }, + 0, 0, + RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) +{ + rvalue_stack *stack; + *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + *stack_ref = stack; + MEMCPY(stack, old_stack, rvalue_stack, 1); + + stack->capa = old_stack->capa << 1; + stack->ptr = ALLOC_N(VALUE, stack->capa); + stack->type = RVALUE_STACK_HEAP_ALLOCATED; + MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); + return stack; +} + +static void rvalue_stack_eagerly_release(VALUE handle) +{ + if (handle) { + rvalue_stack *stack; + TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + RTYPEDDATA_DATA(handle) = NULL; + rvalue_stack_free(stack); + } +} + +static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) { int len = 1; if (ch <= 0x7F) { @@ -67,1488 +321,1079 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch) return len; } -#ifdef HAVE_RUBY_ENCODING_H -static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE, - CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE; -static ID i_encoding, i_encode; -#else -static ID i_iconv; -#endif +enum duplicate_key_action { + JSON_DEPRECATED = 0, + JSON_IGNORE, + JSON_RAISE, +}; -static VALUE mJSON, mExt, cParser, eParserError, eNestingError; -static VALUE CNaN, CInfinity, CMinusInfinity; +typedef struct JSON_ParserStruct { + VALUE on_load_proc; + VALUE decimal_class; + ID decimal_method_id; + enum duplicate_key_action on_duplicate_key; + int max_nesting; + bool allow_nan; + bool allow_trailing_comma; + bool allow_control_characters; + bool symbolize_names; + bool freeze; +} JSON_ParserConfig; + +typedef struct JSON_ParserStateStruct { + VALUE stack_handle; + const char *start; + const char *cursor; + const char *end; + rvalue_stack *stack; + rvalue_cache name_cache; + int in_array; + int current_nesting; +} JSON_ParserState; + +static inline size_t rest(JSON_ParserState *state) { + return state->end - state->cursor; +} + +static inline bool eos(JSON_ParserState *state) { + return state->cursor >= state->end; +} + +static inline char peek(JSON_ParserState *state) +{ + if (RB_UNLIKELY(eos(state))) { + return 0; + } + return *state->cursor; +} + +static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out) +{ + const char *cursor = state->cursor; + long column = 0; + long line = 1; -static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_quirks_mode, - i_object_class, i_array_class, i_key_p, i_deep_const_get, i_match, - i_match_string, i_aset, i_aref, i_leftshift; + while (cursor >= state->start) { + if (*cursor-- == '\n') { + break; + } + column++; + } + while (cursor >= state->start) { + if (*cursor-- == '\n') { + line++; + } + } + *line_out = line; + *column_out = column; +} -#line 110 "parser.rl" +static void emit_parse_warning(const char *message, JSON_ParserState *state) +{ + long line, column; + cursor_position(state, &line, &column); + VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column); + rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning); +} +#define PARSE_ERROR_FRAGMENT_LEN 32 -#line 92 "parser.c" -static const int JSON_object_start = 1; -static const int JSON_object_first_final = 27; -static const int JSON_object_error = 0; +#ifdef RBIMPL_ATTR_NORETURN +RBIMPL_ATTR_NORETURN() +#endif +static void raise_parse_error(const char *format, JSON_ParserState *state) +{ + unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3]; + long line, column; + cursor_position(state, &line, &column); + + const char *ptr = "EOF"; + if (state->cursor && state->cursor < state->end) { + ptr = state->cursor; + size_t len = 0; + while (len < PARSE_ERROR_FRAGMENT_LEN) { + char ch = ptr[len]; + if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') { + break; + } + len++; + } -static const int JSON_object_en_main = 1; + if (len) { + buffer[0] = '\''; + MEMCPY(buffer + 1, ptr, char, len); + + while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte + len--; + } + if (buffer[len] >= 0xC0) { // multibyte character start + len--; + } -#line 151 "parser.rl" + buffer[len + 1] = '\''; + buffer[len + 2] = '\0'; + ptr = (const char *)buffer; + } + } + VALUE msg = rb_sprintf(format, ptr); + VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column); + RB_GC_GUARD(msg); -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result) + VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message); + rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line)); + rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column)); + rb_exc_raise(exc); +} + +#ifdef RBIMPL_ATTR_NORETURN +RBIMPL_ATTR_NORETURN() +#endif +static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at) { - int cs = EVIL; - VALUE last_name = Qnil; - VALUE object_class = json->object_class; + state->cursor = at; + raise_parse_error(format, state); +} + +/* unicode */ + +static const signed char digit_values[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, + -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1 +}; - if (json->max_nesting && json->current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting); +static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe) +{ + if (RB_UNLIKELY(sp > spe - 4)) { + raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2); } - *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); - - -#line 116 "parser.c" - { - cs = JSON_object_start; - } - -#line 166 "parser.rl" - -#line 123 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 123 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 13: goto st2; - case 32: goto st2; - case 34: goto tr2; - case 47: goto st23; - case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st2; - goto st0; -tr2: -#line 133 "parser.rl" - { - char *np; - json->parsing_name = 1; - np = JSON_parse_string(json, p, pe, &last_name); - json->parsing_name = 0; - if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;} + const unsigned char *p = (const unsigned char *)sp; + + const signed char b0 = digit_values[p[0]]; + const signed char b1 = digit_values[p[1]]; + const signed char b2 = digit_values[p[2]]; + const signed char b3 = digit_values[p[3]]; + + if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) { + raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2); } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 164 "parser.c" - switch( (*p) ) { - case 13: goto st3; - case 32: goto st3; - case 47: goto st4; - case 58: goto st8; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st3; - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st5; - case 47: goto st7; - } - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 42 ) - goto st6; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st3; - } - goto st5; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 10 ) - goto st3; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 13: goto st8; - case 32: goto st8; - case 34: goto tr11; - case 45: goto tr11; - case 47: goto st19; - case 73: goto tr11; - case 78: goto tr11; - case 91: goto tr11; - case 102: goto tr11; - case 110: goto tr11; - case 116: goto tr11; - case 123: goto tr11; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr11; - } else if ( (*p) >= 9 ) - goto st8; - goto st0; -tr11: -#line 118 "parser.rl" - { - VALUE v = Qnil; - char *np = JSON_parse_value(json, p, pe, &v); - if (np == NULL) { - p--; {p++; cs = 9; goto _out;} - } else { - if (NIL_P(json->object_class)) { - rb_hash_aset(*result, last_name, v); + + return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3; +} + +#define GET_PARSER_CONFIG \ + JSON_ParserConfig *config; \ + TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config) + +static const rb_data_type_t JSON_ParserConfig_type; + +static void +json_eat_comments(JSON_ParserState *state) +{ + const char *start = state->cursor; + state->cursor++; + + switch (peek(state)) { + case '/': { + state->cursor = memchr(state->cursor, '\n', state->end - state->cursor); + if (!state->cursor) { + state->cursor = state->end; } else { - rb_funcall(*result, i_aset, 2, last_name, v); + state->cursor++; } - {p = (( np))-1;} + break; } - } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 251 "parser.c" - switch( (*p) ) { - case 13: goto st9; - case 32: goto st9; - case 44: goto st10; - case 47: goto st15; - case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st9; - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 34: goto tr2; - case 47: goto st11; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - switch( (*p) ) { - case 42: goto st12; - case 47: goto st14; - } - goto st0; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - if ( (*p) == 42 ) - goto st13; - goto st12; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - switch( (*p) ) { - case 42: goto st13; - case 47: goto st10; - } - goto st12; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - if ( (*p) == 10 ) - goto st10; - goto st14; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - switch( (*p) ) { - case 42: goto st16; - case 47: goto st18; - } - goto st0; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - if ( (*p) == 42 ) - goto st17; - goto st16; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - switch( (*p) ) { - case 42: goto st17; - case 47: goto st9; - } - goto st16; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - if ( (*p) == 10 ) - goto st9; - goto st18; -tr4: -#line 141 "parser.rl" - { p--; {p++; cs = 27; goto _out;} } - goto st27; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: -#line 347 "parser.c" - goto st0; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - switch( (*p) ) { - case 42: goto st20; - case 47: goto st22; - } - goto st0; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - if ( (*p) == 42 ) - goto st21; - goto st20; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: - switch( (*p) ) { - case 42: goto st21; - case 47: goto st8; - } - goto st20; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: - if ( (*p) == 10 ) - goto st8; - goto st22; -st23: - if ( ++p == pe ) - goto _test_eof23; -case 23: - switch( (*p) ) { - case 42: goto st24; - case 47: goto st26; - } - goto st0; -st24: - if ( ++p == pe ) - goto _test_eof24; -case 24: - if ( (*p) == 42 ) - goto st25; - goto st24; -st25: - if ( ++p == pe ) - goto _test_eof25; -case 25: - switch( (*p) ) { - case 42: goto st25; - case 47: goto st2; - } - goto st24; -st26: - if ( ++p == pe ) - goto _test_eof26; -case 26: - if ( (*p) == 10 ) - goto st2; - goto st26; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof27: cs = 27; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; - _test_eof23: cs = 23; goto _test_eof; - _test_eof24: cs = 24; goto _test_eof; - _test_eof25: cs = 25; goto _test_eof; - _test_eof26: cs = 26; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 167 "parser.rl" - - if (cs >= JSON_object_first_final) { - if (json->create_additions) { - VALUE klassname; - if (NIL_P(json->object_class)) { - klassname = rb_hash_aref(*result, json->create_id); - } else { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); - } - if (!NIL_P(klassname)) { - VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { - *result = rb_funcall(klass, i_json_create, 1, *result); + case '*': { + state->cursor++; + + while (true) { + const char *next_match = memchr(state->cursor, '*', state->end - state->cursor); + if (!next_match) { + raise_parse_error_at("unterminated comment, expected closing '*/'", state, start); + } + + state->cursor = next_match + 1; + if (peek(state) == '/') { + state->cursor++; + break; } } + break; } - return p + 1; - } else { - return NULL; + default: + raise_parse_error_at("unexpected token %s", state, start); + break; } } +ALWAYS_INLINE(static) void +json_eat_whitespace(JSON_ParserState *state) +{ + while (true) { + switch (peek(state)) { + case ' ': + state->cursor++; + break; + case '\n': + state->cursor++; + + // Heuristic: if we see a newline, there is likely consecutive spaces after it. +#if JSON_CPU_LITTLE_ENDIAN_64BITS + while (rest(state) > 8) { + uint64_t chunk; + memcpy(&chunk, state->cursor, sizeof(uint64_t)); + if (chunk == 0x2020202020202020) { + state->cursor += 8; + continue; + } + uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT; + state->cursor += consecutive_spaces; + break; + } +#endif + break; + case '\t': + case '\r': + state->cursor++; + break; + case '/': + json_eat_comments(state); + break; + + default: + return; + } + } +} -#line 470 "parser.c" -static const int JSON_value_start = 1; -static const int JSON_value_first_final = 21; -static const int JSON_value_error = 0; +static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize) +{ + if (symbolize) { + intern = true; + } + VALUE result; +# ifdef HAVE_RB_ENC_INTERNED_STR + if (intern) { + result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); + } else { + result = rb_utf8_str_new(start, (long)(end - start)); + } +# else + result = rb_utf8_str_new(start, (long)(end - start)); + if (intern) { + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + } +# endif -static const int JSON_value_en_main = 1; + if (symbolize) { + result = rb_str_intern(result); + } + return result; +} + +static inline bool json_string_cacheable_p(const char *string, size_t length) +{ + // We mostly want to cache strings that are likely to be repeated. + // Simple heuristics: + // - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold. + // - If the first character isn't a letter, we're much less likely to see this string again. + return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]); +} -#line 271 "parser.rl" +static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name) +{ + bool intern = is_name || config->freeze; + bool symbolize = is_name && config->symbolize_names; + size_t bufferSize = stringEnd - string; + + if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + + return build_string(string, stringEnd, intern, symbolize); +} +#define JSON_MAX_UNESCAPE_POSITIONS 16 +typedef struct _json_unescape_positions { + long size; + const char **positions; + unsigned long additional_backslashes; +} JSON_UnescapePositions; -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result) +static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions) { - int cs = EVIL; - - -#line 486 "parser.c" - { - cs = JSON_value_start; - } - -#line 278 "parser.rl" - -#line 493 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - switch( (*p) ) { - case 34: goto tr0; - case 45: goto tr2; - case 73: goto st2; - case 78: goto st9; - case 91: goto tr5; - case 102: goto st11; - case 110: goto st15; - case 116: goto st18; - case 123: goto tr9; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - goto st0; -st0: -cs = 0; - goto _out; -tr0: -#line 219 "parser.rl" - { - char *np = JSON_parse_string(json, p, pe, result); - if (np == NULL) { p--; {p++; cs = 21; goto _out;} } else {p = (( np))-1;} + while (positions->size) { + positions->size--; + const char *next_position = positions->positions[0]; + positions->positions++; + if (next_position >= pe) { + return next_position; + } } - goto st21; -tr2: -#line 224 "parser.rl" - { - char *np; - if(pe > p + 9 - json->quirks_mode && !strncmp(MinusInfinity, p, 9)) { - if (json->allow_nan) { - *result = CMinusInfinity; - {p = (( p + 10))-1;} - p--; {p++; cs = 21; goto _out;} - } else { - rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); + + if (positions->additional_backslashes) { + positions->additional_backslashes--; + return memchr(pe, '\\', stringEnd - pe); + } + + return NULL; +} + +NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions) +{ + bool intern = is_name || config->freeze; + bool symbolize = is_name && config->symbolize_names; + size_t bufferSize = stringEnd - string; + const char *p = string, *pe = string, *bufferStart; + char *buffer; + + VALUE result = rb_str_buf_new(bufferSize); + rb_enc_associate_index(result, utf8_encindex); + buffer = RSTRING_PTR(result); + bufferStart = buffer; + +#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe; + + while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) { + if (pe > p) { + MEMCPY(buffer, p, char, pe - p); + buffer += pe - p; + } + switch (*++pe) { + case '"': + case '/': + p = pe; // nothing to unescape just need to skip the backslash + break; + case '\\': + APPEND_CHAR('\\'); + break; + case 'n': + APPEND_CHAR('\n'); + break; + case 'r': + APPEND_CHAR('\r'); + break; + case 't': + APPEND_CHAR('\t'); + break; + case 'b': + APPEND_CHAR('\b'); + break; + case 'f': + APPEND_CHAR('\f'); + break; + case 'u': { + uint32_t ch = unescape_unicode(state, ++pe, stringEnd); + pe += 3; + /* To handle values above U+FFFF, we take a sequence of + * \uXXXX escapes in the U+D800..U+DBFF then + * U+DC00..U+DFFF ranges, take the low 10 bits from each + * to make a 20-bit number, then add 0x10000 to get the + * final codepoint. + * + * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling + * Surrogate Pairs in UTF-16", and 23.6 "Surrogates + * Area". + */ + if ((ch & 0xFC00) == 0xD800) { + pe++; + if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) { + uint32_t sur = unescape_unicode(state, pe + 2, stringEnd); + + if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) { + raise_parse_error_at("invalid surrogate pair at %s", state, p); + } + + ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF)); + pe += 5; + } else { + raise_parse_error_at("incomplete surrogate pair at %s", state, p); + break; + } + } + + int unescape_len = convert_UTF32_to_UTF8(buffer, ch); + buffer += unescape_len; + p = ++pe; + break; } + default: + if ((unsigned char)*pe < 0x20) { + if (!config->allow_control_characters) { + if (*pe == '\n') { + raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1); + } + raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1); + } + } else { + raise_parse_error_at("invalid escape character in string: %s", state, pe - 1); + } + break; } - np = JSON_parse_float(json, p, pe, result); - if (np != NULL) {p = (( np))-1;} - np = JSON_parse_integer(json, p, pe, result); - if (np != NULL) {p = (( np))-1;} - p--; {p++; cs = 21; goto _out;} } - goto st21; -tr5: -#line 242 "parser.rl" - { - char *np; - json->current_nesting++; - np = JSON_parse_array(json, p, pe, result); - json->current_nesting--; - if (np == NULL) { p--; {p++; cs = 21; goto _out;} } else {p = (( np))-1;} +#undef APPEND_CHAR + + if (stringEnd > p) { + MEMCPY(buffer, p, char, stringEnd - p); + buffer += stringEnd - p; } - goto st21; -tr9: -#line 250 "parser.rl" - { - char *np; - json->current_nesting++; - np = JSON_parse_object(json, p, pe, result); - json->current_nesting--; - if (np == NULL) { p--; {p++; cs = 21; goto _out;} } else {p = (( np))-1;} + rb_str_set_len(result, buffer - bufferStart); + + if (symbolize) { + result = rb_str_intern(result); + } else if (intern) { + result = rb_str_to_interned_str(result); } - goto st21; -tr16: -#line 212 "parser.rl" - { - if (json->allow_nan) { - *result = CInfinity; - } else { - rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8); + + return result; +} + +#define MAX_FAST_INTEGER_SIZE 18 + +static VALUE json_decode_large_integer(const char *start, long len) +{ + VALUE buffer_v; + char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1); + MEMCPY(buffer, start, char, len); + buffer[len] = '\0'; + VALUE number = rb_cstr2inum(buffer, 10); + RB_ALLOCV_END(buffer_v); + return number; +} + +static inline VALUE +json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end) +{ + if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) { + if (negative) { + return INT64T2NUM(-((int64_t)mantissa)); } + return UINT64T2NUM(mantissa); } - goto st21; -tr18: -#line 205 "parser.rl" - { - if (json->allow_nan) { - *result = CNaN; - } else { - rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2); - } + + return json_decode_large_integer(start, end - start); +} + +static VALUE json_decode_large_float(const char *start, long len) +{ + if (RB_LIKELY(len < 64)) { + char buffer[64]; + MEMCPY(buffer, start, char, len); + buffer[len] = '\0'; + return DBL2NUM(rb_cstr_to_dbl(buffer, 1)); } - goto st21; -tr22: -#line 199 "parser.rl" - { - *result = Qfalse; + + VALUE buffer_v; + char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1); + MEMCPY(buffer, start, char, len); + buffer[len] = '\0'; + VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1)); + RB_ALLOCV_END(buffer_v); + return number; +} + +/* Ruby JSON optimized float decoder using vendored Ryu algorithm + * Accepts pre-extracted mantissa and exponent from first-pass validation + */ +static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative, + const char *start, const char *end) +{ + if (RB_UNLIKELY(config->decimal_class)) { + VALUE text = rb_str_new(start, end - start); + return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text); } - goto st21; -tr25: -#line 196 "parser.rl" - { - *result = Qnil; + + // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case) + // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308) + if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) { + return json_decode_large_float(start, end - start); } - goto st21; -tr28: -#line 202 "parser.rl" - { - *result = Qtrue; + + return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative)); +} + +static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count) +{ + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count)); + rvalue_stack_pop(state->stack, count); + + if (config->freeze) { + RB_OBJ_FREEZE(array); } - goto st21; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: -#line 258 "parser.rl" - { p--; {p++; cs = 21; goto _out;} } -#line 608 "parser.c" - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - if ( (*p) == 110 ) - goto st3; - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 102 ) - goto st4; - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - if ( (*p) == 105 ) - goto st5; - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 110 ) - goto st6; - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - if ( (*p) == 105 ) - goto st7; - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 116 ) - goto st8; - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - if ( (*p) == 121 ) - goto tr16; - goto st0; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 97 ) - goto st10; - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - if ( (*p) == 78 ) - goto tr18; - goto st0; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - if ( (*p) == 97 ) - goto st12; - goto st0; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - if ( (*p) == 108 ) - goto st13; - goto st0; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - if ( (*p) == 115 ) - goto st14; - goto st0; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - if ( (*p) == 101 ) - goto tr22; - goto st0; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - if ( (*p) == 117 ) - goto st16; - goto st0; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - if ( (*p) == 108 ) - goto st17; - goto st0; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - if ( (*p) == 108 ) - goto tr25; - goto st0; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - if ( (*p) == 114 ) - goto st19; - goto st0; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - if ( (*p) == 117 ) - goto st20; - goto st0; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - if ( (*p) == 101 ) - goto tr28; - goto st0; - } - _test_eof21: cs = 21; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 279 "parser.rl" - - if (cs >= JSON_value_first_final) { - return p; - } else { - return NULL; + + return array; +} + +static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs) +{ + VALUE set = rb_hash_new_capa(count / 2); + for (size_t index = 0; index < count; index += 2) { + size_t before = RHASH_SIZE(set); + VALUE key = pairs[index]; + rb_hash_aset(set, key, Qtrue); + if (RHASH_SIZE(set) == before) { + if (RB_SYMBOL_P(key)) { + return rb_sym2str(key); + } + return key; + } } + return Qfalse; } +static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key) +{ + VALUE message = rb_sprintf( + "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`", + rb_inspect(duplicate_key) + ); -#line 779 "parser.c" -static const int JSON_integer_start = 1; -static const int JSON_integer_first_final = 3; -static const int JSON_integer_error = 0; + emit_parse_warning(RSTRING_PTR(message), state); + RB_GC_GUARD(message); +} -static const int JSON_integer_en_main = 1; +#ifdef RBIMPL_ATTR_NORETURN +RBIMPL_ATTR_NORETURN() +#endif +static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key) +{ + VALUE message = rb_sprintf( + "duplicate key %"PRIsVALUE, + rb_inspect(duplicate_key) + ); + + raise_parse_error(RSTRING_PTR(message), state); + RB_GC_GUARD(message); +} +static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count) +{ + size_t entries_count = count / 2; + VALUE object = rb_hash_new_capa(entries_count); + const VALUE *pairs = rvalue_stack_peek(state->stack, count); + rb_hash_bulk_insert(count, pairs, object); + + if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) { + switch (config->on_duplicate_key) { + case JSON_IGNORE: + break; + case JSON_DEPRECATED: + emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs)); + break; + case JSON_RAISE: + raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs)); + break; + } + } -#line 295 "parser.rl" + rvalue_stack_pop(state->stack, count); + if (config->freeze) { + RB_OBJ_FREEZE(object); + } -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) + return object; +} + +static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value) { - int cs = EVIL; - - -#line 795 "parser.c" - { - cs = JSON_integer_start; - } - -#line 302 "parser.rl" - json->memo = p; - -#line 803 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - switch( (*p) ) { - case 45: goto st2; - case 48: goto st3; - } - if ( 49 <= (*p) && (*p) <= 57 ) - goto st5; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - if ( (*p) == 48 ) - goto st3; - if ( 49 <= (*p) && (*p) <= 57 ) - goto st5; - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st0; - goto tr4; -tr4: -#line 292 "parser.rl" - { p--; {p++; cs = 4; goto _out;} } - goto st4; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: -#line 844 "parser.c" - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st5; - goto tr4; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 304 "parser.rl" - - if (cs >= JSON_integer_first_final) { - long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10); - return p + 1; - } else { - return NULL; + if (RB_UNLIKELY(config->on_load_proc)) { + value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil); } + rvalue_stack_push(state->stack, value, &state->stack_handle, &state->stack); + return value; } +static const bool string_scan_table[256] = { + // ASCII Control Characters + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // ASCII Characters + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; -#line 878 "parser.c" -static const int JSON_float_start = 1; -static const int JSON_float_first_final = 8; -static const int JSON_float_error = 0; +#ifdef HAVE_SIMD +static SIMD_Implementation simd_impl = SIMD_NONE; +#endif /* HAVE_SIMD */ -static const int JSON_float_en_main = 1; +ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state) +{ +#ifdef HAVE_SIMD +#if defined(HAVE_SIMD_NEON) + uint64_t mask = 0; + if (string_scan_simd_neon(&state->cursor, state->end, &mask)) { + state->cursor += trailing_zeros64(mask) >> 2; + return true; + } -#line 329 "parser.rl" +#elif defined(HAVE_SIMD_SSE2) + if (simd_impl == SIMD_SSE2) { + int mask = 0; + if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) { + state->cursor += trailing_zeros(mask); + return true; + } + } +#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */ +#endif /* HAVE_SIMD */ + while (!eos(state)) { + if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) { + return true; + } + state->cursor++; + } + return false; +} -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) +static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start) { - int cs = EVIL; - - -#line 894 "parser.c" - { - cs = JSON_float_start; - } - -#line 336 "parser.rl" - json->memo = p; - -#line 902 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - switch( (*p) ) { - case 45: goto st2; - case 48: goto st3; - } - if ( 49 <= (*p) && (*p) <= 57 ) - goto st7; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - if ( (*p) == 48 ) - goto st3; - if ( 49 <= (*p) && (*p) <= 57 ) - goto st7; - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - switch( (*p) ) { - case 46: goto st4; - case 69: goto st5; - case 101: goto st5; - } - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 69: goto st5; - case 101: goto st5; - } - if ( (*p) > 46 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - } else if ( (*p) >= 45 ) - goto st0; - goto tr9; -tr9: -#line 323 "parser.rl" - { p--; {p++; cs = 9; goto _out;} } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 967 "parser.c" - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - switch( (*p) ) { - case 43: goto st6; - case 45: goto st6; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - switch( (*p) ) { - case 69: goto st0; - case 101: goto st0; - } - if ( (*p) > 46 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; - } else if ( (*p) >= 45 ) - goto st0; - goto tr9; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - switch( (*p) ) { - case 46: goto st4; - case 69: goto st5; - case 101: goto st5; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st7; - goto st0; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 338 "parser.rl" - - if (cs >= JSON_float_first_final) { - long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); - *result = rb_float_new(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1)); - return p + 1; - } else { - return NULL; + const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS]; + JSON_UnescapePositions positions = { + .size = 0, + .positions = backslashes, + .additional_backslashes = 0, + }; + + do { + switch (*state->cursor) { + case '"': { + VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions); + state->cursor++; + return json_push_value(state, config, string); + } + case '\\': { + if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) { + backslashes[positions.size] = state->cursor; + positions.size++; + } else { + positions.additional_backslashes++; + } + state->cursor++; + break; + } + default: + if (!config->allow_control_characters) { + raise_parse_error("invalid ASCII control character in string: %s", state); + } + break; + } + + state->cursor++; + } while (string_scan(state)); + + raise_parse_error("unexpected end of input, expected closing \"", state); + return Qfalse; +} + +ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name) +{ + state->cursor++; + const char *start = state->cursor; + + if (RB_UNLIKELY(!string_scan(state))) { + raise_parse_error("unexpected end of input, expected closing \"", state); } + + if (RB_LIKELY(*state->cursor == '"')) { + VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name); + state->cursor++; + return json_push_value(state, config, string); + } + return json_parse_escaped_string(state, config, is_name, start); +} + +#if JSON_CPU_LITTLE_ENDIAN_64BITS +// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/ +// Additional References: +// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html +static inline uint64_t decode_8digits_unrolled(uint64_t val) { + const uint64_t mask = 0x000000FF000000FF; + const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return val; } +static inline uint64_t decode_4digits_unrolled(uint32_t val) { + const uint32_t mask = 0x000000FF; + const uint32_t mul1 = 100; + val -= 0x30303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = ((val & mask) * mul1) + (((val >> 16) & mask)); + return val; +} +#endif +static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator) +{ + const char *start = state->cursor; -#line 1044 "parser.c" -static const int JSON_array_start = 1; -static const int JSON_array_first_final = 17; -static const int JSON_array_error = 0; +#if JSON_CPU_LITTLE_ENDIAN_64BITS + while (rest(state) >= sizeof(uint64_t)) { + uint64_t next_8bytes; + memcpy(&next_8bytes, state->cursor, sizeof(uint64_t)); -static const int JSON_array_en_main = 1; + // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333 + // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html + uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4); + if (match == 0x3333333333333333) { // 8 consecutive digits + *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes); + state->cursor += 8; + continue; + } -#line 381 "parser.rl" + uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT; + if (consecutive_digits >= 4) { + *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes); + state->cursor += 4; + consecutive_digits -= 4; + } -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result) + while (consecutive_digits) { + *accumulator = *accumulator * 10 + (*state->cursor - '0'); + consecutive_digits--; + state->cursor++; + } + + return (int)(state->cursor - start); + } +#endif + + char next_char; + while (rb_isdigit(next_char = peek(state))) { + *accumulator = *accumulator * 10 + (next_char - '0'); + state->cursor++; + } + return (int)(state->cursor - start); +} + +static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start) { - int cs = EVIL; - VALUE array_class = json->array_class; + bool integer = true; + const char first_digit = *state->cursor; - if (json->max_nesting && json->current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting); + // Variables for Ryu optimization - extract digits during parsing + int32_t exponent = 0; + int decimal_point_pos = -1; + uint64_t mantissa = 0; + + // Parse integer part and extract mantissa digits + int mantissa_digits = json_parse_digits(state, &mantissa); + + if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) { + raise_parse_error_at("invalid number: %s", state, start); } - *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); - - -#line 1066 "parser.c" - { - cs = JSON_array_start; - } - -#line 394 "parser.rl" - -#line 1073 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 91 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 13: goto st2; - case 32: goto st2; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st13; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 93: goto tr4; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st2; - goto st0; -tr2: -#line 358 "parser.rl" - { - VALUE v = Qnil; - char *np = JSON_parse_value(json, p, pe, &v); - if (np == NULL) { - p--; {p++; cs = 3; goto _out;} - } else { - if (NIL_P(json->array_class)) { - rb_ary_push(*result, v); - } else { - rb_funcall(*result, i_leftshift, 1, v); - } - {p = (( np))-1;} + + // Parse fractional part + if (peek(state) == '.') { + integer = false; + decimal_point_pos = mantissa_digits; // Remember position of decimal point + state->cursor++; + + int fractional_digits = json_parse_digits(state, &mantissa); + mantissa_digits += fractional_digits; + + if (RB_UNLIKELY(!fractional_digits)) { + raise_parse_error_at("invalid number: %s", state, start); } } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 1132 "parser.c" - switch( (*p) ) { - case 13: goto st3; - case 32: goto st3; - case 44: goto st4; - case 47: goto st9; - case 93: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st3; - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 13: goto st4; - case 32: goto st4; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st5; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st4; - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st8; - } - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - if ( (*p) == 42 ) - goto st7; - goto st6; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st4; - } - goto st6; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - if ( (*p) == 10 ) - goto st4; - goto st8; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - switch( (*p) ) { - case 42: goto st10; - case 47: goto st12; - } - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - if ( (*p) == 42 ) - goto st11; - goto st10; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - switch( (*p) ) { - case 42: goto st11; - case 47: goto st3; - } - goto st10; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - if ( (*p) == 10 ) - goto st3; - goto st12; -tr4: -#line 373 "parser.rl" - { p--; {p++; cs = 17; goto _out;} } - goto st17; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: -#line 1239 "parser.c" - goto st0; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - switch( (*p) ) { - case 42: goto st14; - case 47: goto st16; - } - goto st0; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - if ( (*p) == 42 ) - goto st15; - goto st14; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - switch( (*p) ) { - case 42: goto st15; - case 47: goto st2; - } - goto st14; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - if ( (*p) == 10 ) - goto st2; - goto st16; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 395 "parser.rl" - - if(cs >= JSON_array_first_final) { - return p + 1; - } else { - rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); - return NULL; + + // Parse exponent + if (rb_tolower(peek(state)) == 'e') { + integer = false; + state->cursor++; + + bool negative_exponent = false; + const char next_char = peek(state); + if (next_char == '-' || next_char == '+') { + negative_exponent = next_char == '-'; + state->cursor++; + } + + uint64_t abs_exponent = 0; + int exponent_digits = json_parse_digits(state, &abs_exponent); + + if (RB_UNLIKELY(!exponent_digits)) { + raise_parse_error_at("invalid number: %s", state, start); + } + + exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent); } + + if (integer) { + return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor); + } + + // Adjust exponent based on decimal point position + if (decimal_point_pos >= 0) { + exponent -= (mantissa_digits - decimal_point_pos); + } + + return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor); } -static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd) +static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config) { - char *p = string, *pe = string, *unescape; - int unescape_len; - char buf[4]; - - while (pe < stringEnd) { - if (*pe == '\\') { - unescape = (char *) "?"; - unescape_len = 1; - if (pe > p) rb_str_buf_cat(result, p, pe - p); - switch (*++pe) { - case 'n': - unescape = (char *) "\n"; - break; - case 'r': - unescape = (char *) "\r"; - break; - case 't': - unescape = (char *) "\t"; - break; - case '"': - unescape = (char *) "\""; - break; - case '\\': - unescape = (char *) "\\"; - break; - case 'b': - unescape = (char *) "\b"; - break; - case 'f': - unescape = (char *) "\f"; - break; - case 'u': - if (pe > stringEnd - 4) { - return Qnil; - } else { - UTF32 ch = unescape_unicode((unsigned char *) ++pe); - pe += 3; - if (UNI_SUR_HIGH_START == (ch & 0xFC00)) { - pe++; - if (pe > stringEnd - 6) return Qnil; - if (pe[0] == '\\' && pe[1] == 'u') { - UTF32 sur = unescape_unicode((unsigned char *) pe + 2); - ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) - | (sur & 0x3FF)); - pe += 5; - } else { - unescape = (char *) "?"; - break; - } + return json_parse_number(state, config, false, state->cursor); +} + +static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config) +{ + const char *start = state->cursor; + state->cursor++; + return json_parse_number(state, config, true, start); +} + +static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) +{ + json_eat_whitespace(state); + + switch (peek(state)) { + case 'n': + if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) { + state->cursor += 4; + return json_push_value(state, config, Qnil); + } + + raise_parse_error("unexpected token %s", state); + break; + case 't': + if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) { + state->cursor += 4; + return json_push_value(state, config, Qtrue); + } + + raise_parse_error("unexpected token %s", state); + break; + case 'f': + // Note: memcmp with a small power of two compile to an integer comparison + if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) { + state->cursor += 5; + return json_push_value(state, config, Qfalse); + } + + raise_parse_error("unexpected token %s", state); + break; + case 'N': + // Note: memcmp with a small power of two compile to an integer comparison + if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) { + state->cursor += 3; + return json_push_value(state, config, CNaN); + } + + raise_parse_error("unexpected token %s", state); + break; + case 'I': + if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) { + state->cursor += 8; + return json_push_value(state, config, CInfinity); + } + + raise_parse_error("unexpected token %s", state); + break; + case '-': { + // Note: memcmp with a small power of two compile to an integer comparison + if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) { + if (config->allow_nan) { + state->cursor += 9; + return json_push_value(state, config, CMinusInfinity); + } else { + raise_parse_error("unexpected token %s", state); + } + } + return json_push_value(state, config, json_parse_negative_number(state, config)); + break; + } + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + return json_push_value(state, config, json_parse_positive_number(state, config)); + break; + case '"': { + // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"} + return json_parse_string(state, config, false); + break; + } + case '[': { + state->cursor++; + json_eat_whitespace(state); + long stack_head = state->stack->head; + + if (peek(state) == ']') { + state->cursor++; + return json_push_value(state, config, json_decode_array(state, config, 0)); + } else { + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + } + state->in_array++; + json_parse_any(state, config); + } + + while (true) { + json_eat_whitespace(state); + + const char next_char = peek(state); + + if (RB_LIKELY(next_char == ',')) { + state->cursor++; + if (config->allow_trailing_comma) { + json_eat_whitespace(state); + if (peek(state) == ']') { + continue; } - unescape_len = convert_UTF32_to_UTF8(buf, ch); - unescape = buf; } - break; - default: - p = pe; + json_parse_any(state, config); continue; + } + + if (next_char == ']') { + state->cursor++; + long count = state->stack->head - stack_head; + state->current_nesting--; + state->in_array--; + return json_push_value(state, config, json_decode_array(state, config, count)); + } + + raise_parse_error("expected ',' or ']' after array value", state); } - rb_str_buf_cat(result, unescape, unescape_len); - p = ++pe; - } else { - pe++; + break; } - } - rb_str_buf_cat(result, p, pe - p); - return result; -} + case '{': { + const char *object_start_cursor = state->cursor; + state->cursor++; + json_eat_whitespace(state); + long stack_head = state->stack->head; -#line 1376 "parser.c" -static const int JSON_string_start = 1; -static const int JSON_string_first_final = 8; -static const int JSON_string_error = 0; + if (peek(state) == '}') { + state->cursor++; + return json_push_value(state, config, json_decode_object(state, config, 0)); + } else { + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + } -static const int JSON_string_en_main = 1; + if (peek(state) != '"') { + raise_parse_error("expected object key, got %s", state); + } + json_parse_string(state, config, true); + json_eat_whitespace(state); + if (peek(state) != ':') { + raise_parse_error("expected ':' after object key", state); + } + state->cursor++; -#line 494 "parser.rl" + json_parse_any(state, config); + } + while (true) { + json_eat_whitespace(state); -static int -match_i(VALUE regexp, VALUE klass, VALUE memo) -{ - if (regexp == Qundef) return ST_STOP; - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) && - RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) { - rb_ary_push(memo, klass); - return ST_STOP; - } - return ST_CONTINUE; -} + const char next_char = peek(state); + if (next_char == '}') { + state->cursor++; + state->current_nesting--; + size_t count = state->stack->head - stack_head; -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - VALUE match_string; - - *result = rb_str_buf_new(0); - -#line 1406 "parser.c" - { - cs = JSON_string_start; - } - -#line 515 "parser.rl" - json->memo = p; - -#line 1414 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 34 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 34: goto tr2; - case 92: goto st3; - } - if ( 0 <= (*p) && (*p) <= 31 ) - goto st0; - goto st2; -tr2: -#line 480 "parser.rl" - { - *result = json_string_unescape(*result, json->memo + 1, p); - if (NIL_P(*result)) { - p--; - {p++; cs = 8; goto _out;} - } else { - FORCE_UTF8(*result); - {p = (( p + 1))-1;} + // Temporary rewind cursor in case an error is raised + const char *final_cursor = state->cursor; + state->cursor = object_start_cursor; + VALUE object = json_decode_object(state, config, count); + state->cursor = final_cursor; + + return json_push_value(state, config, object); + } + + if (next_char == ',') { + state->cursor++; + json_eat_whitespace(state); + + if (config->allow_trailing_comma) { + if (peek(state) == '}') { + continue; + } + } + + if (RB_UNLIKELY(peek(state) != '"')) { + raise_parse_error("expected object key, got: %s", state); + } + json_parse_string(state, config, true); + + json_eat_whitespace(state); + if (RB_UNLIKELY(peek(state) != ':')) { + raise_parse_error("expected ':' after object key, got: %s", state); + } + state->cursor++; + + json_parse_any(state, config); + + continue; + } + + raise_parse_error("expected ',' or '}' after object value, got: %s", state); + } + break; } - } -#line 491 "parser.rl" - { p--; {p++; cs = 8; goto _out;} } - goto st8; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: -#line 1457 "parser.c" - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 117 ) - goto st4; - if ( 0 <= (*p) && (*p) <= 31 ) - goto st0; - goto st2; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st5; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st5; - } else - goto st5; - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st6; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st6; - } else - goto st6; - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st7; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st7; - } else - goto st7; - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st2; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st2; - } else - goto st2; - goto st0; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 517 "parser.rl" - - if (json->create_additions && RTEST(match_string = json->match_string)) { - VALUE klass; - VALUE memo = rb_ary_new2(2); - rb_ary_push(memo, *result); - rb_hash_foreach(match_string, match_i, memo); - klass = rb_ary_entry(memo, 1); - if (RTEST(klass)) { - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - if (json->symbolize_names && json->parsing_name) { - *result = rb_str_intern(*result); + case 0: + raise_parse_error("unexpected end of input", state); + break; + + default: + raise_parse_error("unexpected character: %s", state); + break; } - if (cs >= JSON_string_first_final) { - return p + 1; - } else { - return NULL; + + raise_parse_error("unreachable: %s", state); + return Qundef; +} + +static void json_ensure_eof(JSON_ParserState *state) +{ + json_eat_whitespace(state); + if (!eos(state)) { + raise_parse_error("unexpected token at end of stream %s", state); } } @@ -1566,51 +1411,84 @@ case 7: static VALUE convert_encoding(VALUE source) { - char *ptr = RSTRING_PTR(source); - long len = RSTRING_LEN(source); - if (len < 2) { - rb_raise(eParserError, "A JSON text must at least contain two octets!"); - } -#ifdef HAVE_RUBY_ENCODING_H - { - VALUE encoding = rb_funcall(source, i_encoding, 0); - if (encoding == CEncoding_ASCII_8BIT) { - if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { - source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32BE); - } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { - source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16BE); - } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { - source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32LE); - } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { - source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16LE); - } else { - source = rb_str_dup(source); - FORCE_UTF8(source); + int encindex = RB_ENCODING_GET(source); + + if (RB_LIKELY(encindex == utf8_encindex)) { + return source; + } + + if (encindex == binary_encindex) { + // For historical reason, we silently reinterpret binary strings as UTF-8 + return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + } + + return rb_funcall(source, i_encode, 1, Encoding_UTF_8); +} + +static int parser_config_init_i(VALUE key, VALUE val, VALUE data) +{ + JSON_ParserConfig *config = (JSON_ParserConfig *)data; + + if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); } + else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); } + else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { config->freeze = RTEST(val); } + else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; } + else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } + else if (key == sym_decimal_class) { + if (RTEST(val)) { + if (rb_respond_to(val, i_try_convert)) { + config->decimal_class = val; + config->decimal_method_id = i_try_convert; + } else if (rb_respond_to(val, i_new)) { + config->decimal_class = val; + config->decimal_method_id = i_new; + } else if (RB_TYPE_P(val, T_CLASS)) { + VALUE name = rb_class_name(val); + const char *name_cstr = RSTRING_PTR(name); + const char *last_colon = strrchr(name_cstr, ':'); + if (last_colon) { + const char *mod_path_end = last_colon - 1; + VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); + config->decimal_class = rb_path_to_class(mod_path); + + const char *method_name_beg = last_colon + 1; + long before_len = method_name_beg - name_cstr; + long len = RSTRING_LEN(name) - before_len; + VALUE method_name = rb_str_substr(name, before_len, len); + config->decimal_method_id = SYM2ID(rb_str_intern(method_name)); + } else { + config->decimal_class = rb_mKernel; + config->decimal_method_id = SYM2ID(rb_str_intern(name)); + } } - } else { - source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8); } } -#else - if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { - source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source); - } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { - source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source); - } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { - source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source); - } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { - source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source); + + return ST_CONTINUE; +} + +static void parser_config_init(JSON_ParserConfig *config, VALUE opts) +{ + config->max_nesting = 100; + + if (!NIL_P(opts)) { + Check_Type(opts, T_HASH); + if (RHASH_SIZE(opts) > 0) { + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. + rb_hash_foreach(opts, parser_config_init_i, (VALUE)config); + } + } -#endif - return source; } /* - * call-seq: new(source, opts => {}) - * - * Creates a new JSON::Ext::Parser instance for the string _source_. + * call-seq: new(opts => {}) * - * Creates a new JSON::Ext::Parser instance for the string _source_. + * Creates a new JSON::Ext::ParserConfig instance. * * It will be configured by the _opts_ hash. _opts_ can have the following * keys: @@ -1623,582 +1501,172 @@ static VALUE convert_encoding(VALUE source) * defiance of RFC 4627 to be parsed by the Parser. This option defaults to * false. * * *symbolize_names*: If set to true, returns symbols for the names - * (keys) in a JSON object. Otherwise strings are returned, which is also - * the default. - * * *create_additions*: If set to false, the Parser doesn't create - * additions even if a matchin class and create_id was found. This option - * defaults to true. - * * *object_class*: Defaults to Hash - * * *array_class*: Defaults to Array + * (keys) in a JSON object. Otherwise strings are returned, which is + * also the default. It's not possible to use this option in + * conjunction with the *create_additions* option. + * * *decimal_class*: Specifies which class to use instead of the default + * (Float) when parsing decimal numbers. This class must accept a single + * string argument in its constructor. */ -static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) +static VALUE cParserConfig_initialize(VALUE self, VALUE opts) { - VALUE source, opts; - GET_PARSER_INIT; - - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } - rb_scan_args(argc, argv, "11", &source, &opts); - if (!NIL_P(opts)) { - opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); - if (NIL_P(opts)) { - rb_raise(rb_eArgError, "opts needs to be like a hash"); - } else { - VALUE tmp = ID2SYM(i_max_nesting); - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - json->max_nesting = FIX2INT(max_nesting); - } else { - json->max_nesting = 0; - } - } else { - json->max_nesting = 100; - } - tmp = ID2SYM(i_allow_nan); - if (option_given_p(opts, tmp)) { - json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->allow_nan = 0; - } - tmp = ID2SYM(i_symbolize_names); - if (option_given_p(opts, tmp)) { - json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->symbolize_names = 0; - } - tmp = ID2SYM(i_quirks_mode); - if (option_given_p(opts, tmp)) { - VALUE quirks_mode = rb_hash_aref(opts, tmp); - json->quirks_mode = RTEST(quirks_mode) ? 1 : 0; - } else { - json->quirks_mode = 0; - } - tmp = ID2SYM(i_create_additions); - if (option_given_p(opts, tmp)) { - json->create_additions = RTEST(rb_hash_aref(opts, tmp)); - } else { - json->create_additions = 0; - } - tmp = ID2SYM(i_create_id); - if (option_given_p(opts, tmp)) { - json->create_id = rb_hash_aref(opts, tmp); - } else { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - tmp = ID2SYM(i_object_class); - if (option_given_p(opts, tmp)) { - json->object_class = rb_hash_aref(opts, tmp); - } else { - json->object_class = Qnil; - } - tmp = ID2SYM(i_array_class); - if (option_given_p(opts, tmp)) { - json->array_class = rb_hash_aref(opts, tmp); - } else { - json->array_class = Qnil; - } - tmp = ID2SYM(i_match_string); - if (option_given_p(opts, tmp)) { - VALUE match_string = rb_hash_aref(opts, tmp); - json->match_string = RTEST(match_string) ? match_string : Qnil; - } else { - json->match_string = Qnil; - } - } - } else { - json->max_nesting = 100; - json->allow_nan = 0; - json->create_additions = 1; - json->create_id = rb_funcall(mJSON, i_create_id, 0); - json->object_class = Qnil; - json->array_class = Qnil; - } - source = rb_convert_type(source, T_STRING, "String", "to_str"); - if (!json->quirks_mode) { - source = convert_encoding(StringValue(source)); - } - json->current_nesting = 0; - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source);; - json->Vsource = source; - return self; -} - + rb_check_frozen(self); + GET_PARSER_CONFIG; -#line 1733 "parser.c" -static const int JSON_start = 1; -static const int JSON_first_final = 10; -static const int JSON_error = 0; + parser_config_init(config, opts); -static const int JSON_en_main = 1; + RB_OBJ_WRITTEN(self, Qundef, config->decimal_class); - -#line 740 "parser.rl" - - -static VALUE cParser_parse_strict(VALUE self) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; - - -#line 1752 "parser.c" - { - cs = JSON_start; - } - -#line 750 "parser.rl" - p = json->source; - pe = p + json->len; - -#line 1761 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 13: goto st1; - case 32: goto st1; - case 47: goto st2; - case 91: goto tr3; - case 123: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st1; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 42: goto st3; - case 47: goto st5; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 42 ) - goto st4; - goto st3; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st4; - case 47: goto st1; - } - goto st3; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 10 ) - goto st1; - goto st5; -tr3: -#line 729 "parser.rl" - { - char *np; - json->current_nesting = 1; - np = JSON_parse_array(json, p, pe, &result); - if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} - } - goto st10; -tr4: -#line 722 "parser.rl" - { - char *np; - json->current_nesting = 1; - np = JSON_parse_object(json, p, pe, &result); - if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} - } - goto st10; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: -#line 1838 "parser.c" - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 47: goto st6; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st9; - } - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 42 ) - goto st8; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 42: goto st8; - case 47: goto st10; - } - goto st7; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 10 ) - goto st10; - goto st9; - } - _test_eof1: cs = 1; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 753 "parser.rl" - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); - return Qnil; - } + return self; } +static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource) +{ + Vsource = convert_encoding(StringValue(Vsource)); + StringValue(Vsource); + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; -#line 1907 "parser.c" -static const int JSON_quirks_mode_start = 1; -static const int JSON_quirks_mode_first_final = 10; -static const int JSON_quirks_mode_error = 0; + long len; + const char *start; + RSTRING_GETMEM(Vsource, start, len); -static const int JSON_quirks_mode_en_main = 1; + JSON_ParserState _state = { + .start = start, + .cursor = start, + .end = start + len, + .stack = &stack, + }; + JSON_ParserState *state = &_state; + VALUE result = json_parse_any(state, config); -#line 778 "parser.rl" + // This may be skipped in case of exception, but + // it won't cause a leak. + rvalue_stack_eagerly_release(state->stack_handle); + json_ensure_eof(state); -static VALUE cParser_parse_quirks_mode(VALUE self) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; - - -#line 1926 "parser.c" - { - cs = JSON_quirks_mode_start; - } - -#line 788 "parser.rl" - p = json->source; - pe = p + json->len; - -#line 1935 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 13: goto st1; - case 32: goto st1; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st6; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st1; - goto st0; -st0: -cs = 0; - goto _out; -tr2: -#line 770 "parser.rl" - { - char *np = JSON_parse_value(json, p, pe, &result); - if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} - } - goto st10; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: -#line 1979 "parser.c" - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 47: goto st2; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 42: goto st3; - case 47: goto st5; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 42 ) - goto st4; - goto st3; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st4; - case 47: goto st10; - } - goto st3; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 10 ) - goto st10; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st9; - } - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 42 ) - goto st8; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 42: goto st8; - case 47: goto st1; - } - goto st7; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 10 ) - goto st1; - goto st9; - } - _test_eof1: cs = 1; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 791 "parser.rl" - - if (cs >= JSON_quirks_mode_first_final && p == pe) { - return result; - } else { - rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); - return Qnil; - } + return result; } /* - * call-seq: parse() + * call-seq: parse(source) * * Parses the current JSON text _source_ and returns the complete data * structure as a result. + * It raises JSON::ParserError if fail to parse. */ -static VALUE cParser_parse(VALUE self) +static VALUE cParserConfig_parse(VALUE self, VALUE Vsource) { - GET_PARSER; - - if (json->quirks_mode) { - return cParser_parse_quirks_mode(self); - } else { - return cParser_parse_strict(self); - } + GET_PARSER_CONFIG; + return cParser_parse(config, Vsource); } - -static JSON_Parser *JSON_allocate() +static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts) { - JSON_Parser *json = ALLOC(JSON_Parser); - MEMZERO(json, JSON_Parser, 1); - json->fbuffer = fbuffer_alloc(0); - return json; -} + Vsource = convert_encoding(StringValue(Vsource)); + StringValue(Vsource); -static void JSON_mark(JSON_Parser *json) -{ - rb_gc_mark_maybe(json->Vsource); - rb_gc_mark_maybe(json->create_id); - rb_gc_mark_maybe(json->object_class); - rb_gc_mark_maybe(json->array_class); - rb_gc_mark_maybe(json->match_string); + JSON_ParserConfig _config = {0}; + JSON_ParserConfig *config = &_config; + parser_config_init(config, opts); + + return cParser_parse(config, Vsource); } -static void JSON_free(JSON_Parser *json) +static void JSON_ParserConfig_mark(void *ptr) { - fbuffer_free(json->fbuffer); - ruby_xfree(json); + JSON_ParserConfig *config = ptr; + rb_gc_mark(config->on_load_proc); + rb_gc_mark(config->decimal_class); } -static VALUE cJSON_parser_s_allocate(VALUE klass) +static void JSON_ParserConfig_free(void *ptr) { - JSON_Parser *json = JSON_allocate(); - return Data_Wrap_Struct(klass, JSON_mark, JSON_free, json); + JSON_ParserConfig *config = ptr; + ruby_xfree(config); } -/* - * call-seq: source() - * - * Returns a copy of the current _source_ string, that was used to construct - * this Parser. - */ -static VALUE cParser_source(VALUE self) +static size_t JSON_ParserConfig_memsize(const void *ptr) { - GET_PARSER; - return rb_str_dup(json->Vsource); + return sizeof(JSON_ParserConfig); } -/* - * call-seq: quirks_mode?() - * - * Returns a true, if this parser is in quirks_mode, false otherwise. - */ -static VALUE cParser_quirks_mode_p(VALUE self) +static const rb_data_type_t JSON_ParserConfig_type = { + "JSON::Ext::Parser/ParserConfig", + { + JSON_ParserConfig_mark, + JSON_ParserConfig_free, + JSON_ParserConfig_memsize, + }, + 0, 0, + RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE, +}; + +static VALUE cJSON_parser_s_allocate(VALUE klass) { - GET_PARSER; - return json->quirks_mode ? Qtrue : Qfalse; + JSON_ParserConfig *config; + return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config); } - -void Init_parser() +void Init_parser(void) { +#ifdef HAVE_RB_EXT_RACTOR_SAFE + rb_ext_ractor_safe(true); +#endif + +#undef rb_intern rb_require("json/common"); mJSON = rb_define_module("JSON"); - mExt = rb_define_module_under(mJSON, "Ext"); - cParser = rb_define_class_under(mExt, "Parser", rb_cObject); - eParserError = rb_path2class("JSON::ParserError"); + VALUE mExt = rb_define_module_under(mJSON, "Ext"); + VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject); eNestingError = rb_path2class("JSON::NestingError"); - rb_define_alloc_func(cParser, cJSON_parser_s_allocate); - rb_define_method(cParser, "initialize", cParser_initialize, -1); - rb_define_method(cParser, "parse", cParser_parse, 0); - rb_define_method(cParser, "source", cParser_source, 0); - rb_define_method(cParser, "quirks_mode?", cParser_quirks_mode_p, 0); + rb_gc_register_mark_object(eNestingError); + rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate); + rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1); + rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1); + + VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject); + rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); CNaN = rb_const_get(mJSON, rb_intern("NaN")); + rb_gc_register_mark_object(CNaN); + CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); - CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); + rb_gc_register_mark_object(CInfinity); - i_json_creatable_p = rb_intern("json_creatable?"); - i_json_create = rb_intern("json_create"); - i_create_id = rb_intern("create_id"); - i_create_additions = rb_intern("create_additions"); - i_chr = rb_intern("chr"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = rb_intern("allow_nan"); - i_symbolize_names = rb_intern("symbolize_names"); - i_quirks_mode = rb_intern("quirks_mode"); - i_object_class = rb_intern("object_class"); - i_array_class = rb_intern("array_class"); - i_match = rb_intern("match"); - i_match_string = rb_intern("match_string"); - i_key_p = rb_intern("key?"); - i_deep_const_get = rb_intern("deep_const_get"); - i_aset = rb_intern("[]="); - i_aref = rb_intern("[]"); - i_leftshift = rb_intern("<<"); -#ifdef HAVE_RUBY_ENCODING_H - CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8")); - CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be")); - CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le")); - CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be")); - CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le")); - CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit")); - i_encoding = rb_intern("encoding"); + CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); + rb_gc_register_mark_object(CMinusInfinity); + + rb_global_variable(&Encoding_UTF_8); + Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + + sym_max_nesting = ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters")); + sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); + sym_freeze = ID2SYM(rb_intern("freeze")); + sym_on_load = ID2SYM(rb_intern("on_load")); + sym_decimal_class = ID2SYM(rb_intern("decimal_class")); + sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key")); + + i_new = rb_intern("new"); + i_try_convert = rb_intern("try_convert"); + i_uminus = rb_intern("-@"); i_encode = rb_intern("encode"); -#else - i_iconv = rb_intern("iconv"); + + binary_encindex = rb_ascii8bit_encindex(); + utf8_encindex = rb_utf8_encindex(); + enc_utf8 = rb_utf8_encoding(); + +#ifdef HAVE_SIMD + simd_impl = find_simd_implementation(); #endif } - -/* - * Local variables: - * mode: c - * c-file-style: ruby - * indent-tabs-mode: nil - * End: - */ diff --git a/ext/json/parser/parser.h b/ext/json/parser/parser.h deleted file mode 100644 index b192064c09..0000000000 --- a/ext/json/parser/parser.h +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef _PARSER_H_ -#define _PARSER_H_ - -#include "ruby.h" - -#ifndef HAVE_RUBY_RE_H -#include "re.h" -#endif - -#ifdef HAVE_RUBY_ST_H -#include "ruby/st.h" -#else -#include "st.h" -#endif - -#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key)) - -/* unicode */ - -typedef unsigned long UTF32; /* at least 32 bits */ -typedef unsigned short UTF16; /* at least 16 bits */ -typedef unsigned char UTF8; /* typically 8 bits */ - -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_SUR_HIGH_START (UTF32)0xD800 -#define UNI_SUR_HIGH_END (UTF32)0xDBFF -#define UNI_SUR_LOW_START (UTF32)0xDC00 -#define UNI_SUR_LOW_END (UTF32)0xDFFF - -typedef struct JSON_ParserStruct { - VALUE Vsource; - char *source; - long len; - char *memo; - VALUE create_id; - int max_nesting; - int current_nesting; - int allow_nan; - int parsing_name; - int symbolize_names; - int quirks_mode; - VALUE object_class; - VALUE array_class; - int create_additions; - VALUE match_string; - FBuffer *fbuffer; -} JSON_Parser; - -#define GET_PARSER \ - GET_PARSER_INIT; \ - if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") -#define GET_PARSER_INIT \ - JSON_Parser *json; \ - Data_Get_Struct(self, JSON_Parser, json) - -#define MinusInfinity "-Infinity" -#define EVIL 0x666 - -static UTF32 unescape_unicode(const unsigned char *p); -static int convert_UTF32_to_UTF8(char *buf, UTF32 ch); -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result); -static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd); -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); -static VALUE convert_encoding(VALUE source); -static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self); -static VALUE cParser_parse(VALUE self); -static JSON_Parser *JSON_allocate(); -static void JSON_mark(JSON_Parser *json); -static void JSON_free(JSON_Parser *json); -static VALUE cJSON_parser_s_allocate(VALUE klass); -static VALUE cParser_source(VALUE self); - -#endif diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl deleted file mode 100644 index ab8d318173..0000000000 --- a/ext/json/parser/parser.rl +++ /dev/null @@ -1,927 +0,0 @@ -#include "../fbuffer/fbuffer.h" -#include "parser.h" - -/* unicode */ - -static const char digit_values[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, - -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1 -}; - -static UTF32 unescape_unicode(const unsigned char *p) -{ - char b; - UTF32 result = 0; - b = digit_values[p[0]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | b; - b = digit_values[p[1]]; - result = (result << 4) | b; - if (b < 0) return UNI_REPLACEMENT_CHAR; - b = digit_values[p[2]]; - result = (result << 4) | b; - if (b < 0) return UNI_REPLACEMENT_CHAR; - b = digit_values[p[3]]; - result = (result << 4) | b; - if (b < 0) return UNI_REPLACEMENT_CHAR; - return result; -} - -static int convert_UTF32_to_UTF8(char *buf, UTF32 ch) -{ - int len = 1; - if (ch <= 0x7F) { - buf[0] = (char) ch; - } else if (ch <= 0x07FF) { - buf[0] = (char) ((ch >> 6) | 0xC0); - buf[1] = (char) ((ch & 0x3F) | 0x80); - len++; - } else if (ch <= 0xFFFF) { - buf[0] = (char) ((ch >> 12) | 0xE0); - buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); - buf[2] = (char) ((ch & 0x3F) | 0x80); - len += 2; - } else if (ch <= 0x1fffff) { - buf[0] =(char) ((ch >> 18) | 0xF0); - buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); - buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); - buf[3] =(char) ((ch & 0x3F) | 0x80); - len += 3; - } else { - buf[0] = '?'; - } - return len; -} - -#ifdef HAVE_RUBY_ENCODING_H -static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE, - CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE; -static ID i_encoding, i_encode; -#else -static ID i_iconv; -#endif - -static VALUE mJSON, mExt, cParser, eParserError, eNestingError; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_quirks_mode, - i_object_class, i_array_class, i_key_p, i_deep_const_get, i_match, - i_match_string, i_aset, i_aref, i_leftshift; - -%%{ - machine JSON_common; - - cr = '\n'; - cr_neg = [^\n]; - ws = [ \t\r\n]; - c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/'; - cpp_comment = '//' cr_neg* cr; - comment = c_comment | cpp_comment; - ignore = ws | comment; - name_separator = ':'; - value_separator = ','; - Vnull = 'null'; - Vfalse = 'false'; - Vtrue = 'true'; - VNaN = 'NaN'; - VInfinity = 'Infinity'; - VMinusInfinity = '-Infinity'; - begin_value = [nft\"\-\[\{NI] | digit; - begin_object = '{'; - end_object = '}'; - begin_array = '['; - end_array = ']'; - begin_string = '"'; - begin_name = begin_string; - begin_number = digit | '-'; -}%% - -%%{ - machine JSON_object; - include JSON_common; - - write data; - - action parse_value { - VALUE v = Qnil; - char *np = JSON_parse_value(json, fpc, pe, &v); - if (np == NULL) { - fhold; fbreak; - } else { - if (NIL_P(json->object_class)) { - rb_hash_aset(*result, last_name, v); - } else { - rb_funcall(*result, i_aset, 2, last_name, v); - } - fexec np; - } - } - - action parse_name { - char *np; - json->parsing_name = 1; - np = JSON_parse_string(json, fpc, pe, &last_name); - json->parsing_name = 0; - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action exit { fhold; fbreak; } - - pair = ignore* begin_name >parse_name ignore* name_separator ignore* begin_value >parse_value; - next_pair = ignore* value_separator pair; - - main := ( - begin_object - (pair (next_pair)*)? ignore* - end_object - ) @exit; -}%% - -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - VALUE last_name = Qnil; - VALUE object_class = json->object_class; - - if (json->max_nesting && json->current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting); - } - - *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); - - %% write init; - %% write exec; - - if (cs >= JSON_object_first_final) { - if (json->create_additions) { - VALUE klassname; - if (NIL_P(json->object_class)) { - klassname = rb_hash_aref(*result, json->create_id); - } else { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); - } - if (!NIL_P(klassname)) { - VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - } - return p + 1; - } else { - return NULL; - } -} - - -%%{ - machine JSON_value; - include JSON_common; - - write data; - - action parse_null { - *result = Qnil; - } - action parse_false { - *result = Qfalse; - } - action parse_true { - *result = Qtrue; - } - action parse_nan { - if (json->allow_nan) { - *result = CNaN; - } else { - rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2); - } - } - action parse_infinity { - if (json->allow_nan) { - *result = CInfinity; - } else { - rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8); - } - } - action parse_string { - char *np = JSON_parse_string(json, fpc, pe, result); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action parse_number { - char *np; - if(pe > fpc + 9 - json->quirks_mode && !strncmp(MinusInfinity, fpc, 9)) { - if (json->allow_nan) { - *result = CMinusInfinity; - fexec p + 10; - fhold; fbreak; - } else { - rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); - } - } - np = JSON_parse_float(json, fpc, pe, result); - if (np != NULL) fexec np; - np = JSON_parse_integer(json, fpc, pe, result); - if (np != NULL) fexec np; - fhold; fbreak; - } - - action parse_array { - char *np; - json->current_nesting++; - np = JSON_parse_array(json, fpc, pe, result); - json->current_nesting--; - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action parse_object { - char *np; - json->current_nesting++; - np = JSON_parse_object(json, fpc, pe, result); - json->current_nesting--; - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action exit { fhold; fbreak; } - -main := ( - Vnull @parse_null | - Vfalse @parse_false | - Vtrue @parse_true | - VNaN @parse_nan | - VInfinity @parse_infinity | - begin_number >parse_number | - begin_string >parse_string | - begin_array >parse_array | - begin_object >parse_object - ) %*exit; -}%% - -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - - %% write init; - %% write exec; - - if (cs >= JSON_value_first_final) { - return p; - } else { - return NULL; - } -} - -%%{ - machine JSON_integer; - - write data; - - action exit { fhold; fbreak; } - - main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit); -}%% - -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - - %% write init; - json->memo = p; - %% write exec; - - if (cs >= JSON_integer_first_final) { - long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10); - return p + 1; - } else { - return NULL; - } -} - -%%{ - machine JSON_float; - include JSON_common; - - write data; - - action exit { fhold; fbreak; } - - main := '-'? ( - (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) - | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+)) - ) (^[0-9Ee.\-]? @exit ); -}%% - -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - - %% write init; - json->memo = p; - %% write exec; - - if (cs >= JSON_float_first_final) { - long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); - *result = rb_float_new(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1)); - return p + 1; - } else { - return NULL; - } -} - - -%%{ - machine JSON_array; - include JSON_common; - - write data; - - action parse_value { - VALUE v = Qnil; - char *np = JSON_parse_value(json, fpc, pe, &v); - if (np == NULL) { - fhold; fbreak; - } else { - if (NIL_P(json->array_class)) { - rb_ary_push(*result, v); - } else { - rb_funcall(*result, i_leftshift, 1, v); - } - fexec np; - } - } - - action exit { fhold; fbreak; } - - next_element = value_separator ignore* begin_value >parse_value; - - main := begin_array ignore* - ((begin_value >parse_value ignore*) - (ignore* next_element ignore*)*)? - end_array @exit; -}%% - -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - VALUE array_class = json->array_class; - - if (json->max_nesting && json->current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting); - } - *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); - - %% write init; - %% write exec; - - if(cs >= JSON_array_first_final) { - return p + 1; - } else { - rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); - return NULL; - } -} - -static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd) -{ - char *p = string, *pe = string, *unescape; - int unescape_len; - char buf[4]; - - while (pe < stringEnd) { - if (*pe == '\\') { - unescape = (char *) "?"; - unescape_len = 1; - if (pe > p) rb_str_buf_cat(result, p, pe - p); - switch (*++pe) { - case 'n': - unescape = (char *) "\n"; - break; - case 'r': - unescape = (char *) "\r"; - break; - case 't': - unescape = (char *) "\t"; - break; - case '"': - unescape = (char *) "\""; - break; - case '\\': - unescape = (char *) "\\"; - break; - case 'b': - unescape = (char *) "\b"; - break; - case 'f': - unescape = (char *) "\f"; - break; - case 'u': - if (pe > stringEnd - 4) { - return Qnil; - } else { - UTF32 ch = unescape_unicode((unsigned char *) ++pe); - pe += 3; - if (UNI_SUR_HIGH_START == (ch & 0xFC00)) { - pe++; - if (pe > stringEnd - 6) return Qnil; - if (pe[0] == '\\' && pe[1] == 'u') { - UTF32 sur = unescape_unicode((unsigned char *) pe + 2); - ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) - | (sur & 0x3FF)); - pe += 5; - } else { - unescape = (char *) "?"; - break; - } - } - unescape_len = convert_UTF32_to_UTF8(buf, ch); - unescape = buf; - } - break; - default: - p = pe; - continue; - } - rb_str_buf_cat(result, unescape, unescape_len); - p = ++pe; - } else { - pe++; - } - } - rb_str_buf_cat(result, p, pe - p); - return result; -} - -%%{ - machine JSON_string; - include JSON_common; - - write data; - - action parse_string { - *result = json_string_unescape(*result, json->memo + 1, p); - if (NIL_P(*result)) { - fhold; - fbreak; - } else { - FORCE_UTF8(*result); - fexec p + 1; - } - } - - action exit { fhold; fbreak; } - - main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit; -}%% - -static int -match_i(VALUE regexp, VALUE klass, VALUE memo) -{ - if (regexp == Qundef) return ST_STOP; - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) && - RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) { - rb_ary_push(memo, klass); - return ST_STOP; - } - return ST_CONTINUE; -} - -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - VALUE match_string; - - *result = rb_str_buf_new(0); - %% write init; - json->memo = p; - %% write exec; - - if (json->create_additions && RTEST(match_string = json->match_string)) { - VALUE klass; - VALUE memo = rb_ary_new2(2); - rb_ary_push(memo, *result); - rb_hash_foreach(match_string, match_i, memo); - klass = rb_ary_entry(memo, 1); - if (RTEST(klass)) { - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - - if (json->symbolize_names && json->parsing_name) { - *result = rb_str_intern(*result); - } - if (cs >= JSON_string_first_final) { - return p + 1; - } else { - return NULL; - } -} - -/* - * Document-class: JSON::Ext::Parser - * - * This is the JSON parser implemented as a C extension. It can be configured - * to be used by setting - * - * JSON.parser = JSON::Ext::Parser - * - * with the method parser= in JSON. - * - */ - -static VALUE convert_encoding(VALUE source) -{ - char *ptr = RSTRING_PTR(source); - long len = RSTRING_LEN(source); - if (len < 2) { - rb_raise(eParserError, "A JSON text must at least contain two octets!"); - } -#ifdef HAVE_RUBY_ENCODING_H - { - VALUE encoding = rb_funcall(source, i_encoding, 0); - if (encoding == CEncoding_ASCII_8BIT) { - if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { - source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32BE); - } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { - source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16BE); - } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { - source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32LE); - } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { - source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16LE); - } else { - source = rb_str_dup(source); - FORCE_UTF8(source); - } - } else { - source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8); - } - } -#else - if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { - source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source); - } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { - source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source); - } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { - source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source); - } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { - source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source); - } -#endif - return source; -} - -/* - * call-seq: new(source, opts => {}) - * - * Creates a new JSON::Ext::Parser instance for the string _source_. - * - * Creates a new JSON::Ext::Parser instance for the string _source_. - * - * It will be configured by the _opts_ hash. _opts_ can have the following - * keys: - * - * _opts_ can have the following keys: - * * *max_nesting*: The maximum depth of nesting allowed in the parsed data - * structures. Disable depth checking with :max_nesting => false|nil|0, it - * defaults to 100. - * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in - * defiance of RFC 4627 to be parsed by the Parser. This option defaults to - * false. - * * *symbolize_names*: If set to true, returns symbols for the names - * (keys) in a JSON object. Otherwise strings are returned, which is also - * the default. - * * *create_additions*: If set to false, the Parser doesn't create - * additions even if a matchin class and create_id was found. This option - * defaults to true. - * * *object_class*: Defaults to Hash - * * *array_class*: Defaults to Array - */ -static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) -{ - VALUE source, opts; - GET_PARSER_INIT; - - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } - rb_scan_args(argc, argv, "11", &source, &opts); - if (!NIL_P(opts)) { - opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); - if (NIL_P(opts)) { - rb_raise(rb_eArgError, "opts needs to be like a hash"); - } else { - VALUE tmp = ID2SYM(i_max_nesting); - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - json->max_nesting = FIX2INT(max_nesting); - } else { - json->max_nesting = 0; - } - } else { - json->max_nesting = 100; - } - tmp = ID2SYM(i_allow_nan); - if (option_given_p(opts, tmp)) { - json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->allow_nan = 0; - } - tmp = ID2SYM(i_symbolize_names); - if (option_given_p(opts, tmp)) { - json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->symbolize_names = 0; - } - tmp = ID2SYM(i_quirks_mode); - if (option_given_p(opts, tmp)) { - VALUE quirks_mode = rb_hash_aref(opts, tmp); - json->quirks_mode = RTEST(quirks_mode) ? 1 : 0; - } else { - json->quirks_mode = 0; - } - tmp = ID2SYM(i_create_additions); - if (option_given_p(opts, tmp)) { - json->create_additions = RTEST(rb_hash_aref(opts, tmp)); - } else { - json->create_additions = 0; - } - tmp = ID2SYM(i_create_id); - if (option_given_p(opts, tmp)) { - json->create_id = rb_hash_aref(opts, tmp); - } else { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - tmp = ID2SYM(i_object_class); - if (option_given_p(opts, tmp)) { - json->object_class = rb_hash_aref(opts, tmp); - } else { - json->object_class = Qnil; - } - tmp = ID2SYM(i_array_class); - if (option_given_p(opts, tmp)) { - json->array_class = rb_hash_aref(opts, tmp); - } else { - json->array_class = Qnil; - } - tmp = ID2SYM(i_match_string); - if (option_given_p(opts, tmp)) { - VALUE match_string = rb_hash_aref(opts, tmp); - json->match_string = RTEST(match_string) ? match_string : Qnil; - } else { - json->match_string = Qnil; - } - } - } else { - json->max_nesting = 100; - json->allow_nan = 0; - json->create_additions = 1; - json->create_id = rb_funcall(mJSON, i_create_id, 0); - json->object_class = Qnil; - json->array_class = Qnil; - } - source = rb_convert_type(source, T_STRING, "String", "to_str"); - if (!json->quirks_mode) { - source = convert_encoding(StringValue(source)); - } - json->current_nesting = 0; - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source);; - json->Vsource = source; - return self; -} - -%%{ - machine JSON; - - write data; - - include JSON_common; - - action parse_object { - char *np; - json->current_nesting = 1; - np = JSON_parse_object(json, fpc, pe, &result); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action parse_array { - char *np; - json->current_nesting = 1; - np = JSON_parse_array(json, fpc, pe, &result); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - main := ignore* ( - begin_object >parse_object | - begin_array >parse_array - ) ignore*; -}%% - -static VALUE cParser_parse_strict(VALUE self) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; - - %% write init; - p = json->source; - pe = p + json->len; - %% write exec; - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); - return Qnil; - } -} - - -%%{ - machine JSON_quirks_mode; - - write data; - - include JSON_common; - - action parse_value { - char *np = JSON_parse_value(json, fpc, pe, &result); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - main := ignore* ( - begin_value >parse_value - ) ignore*; -}%% - -static VALUE cParser_parse_quirks_mode(VALUE self) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; - - %% write init; - p = json->source; - pe = p + json->len; - %% write exec; - - if (cs >= JSON_quirks_mode_first_final && p == pe) { - return result; - } else { - rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p); - return Qnil; - } -} - -/* - * call-seq: parse() - * - * Parses the current JSON text _source_ and returns the complete data - * structure as a result. - */ -static VALUE cParser_parse(VALUE self) -{ - GET_PARSER; - - if (json->quirks_mode) { - return cParser_parse_quirks_mode(self); - } else { - return cParser_parse_strict(self); - } -} - - -static JSON_Parser *JSON_allocate() -{ - JSON_Parser *json = ALLOC(JSON_Parser); - MEMZERO(json, JSON_Parser, 1); - json->fbuffer = fbuffer_alloc(0); - return json; -} - -static void JSON_mark(JSON_Parser *json) -{ - rb_gc_mark_maybe(json->Vsource); - rb_gc_mark_maybe(json->create_id); - rb_gc_mark_maybe(json->object_class); - rb_gc_mark_maybe(json->array_class); - rb_gc_mark_maybe(json->match_string); -} - -static void JSON_free(JSON_Parser *json) -{ - fbuffer_free(json->fbuffer); - ruby_xfree(json); -} - -static VALUE cJSON_parser_s_allocate(VALUE klass) -{ - JSON_Parser *json = JSON_allocate(); - return Data_Wrap_Struct(klass, JSON_mark, JSON_free, json); -} - -/* - * call-seq: source() - * - * Returns a copy of the current _source_ string, that was used to construct - * this Parser. - */ -static VALUE cParser_source(VALUE self) -{ - GET_PARSER; - return rb_str_dup(json->Vsource); -} - -/* - * call-seq: quirks_mode?() - * - * Returns a true, if this parser is in quirks_mode, false otherwise. - */ -static VALUE cParser_quirks_mode_p(VALUE self) -{ - GET_PARSER; - return json->quirks_mode ? Qtrue : Qfalse; -} - - -void Init_parser() -{ - rb_require("json/common"); - mJSON = rb_define_module("JSON"); - mExt = rb_define_module_under(mJSON, "Ext"); - cParser = rb_define_class_under(mExt, "Parser", rb_cObject); - eParserError = rb_path2class("JSON::ParserError"); - eNestingError = rb_path2class("JSON::NestingError"); - rb_define_alloc_func(cParser, cJSON_parser_s_allocate); - rb_define_method(cParser, "initialize", cParser_initialize, -1); - rb_define_method(cParser, "parse", cParser_parse, 0); - rb_define_method(cParser, "source", cParser_source, 0); - rb_define_method(cParser, "quirks_mode?", cParser_quirks_mode_p, 0); - - CNaN = rb_const_get(mJSON, rb_intern("NaN")); - CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); - CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); - - i_json_creatable_p = rb_intern("json_creatable?"); - i_json_create = rb_intern("json_create"); - i_create_id = rb_intern("create_id"); - i_create_additions = rb_intern("create_additions"); - i_chr = rb_intern("chr"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = rb_intern("allow_nan"); - i_symbolize_names = rb_intern("symbolize_names"); - i_quirks_mode = rb_intern("quirks_mode"); - i_object_class = rb_intern("object_class"); - i_array_class = rb_intern("array_class"); - i_match = rb_intern("match"); - i_match_string = rb_intern("match_string"); - i_key_p = rb_intern("key?"); - i_deep_const_get = rb_intern("deep_const_get"); - i_aset = rb_intern("[]="); - i_aref = rb_intern("[]"); - i_leftshift = rb_intern("<<"); -#ifdef HAVE_RUBY_ENCODING_H - CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8")); - CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be")); - CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le")); - CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be")); - CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le")); - CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit")); - i_encoding = rb_intern("encoding"); - i_encode = rb_intern("encode"); -#else - i_iconv = rb_intern("iconv"); -#endif -} - -/* - * Local variables: - * mode: c - * c-file-style: ruby - * indent-tabs-mode: nil - * End: - */ diff --git a/ext/json/parser/prereq.mk b/ext/json/parser/prereq.mk deleted file mode 100644 index 440ef4017e..0000000000 --- a/ext/json/parser/prereq.mk +++ /dev/null @@ -1,9 +0,0 @@ -RAGEL = ragel - -.SUFFIXES: .rl - -.rl.c: - $(RAGEL) -G2 $< - $(BASERUBY) -pli -e '$$_.sub!(/[ \t]+$$/, "")' $@ - -parser.c: |
