summaryrefslogtreecommitdiff
path: root/ext/json/parser
diff options
context:
space:
mode:
Diffstat (limited to 'ext/json/parser')
-rw-r--r--ext/json/parser/depend165
-rw-r--r--ext/json/parser/extconf.rb14
-rw-r--r--ext/json/parser/parser.c3336
-rw-r--r--ext/json/parser/parser.h91
-rw-r--r--ext/json/parser/parser.rl891
-rw-r--r--ext/json/parser/prereq.mk10
6 files changed, 1609 insertions, 2898 deletions
diff --git a/ext/json/parser/depend b/ext/json/parser/depend
index d0c9c2d2a6..d4737b1dfb 100644
--- a/ext/json/parser/depend
+++ b/ext/json/parser/depend
@@ -1,5 +1,5 @@
$(OBJS): $(ruby_headers)
-parser.o: parser.c parser.h $(srcdir)/../fbuffer/fbuffer.h
+parser.o: parser.c $(srcdir)/../fbuffer/fbuffer.h
# AUTOGENERATED DEPENDENCIES START
parser.o: $(RUBY_EXTCONF_H)
@@ -7,9 +7,167 @@ parser.o: $(arch_hdrdir)/ruby/config.h
parser.o: $(hdrdir)/ruby.h
parser.o: $(hdrdir)/ruby/assert.h
parser.o: $(hdrdir)/ruby/backward.h
+parser.o: $(hdrdir)/ruby/backward/2/assume.h
+parser.o: $(hdrdir)/ruby/backward/2/attributes.h
+parser.o: $(hdrdir)/ruby/backward/2/bool.h
+parser.o: $(hdrdir)/ruby/backward/2/inttypes.h
+parser.o: $(hdrdir)/ruby/backward/2/limits.h
+parser.o: $(hdrdir)/ruby/backward/2/long_long.h
+parser.o: $(hdrdir)/ruby/backward/2/stdalign.h
+parser.o: $(hdrdir)/ruby/backward/2/stdarg.h
parser.o: $(hdrdir)/ruby/defines.h
parser.o: $(hdrdir)/ruby/encoding.h
parser.o: $(hdrdir)/ruby/intern.h
+parser.o: $(hdrdir)/ruby/internal/abi.h
+parser.o: $(hdrdir)/ruby/internal/anyargs.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/char.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/double.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/int.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/long.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/short.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h
+parser.o: $(hdrdir)/ruby/internal/assume.h
+parser.o: $(hdrdir)/ruby/internal/attr/alloc_size.h
+parser.o: $(hdrdir)/ruby/internal/attr/artificial.h
+parser.o: $(hdrdir)/ruby/internal/attr/cold.h
+parser.o: $(hdrdir)/ruby/internal/attr/const.h
+parser.o: $(hdrdir)/ruby/internal/attr/constexpr.h
+parser.o: $(hdrdir)/ruby/internal/attr/deprecated.h
+parser.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h
+parser.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h
+parser.o: $(hdrdir)/ruby/internal/attr/error.h
+parser.o: $(hdrdir)/ruby/internal/attr/flag_enum.h
+parser.o: $(hdrdir)/ruby/internal/attr/forceinline.h
+parser.o: $(hdrdir)/ruby/internal/attr/format.h
+parser.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h
+parser.o: $(hdrdir)/ruby/internal/attr/noalias.h
+parser.o: $(hdrdir)/ruby/internal/attr/nodiscard.h
+parser.o: $(hdrdir)/ruby/internal/attr/noexcept.h
+parser.o: $(hdrdir)/ruby/internal/attr/noinline.h
+parser.o: $(hdrdir)/ruby/internal/attr/nonnull.h
+parser.o: $(hdrdir)/ruby/internal/attr/noreturn.h
+parser.o: $(hdrdir)/ruby/internal/attr/packed_struct.h
+parser.o: $(hdrdir)/ruby/internal/attr/pure.h
+parser.o: $(hdrdir)/ruby/internal/attr/restrict.h
+parser.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h
+parser.o: $(hdrdir)/ruby/internal/attr/warning.h
+parser.o: $(hdrdir)/ruby/internal/attr/weakref.h
+parser.o: $(hdrdir)/ruby/internal/cast.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is/apple.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is/clang.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is/intel.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h
+parser.o: $(hdrdir)/ruby/internal/compiler_since.h
+parser.o: $(hdrdir)/ruby/internal/config.h
+parser.o: $(hdrdir)/ruby/internal/constant_p.h
+parser.o: $(hdrdir)/ruby/internal/core.h
+parser.o: $(hdrdir)/ruby/internal/core/rarray.h
+parser.o: $(hdrdir)/ruby/internal/core/rbasic.h
+parser.o: $(hdrdir)/ruby/internal/core/rbignum.h
+parser.o: $(hdrdir)/ruby/internal/core/rclass.h
+parser.o: $(hdrdir)/ruby/internal/core/rdata.h
+parser.o: $(hdrdir)/ruby/internal/core/rfile.h
+parser.o: $(hdrdir)/ruby/internal/core/rhash.h
+parser.o: $(hdrdir)/ruby/internal/core/robject.h
+parser.o: $(hdrdir)/ruby/internal/core/rregexp.h
+parser.o: $(hdrdir)/ruby/internal/core/rstring.h
+parser.o: $(hdrdir)/ruby/internal/core/rstruct.h
+parser.o: $(hdrdir)/ruby/internal/core/rtypeddata.h
+parser.o: $(hdrdir)/ruby/internal/ctype.h
+parser.o: $(hdrdir)/ruby/internal/dllexport.h
+parser.o: $(hdrdir)/ruby/internal/dosish.h
+parser.o: $(hdrdir)/ruby/internal/encoding/coderange.h
+parser.o: $(hdrdir)/ruby/internal/encoding/ctype.h
+parser.o: $(hdrdir)/ruby/internal/encoding/encoding.h
+parser.o: $(hdrdir)/ruby/internal/encoding/pathname.h
+parser.o: $(hdrdir)/ruby/internal/encoding/re.h
+parser.o: $(hdrdir)/ruby/internal/encoding/sprintf.h
+parser.o: $(hdrdir)/ruby/internal/encoding/string.h
+parser.o: $(hdrdir)/ruby/internal/encoding/symbol.h
+parser.o: $(hdrdir)/ruby/internal/encoding/transcode.h
+parser.o: $(hdrdir)/ruby/internal/error.h
+parser.o: $(hdrdir)/ruby/internal/eval.h
+parser.o: $(hdrdir)/ruby/internal/event.h
+parser.o: $(hdrdir)/ruby/internal/fl_type.h
+parser.o: $(hdrdir)/ruby/internal/gc.h
+parser.o: $(hdrdir)/ruby/internal/glob.h
+parser.o: $(hdrdir)/ruby/internal/globals.h
+parser.o: $(hdrdir)/ruby/internal/has/attribute.h
+parser.o: $(hdrdir)/ruby/internal/has/builtin.h
+parser.o: $(hdrdir)/ruby/internal/has/c_attribute.h
+parser.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h
+parser.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h
+parser.o: $(hdrdir)/ruby/internal/has/extension.h
+parser.o: $(hdrdir)/ruby/internal/has/feature.h
+parser.o: $(hdrdir)/ruby/internal/has/warning.h
+parser.o: $(hdrdir)/ruby/internal/intern/array.h
+parser.o: $(hdrdir)/ruby/internal/intern/bignum.h
+parser.o: $(hdrdir)/ruby/internal/intern/class.h
+parser.o: $(hdrdir)/ruby/internal/intern/compar.h
+parser.o: $(hdrdir)/ruby/internal/intern/complex.h
+parser.o: $(hdrdir)/ruby/internal/intern/cont.h
+parser.o: $(hdrdir)/ruby/internal/intern/dir.h
+parser.o: $(hdrdir)/ruby/internal/intern/enum.h
+parser.o: $(hdrdir)/ruby/internal/intern/enumerator.h
+parser.o: $(hdrdir)/ruby/internal/intern/error.h
+parser.o: $(hdrdir)/ruby/internal/intern/eval.h
+parser.o: $(hdrdir)/ruby/internal/intern/file.h
+parser.o: $(hdrdir)/ruby/internal/intern/hash.h
+parser.o: $(hdrdir)/ruby/internal/intern/io.h
+parser.o: $(hdrdir)/ruby/internal/intern/load.h
+parser.o: $(hdrdir)/ruby/internal/intern/marshal.h
+parser.o: $(hdrdir)/ruby/internal/intern/numeric.h
+parser.o: $(hdrdir)/ruby/internal/intern/object.h
+parser.o: $(hdrdir)/ruby/internal/intern/parse.h
+parser.o: $(hdrdir)/ruby/internal/intern/proc.h
+parser.o: $(hdrdir)/ruby/internal/intern/process.h
+parser.o: $(hdrdir)/ruby/internal/intern/random.h
+parser.o: $(hdrdir)/ruby/internal/intern/range.h
+parser.o: $(hdrdir)/ruby/internal/intern/rational.h
+parser.o: $(hdrdir)/ruby/internal/intern/re.h
+parser.o: $(hdrdir)/ruby/internal/intern/ruby.h
+parser.o: $(hdrdir)/ruby/internal/intern/select.h
+parser.o: $(hdrdir)/ruby/internal/intern/select/largesize.h
+parser.o: $(hdrdir)/ruby/internal/intern/set.h
+parser.o: $(hdrdir)/ruby/internal/intern/signal.h
+parser.o: $(hdrdir)/ruby/internal/intern/sprintf.h
+parser.o: $(hdrdir)/ruby/internal/intern/string.h
+parser.o: $(hdrdir)/ruby/internal/intern/struct.h
+parser.o: $(hdrdir)/ruby/internal/intern/thread.h
+parser.o: $(hdrdir)/ruby/internal/intern/time.h
+parser.o: $(hdrdir)/ruby/internal/intern/variable.h
+parser.o: $(hdrdir)/ruby/internal/intern/vm.h
+parser.o: $(hdrdir)/ruby/internal/interpreter.h
+parser.o: $(hdrdir)/ruby/internal/iterator.h
+parser.o: $(hdrdir)/ruby/internal/memory.h
+parser.o: $(hdrdir)/ruby/internal/method.h
+parser.o: $(hdrdir)/ruby/internal/module.h
+parser.o: $(hdrdir)/ruby/internal/newobj.h
+parser.o: $(hdrdir)/ruby/internal/scan_args.h
+parser.o: $(hdrdir)/ruby/internal/special_consts.h
+parser.o: $(hdrdir)/ruby/internal/static_assert.h
+parser.o: $(hdrdir)/ruby/internal/stdalign.h
+parser.o: $(hdrdir)/ruby/internal/stdbool.h
+parser.o: $(hdrdir)/ruby/internal/stdckdint.h
+parser.o: $(hdrdir)/ruby/internal/symbol.h
+parser.o: $(hdrdir)/ruby/internal/value.h
+parser.o: $(hdrdir)/ruby/internal/value_type.h
+parser.o: $(hdrdir)/ruby/internal/variable.h
+parser.o: $(hdrdir)/ruby/internal/warning_push.h
+parser.o: $(hdrdir)/ruby/internal/xmalloc.h
parser.o: $(hdrdir)/ruby/missing.h
parser.o: $(hdrdir)/ruby/onigmo.h
parser.o: $(hdrdir)/ruby/oniguruma.h
@@ -17,7 +175,8 @@ parser.o: $(hdrdir)/ruby/ruby.h
parser.o: $(hdrdir)/ruby/st.h
parser.o: $(hdrdir)/ruby/subst.h
parser.o: $(srcdir)/../fbuffer/fbuffer.h
+parser.o: $(srcdir)/../json.h
+parser.o: $(srcdir)/../simd/simd.h
+parser.o: $(srcdir)/../vendor/ryu.h
parser.o: parser.c
-parser.o: parser.h
-parser.o: parser.rl
# AUTOGENERATED DEPENDENCIES END
diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb
index f7360d46b2..2440e66d8b 100644
--- a/ext/json/parser/extconf.rb
+++ b/ext/json/parser/extconf.rb
@@ -1,6 +1,16 @@
-# frozen_string_literal: false
+# frozen_string_literal: true
require 'mkmf'
-have_func("rb_enc_raise", "ruby.h")
+$defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0"
+have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0
+have_func("rb_str_to_interned_str", "ruby.h") # RUBY_VERSION >= 3.0
+have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2
+have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby
+
+append_cflags("-std=c99")
+
+if enable_config('parser-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
+ load __dir__ + "/../simd/conf.rb"
+end
create_makefile 'json/ext/parser'
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 6f0d31c2eb..f1ea1b6abb 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -1,69 +1,301 @@
+#include "../json.h"
+#include "../vendor/ryu.h"
+#include "../simd/simd.h"
-#line 1 "parser.rl"
-#include "../fbuffer/fbuffer.h"
-#include "parser.h"
+static VALUE mJSON, eNestingError, Encoding_UTF_8;
+static VALUE CNaN, CInfinity, CMinusInfinity;
+
+static ID i_new, i_try_convert, i_uminus, i_encode;
+
+static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, sym_symbolize_names, sym_freeze,
+ sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
+
+static int binary_encindex;
+static int utf8_encindex;
-#if defined HAVE_RUBY_ENCODING_H
-# define EXC_ENCODING rb_utf8_encoding(),
-# ifndef HAVE_RB_ENC_RAISE
+#ifndef HAVE_RB_HASH_BULK_INSERT
+// For TruffleRuby
static void
-enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...)
+rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
{
- va_list args;
- VALUE mesg;
+ long index = 0;
+ while (index < count) {
+ VALUE name = pairs[index++];
+ VALUE value = pairs[index++];
+ rb_hash_aset(hash, name, value);
+ }
+ RB_GC_GUARD(hash);
+}
+#endif
- va_start(args, fmt);
- mesg = rb_enc_vsprintf(enc, fmt, args);
- va_end(args);
+#ifndef HAVE_RB_HASH_NEW_CAPA
+#define rb_hash_new_capa(n) rb_hash_new()
+#endif
- rb_exc_raise(rb_exc_new3(exc, mesg));
+#ifndef HAVE_RB_STR_TO_INTERNED_STR
+static VALUE rb_str_to_interned_str(VALUE str)
+{
+ return rb_funcall(rb_str_freeze(str), i_uminus, 0);
}
-# define rb_enc_raise enc_raise
+#endif
+
+/* name cache */
+
+#include <string.h>
+#include <ctype.h>
+
+// Object names are likely to be repeated, and are frozen.
+// As such we can re-use them if we keep a cache of the ones we've seen so far,
+// and save much more expensive lookups into the global fstring table.
+// This cache implementation is deliberately simple, as we're optimizing for compactness,
+// to be able to fit safely on the stack.
+// As such, binary search into a sorted array gives a good tradeoff between compactness and
+// performance.
+#define JSON_RVALUE_CACHE_CAPA 63
+typedef struct rvalue_cache_struct {
+ int length;
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
+} rvalue_cache;
+
+static rb_encoding *enc_utf8;
+
+#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
+
+static inline VALUE build_interned_string(const char *str, const long length)
+{
+# ifdef HAVE_RB_ENC_INTERNED_STR
+ return rb_enc_interned_str(str, length, enc_utf8);
+# else
+ VALUE rstring = rb_utf8_str_new(str, length);
+ return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
# endif
-#else
-# define EXC_ENCODING /* nothing */
-# define rb_enc_raise rb_raise
+}
+
+static inline VALUE build_symbol(const char *str, const long length)
+{
+ return rb_str_intern(build_interned_string(str, length));
+}
+
+static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
+{
+ MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
+ cache->length++;
+ cache->entries[index] = rstring;
+}
+
+#define rstring_cache_memcmp memcmp
+
+#if JSON_CPU_LITTLE_ENDIAN_64BITS
+#if __has_builtin(__builtin_bswap64)
+#undef rstring_cache_memcmp
+ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
+{
+ // The libc memcmp has numerous complex optimizations, but in this particular case,
+ // we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
+ // inline a simpler memcmp outperforms calling the libc version.
+ long i = 0;
+
+ for (; i + 8 <= length; i += 8) {
+ uint64_t a, b;
+ memcpy(&a, str + i, 8);
+ memcpy(&b, rptr + i, 8);
+ if (a != b) {
+ a = __builtin_bswap64(a);
+ b = __builtin_bswap64(b);
+ return (a < b) ? -1 : 1;
+ }
+ }
+
+ for (; i < length; i++) {
+ if (str[i] != rptr[i]) {
+ return (str[i] < rptr[i]) ? -1 : 1;
+ }
+ }
+
+ return 0;
+}
+#endif
#endif
-/* unicode */
+ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
+{
+ const char *rstring_ptr;
+ long rstring_length;
-static const char digit_values[256] = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
- -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1
+ RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
+
+ if (length == rstring_length) {
+ return rstring_cache_memcmp(str, rstring_ptr, length);
+ } else {
+ return (int)(length - rstring_length);
+ }
+}
+
+ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
+{
+ int low = 0;
+ int high = cache->length - 1;
+
+ while (low <= high) {
+ int mid = (high + low) >> 1;
+ VALUE entry = cache->entries[mid];
+ int cmp = rstring_cache_cmp(str, length, entry);
+
+ if (cmp == 0) {
+ return entry;
+ } else if (cmp > 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ }
+ }
+
+ VALUE rstring = build_interned_string(str, length);
+
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
+ rvalue_cache_insert_at(cache, low, rstring);
+ }
+ return rstring;
+}
+
+static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
+{
+ int low = 0;
+ int high = cache->length - 1;
+
+ while (low <= high) {
+ int mid = (high + low) >> 1;
+ VALUE entry = cache->entries[mid];
+ int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
+
+ if (cmp == 0) {
+ return entry;
+ } else if (cmp > 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ }
+ }
+
+ VALUE rsymbol = build_symbol(str, length);
+
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
+ rvalue_cache_insert_at(cache, low, rsymbol);
+ }
+ return rsymbol;
+}
+
+/* rvalue stack */
+
+#define RVALUE_STACK_INITIAL_CAPA 128
+
+enum rvalue_stack_type {
+ RVALUE_STACK_HEAP_ALLOCATED = 0,
+ RVALUE_STACK_STACK_ALLOCATED = 1,
};
-static UTF32 unescape_unicode(const unsigned char *p)
+typedef struct rvalue_stack_struct {
+ enum rvalue_stack_type type;
+ long capa;
+ long head;
+ VALUE *ptr;
+} rvalue_stack;
+
+static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
+
+static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
{
- char b;
- UTF32 result = 0;
- b = digit_values[p[0]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- result = (result << 4) | (unsigned char)b;
- b = digit_values[p[1]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- result = (result << 4) | (unsigned char)b;
- b = digit_values[p[2]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- result = (result << 4) | (unsigned char)b;
- b = digit_values[p[3]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- result = (result << 4) | (unsigned char)b;
- return result;
+ long required = stack->capa * 2;
+
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
+ stack = rvalue_stack_spill(stack, handle, stack_ref);
+ } else {
+ REALLOC_N(stack->ptr, VALUE, required);
+ stack->capa = required;
+ }
+ return stack;
+}
+
+static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
+{
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
+ stack = rvalue_stack_grow(stack, handle, stack_ref);
+ }
+ stack->ptr[stack->head] = value;
+ stack->head++;
+ return value;
+}
+
+static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
+{
+ return stack->ptr + (stack->head - count);
+}
+
+static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
+{
+ stack->head -= count;
+}
+
+static void rvalue_stack_mark(void *ptr)
+{
+ rvalue_stack *stack = (rvalue_stack *)ptr;
+ long index;
+ for (index = 0; index < stack->head; index++) {
+ rb_gc_mark(stack->ptr[index]);
+ }
}
-static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
+static void rvalue_stack_free(void *ptr)
+{
+ rvalue_stack *stack = (rvalue_stack *)ptr;
+ if (stack) {
+ ruby_xfree(stack->ptr);
+ ruby_xfree(stack);
+ }
+}
+
+static size_t rvalue_stack_memsize(const void *ptr)
+{
+ const rvalue_stack *stack = (const rvalue_stack *)ptr;
+ return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
+}
+
+static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
+ "JSON::Ext::Parser/rvalue_stack",
+ {
+ .dmark = rvalue_stack_mark,
+ .dfree = rvalue_stack_free,
+ .dsize = rvalue_stack_memsize,
+ },
+ 0, 0,
+ RUBY_TYPED_FREE_IMMEDIATELY,
+};
+
+static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
+{
+ rvalue_stack *stack;
+ *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
+ *stack_ref = stack;
+ MEMCPY(stack, old_stack, rvalue_stack, 1);
+
+ stack->capa = old_stack->capa << 1;
+ stack->ptr = ALLOC_N(VALUE, stack->capa);
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
+ MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
+ return stack;
+}
+
+static void rvalue_stack_eagerly_release(VALUE handle)
+{
+ if (handle) {
+ rvalue_stack *stack;
+ TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
+ RTYPEDDATA_DATA(handle) = NULL;
+ rvalue_stack_free(stack);
+ }
+}
+
+static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
{
int len = 1;
if (ch <= 0x7F) {
@@ -89,1602 +321,1079 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
return len;
}
-static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
-static VALUE CNaN, CInfinity, CMinusInfinity;
-static VALUE cBigDecimal = Qundef;
+enum duplicate_key_action {
+ JSON_DEPRECATED = 0,
+ JSON_IGNORE,
+ JSON_RAISE,
+};
+
+typedef struct JSON_ParserStruct {
+ VALUE on_load_proc;
+ VALUE decimal_class;
+ ID decimal_method_id;
+ enum duplicate_key_action on_duplicate_key;
+ int max_nesting;
+ bool allow_nan;
+ bool allow_trailing_comma;
+ bool allow_control_characters;
+ bool symbolize_names;
+ bool freeze;
+} JSON_ParserConfig;
+
+typedef struct JSON_ParserStateStruct {
+ VALUE stack_handle;
+ const char *start;
+ const char *cursor;
+ const char *end;
+ rvalue_stack *stack;
+ rvalue_cache name_cache;
+ int in_array;
+ int current_nesting;
+} JSON_ParserState;
+
+static inline size_t rest(JSON_ParserState *state) {
+ return state->end - state->cursor;
+}
+
+static inline bool eos(JSON_ParserState *state) {
+ return state->cursor >= state->end;
+}
+
+static inline char peek(JSON_ParserState *state)
+{
+ if (RB_UNLIKELY(eos(state))) {
+ return 0;
+ }
+ return *state->cursor;
+}
+
+static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
+{
+ const char *cursor = state->cursor;
+ long column = 0;
+ long line = 1;
+
+ while (cursor >= state->start) {
+ if (*cursor-- == '\n') {
+ break;
+ }
+ column++;
+ }
+
+ while (cursor >= state->start) {
+ if (*cursor-- == '\n') {
+ line++;
+ }
+ }
+ *line_out = line;
+ *column_out = column;
+}
-static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
- i_object_class, i_array_class, i_decimal_class, i_key_p,
- i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
- i_leftshift, i_new, i_BigDecimal;
+static void emit_parse_warning(const char *message, JSON_ParserState *state)
+{
+ long line, column;
+ cursor_position(state, &line, &column);
+ VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column);
+ rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning);
+}
-#line 126 "parser.rl"
+#define PARSE_ERROR_FRAGMENT_LEN 32
+#ifdef RBIMPL_ATTR_NORETURN
+RBIMPL_ATTR_NORETURN()
+#endif
+static void raise_parse_error(const char *format, JSON_ParserState *state)
+{
+ unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
+ long line, column;
+ cursor_position(state, &line, &column);
+
+ const char *ptr = "EOF";
+ if (state->cursor && state->cursor < state->end) {
+ ptr = state->cursor;
+ size_t len = 0;
+ while (len < PARSE_ERROR_FRAGMENT_LEN) {
+ char ch = ptr[len];
+ if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') {
+ break;
+ }
+ len++;
+ }
+ if (len) {
+ buffer[0] = '\'';
+ MEMCPY(buffer + 1, ptr, char, len);
-#line 108 "parser.c"
-enum {JSON_object_start = 1};
-enum {JSON_object_first_final = 27};
-enum {JSON_object_error = 0};
+ while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte
+ len--;
+ }
-enum {JSON_object_en_main = 1};
+ if (buffer[len] >= 0xC0) { // multibyte character start
+ len--;
+ }
+ buffer[len + 1] = '\'';
+ buffer[len + 2] = '\0';
+ ptr = (const char *)buffer;
+ }
+ }
-#line 167 "parser.rl"
+ VALUE msg = rb_sprintf(format, ptr);
+ VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
+ RB_GC_GUARD(msg);
+ VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
+ rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
+ rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
+ rb_exc_raise(exc);
+}
-static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
+#ifdef RBIMPL_ATTR_NORETURN
+RBIMPL_ATTR_NORETURN()
+#endif
+static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
{
- int cs = EVIL;
- VALUE last_name = Qnil;
- VALUE object_class = json->object_class;
+ state->cursor = at;
+ raise_parse_error(format, state);
+}
+
+/* unicode */
+
+static const signed char digit_values[256] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
+ -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1
+};
- if (json->max_nesting && current_nesting > json->max_nesting) {
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
+static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
+{
+ if (RB_UNLIKELY(sp > spe - 4)) {
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
}
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
-
-
-#line 132 "parser.c"
- {
- cs = JSON_object_start;
- }
-
-#line 182 "parser.rl"
-
-#line 139 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- if ( (*p) == 123 )
- goto st2;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 13: goto st2;
- case 32: goto st2;
- case 34: goto tr2;
- case 47: goto st23;
- case 125: goto tr4;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st2;
- goto st0;
-tr2:
-#line 149 "parser.rl"
- {
- char *np;
- json->parsing_name = 1;
- np = JSON_parse_string(json, p, pe, &last_name);
- json->parsing_name = 0;
- if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;}
+ const unsigned char *p = (const unsigned char *)sp;
+
+ const signed char b0 = digit_values[p[0]];
+ const signed char b1 = digit_values[p[1]];
+ const signed char b2 = digit_values[p[2]];
+ const signed char b3 = digit_values[p[3]];
+
+ if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
}
- goto st3;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
-#line 180 "parser.c"
- switch( (*p) ) {
- case 13: goto st3;
- case 32: goto st3;
- case 47: goto st4;
- case 58: goto st8;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st3;
- goto st0;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- switch( (*p) ) {
- case 42: goto st5;
- case 47: goto st7;
- }
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) == 42 )
- goto st6;
- goto st5;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- switch( (*p) ) {
- case 42: goto st6;
- case 47: goto st3;
- }
- goto st5;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) == 10 )
- goto st3;
- goto st7;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- switch( (*p) ) {
- case 13: goto st8;
- case 32: goto st8;
- case 34: goto tr11;
- case 45: goto tr11;
- case 47: goto st19;
- case 73: goto tr11;
- case 78: goto tr11;
- case 91: goto tr11;
- case 102: goto tr11;
- case 110: goto tr11;
- case 116: goto tr11;
- case 123: goto tr11;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr11;
- } else if ( (*p) >= 9 )
- goto st8;
- goto st0;
-tr11:
-#line 134 "parser.rl"
- {
- VALUE v = Qnil;
- char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
- if (np == NULL) {
- p--; {p++; cs = 9; goto _out;}
- } else {
- if (NIL_P(json->object_class)) {
- rb_hash_aset(*result, last_name, v);
+
+ return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
+}
+
+#define GET_PARSER_CONFIG \
+ JSON_ParserConfig *config; \
+ TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
+
+static const rb_data_type_t JSON_ParserConfig_type;
+
+static void
+json_eat_comments(JSON_ParserState *state)
+{
+ const char *start = state->cursor;
+ state->cursor++;
+
+ switch (peek(state)) {
+ case '/': {
+ state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
+ if (!state->cursor) {
+ state->cursor = state->end;
} else {
- rb_funcall(*result, i_aset, 2, last_name, v);
+ state->cursor++;
}
- {p = (( np))-1;}
+ break;
}
- }
- goto st9;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
-#line 267 "parser.c"
- switch( (*p) ) {
- case 13: goto st9;
- case 32: goto st9;
- case 44: goto st10;
- case 47: goto st15;
- case 125: goto tr4;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st9;
- goto st0;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
- switch( (*p) ) {
- case 13: goto st10;
- case 32: goto st10;
- case 34: goto tr2;
- case 47: goto st11;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st10;
- goto st0;
-st11:
- if ( ++p == pe )
- goto _test_eof11;
-case 11:
- switch( (*p) ) {
- case 42: goto st12;
- case 47: goto st14;
- }
- goto st0;
-st12:
- if ( ++p == pe )
- goto _test_eof12;
-case 12:
- if ( (*p) == 42 )
- goto st13;
- goto st12;
-st13:
- if ( ++p == pe )
- goto _test_eof13;
-case 13:
- switch( (*p) ) {
- case 42: goto st13;
- case 47: goto st10;
- }
- goto st12;
-st14:
- if ( ++p == pe )
- goto _test_eof14;
-case 14:
- if ( (*p) == 10 )
- goto st10;
- goto st14;
-st15:
- if ( ++p == pe )
- goto _test_eof15;
-case 15:
- switch( (*p) ) {
- case 42: goto st16;
- case 47: goto st18;
- }
- goto st0;
-st16:
- if ( ++p == pe )
- goto _test_eof16;
-case 16:
- if ( (*p) == 42 )
- goto st17;
- goto st16;
-st17:
- if ( ++p == pe )
- goto _test_eof17;
-case 17:
- switch( (*p) ) {
- case 42: goto st17;
- case 47: goto st9;
- }
- goto st16;
-st18:
- if ( ++p == pe )
- goto _test_eof18;
-case 18:
- if ( (*p) == 10 )
- goto st9;
- goto st18;
-tr4:
-#line 157 "parser.rl"
- { p--; {p++; cs = 27; goto _out;} }
- goto st27;
-st27:
- if ( ++p == pe )
- goto _test_eof27;
-case 27:
-#line 363 "parser.c"
- goto st0;
-st19:
- if ( ++p == pe )
- goto _test_eof19;
-case 19:
- switch( (*p) ) {
- case 42: goto st20;
- case 47: goto st22;
- }
- goto st0;
-st20:
- if ( ++p == pe )
- goto _test_eof20;
-case 20:
- if ( (*p) == 42 )
- goto st21;
- goto st20;
-st21:
- if ( ++p == pe )
- goto _test_eof21;
-case 21:
- switch( (*p) ) {
- case 42: goto st21;
- case 47: goto st8;
- }
- goto st20;
-st22:
- if ( ++p == pe )
- goto _test_eof22;
-case 22:
- if ( (*p) == 10 )
- goto st8;
- goto st22;
-st23:
- if ( ++p == pe )
- goto _test_eof23;
-case 23:
- switch( (*p) ) {
- case 42: goto st24;
- case 47: goto st26;
- }
- goto st0;
-st24:
- if ( ++p == pe )
- goto _test_eof24;
-case 24:
- if ( (*p) == 42 )
- goto st25;
- goto st24;
-st25:
- if ( ++p == pe )
- goto _test_eof25;
-case 25:
- switch( (*p) ) {
- case 42: goto st25;
- case 47: goto st2;
- }
- goto st24;
-st26:
- if ( ++p == pe )
- goto _test_eof26;
-case 26:
- if ( (*p) == 10 )
- goto st2;
- goto st26;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof11: cs = 11; goto _test_eof;
- _test_eof12: cs = 12; goto _test_eof;
- _test_eof13: cs = 13; goto _test_eof;
- _test_eof14: cs = 14; goto _test_eof;
- _test_eof15: cs = 15; goto _test_eof;
- _test_eof16: cs = 16; goto _test_eof;
- _test_eof17: cs = 17; goto _test_eof;
- _test_eof18: cs = 18; goto _test_eof;
- _test_eof27: cs = 27; goto _test_eof;
- _test_eof19: cs = 19; goto _test_eof;
- _test_eof20: cs = 20; goto _test_eof;
- _test_eof21: cs = 21; goto _test_eof;
- _test_eof22: cs = 22; goto _test_eof;
- _test_eof23: cs = 23; goto _test_eof;
- _test_eof24: cs = 24; goto _test_eof;
- _test_eof25: cs = 25; goto _test_eof;
- _test_eof26: cs = 26; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 183 "parser.rl"
-
- if (cs >= JSON_object_first_final) {
- if (json->create_additions) {
- VALUE klassname;
- if (NIL_P(json->object_class)) {
- klassname = rb_hash_aref(*result, json->create_id);
- } else {
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
- }
- if (!NIL_P(klassname)) {
- VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
- *result = rb_funcall(klass, i_json_create, 1, *result);
+ case '*': {
+ state->cursor++;
+
+ while (true) {
+ const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
+ if (!next_match) {
+ raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
+ }
+
+ state->cursor = next_match + 1;
+ if (peek(state) == '/') {
+ state->cursor++;
+ break;
}
}
+ break;
+ }
+ default:
+ raise_parse_error_at("unexpected token %s", state, start);
+ break;
+ }
+}
+
+ALWAYS_INLINE(static) void
+json_eat_whitespace(JSON_ParserState *state)
+{
+ while (true) {
+ switch (peek(state)) {
+ case ' ':
+ state->cursor++;
+ break;
+ case '\n':
+ state->cursor++;
+
+ // Heuristic: if we see a newline, there is likely consecutive spaces after it.
+#if JSON_CPU_LITTLE_ENDIAN_64BITS
+ while (rest(state) > 8) {
+ uint64_t chunk;
+ memcpy(&chunk, state->cursor, sizeof(uint64_t));
+ if (chunk == 0x2020202020202020) {
+ state->cursor += 8;
+ continue;
+ }
+
+ uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
+ state->cursor += consecutive_spaces;
+ break;
+ }
+#endif
+ break;
+ case '\t':
+ case '\r':
+ state->cursor++;
+ break;
+ case '/':
+ json_eat_comments(state);
+ break;
+
+ default:
+ return;
}
- return p + 1;
- } else {
- return NULL;
}
}
+static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
+{
+ if (symbolize) {
+ intern = true;
+ }
+ VALUE result;
+# ifdef HAVE_RB_ENC_INTERNED_STR
+ if (intern) {
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
+ } else {
+ result = rb_utf8_str_new(start, (long)(end - start));
+ }
+# else
+ result = rb_utf8_str_new(start, (long)(end - start));
+ if (intern) {
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
+ }
+# endif
+
+ if (symbolize) {
+ result = rb_str_intern(result);
+ }
+ return result;
+}
-#line 486 "parser.c"
-enum {JSON_value_start = 1};
-enum {JSON_value_first_final = 29};
-enum {JSON_value_error = 0};
+static inline bool json_string_cacheable_p(const char *string, size_t length)
+{
+ // We mostly want to cache strings that are likely to be repeated.
+ // Simple heuristics:
+ // - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
+ // - If the first character isn't a letter, we're much less likely to see this string again.
+ return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
+}
-enum {JSON_value_en_main = 1};
+static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
+{
+ bool intern = is_name || config->freeze;
+ bool symbolize = is_name && config->symbolize_names;
+ size_t bufferSize = stringEnd - string;
+
+ if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
+ VALUE cached_key;
+ if (RB_UNLIKELY(symbolize)) {
+ cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
+ } else {
+ cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
+ }
+ if (RB_LIKELY(cached_key)) {
+ return cached_key;
+ }
+ }
-#line 283 "parser.rl"
+ return build_string(string, stringEnd, intern, symbolize);
+}
+#define JSON_MAX_UNESCAPE_POSITIONS 16
+typedef struct _json_unescape_positions {
+ long size;
+ const char **positions;
+ unsigned long additional_backslashes;
+} JSON_UnescapePositions;
-static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
+static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
{
- int cs = EVIL;
-
-
-#line 502 "parser.c"
- {
- cs = JSON_value_start;
- }
-
-#line 290 "parser.rl"
-
-#line 509 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-st1:
- if ( ++p == pe )
- goto _test_eof1;
-case 1:
- switch( (*p) ) {
- case 13: goto st1;
- case 32: goto st1;
- case 34: goto tr2;
- case 45: goto tr3;
- case 47: goto st6;
- case 73: goto st10;
- case 78: goto st17;
- case 91: goto tr7;
- case 102: goto st19;
- case 110: goto st23;
- case 116: goto st26;
- case 123: goto tr11;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr3;
- } else if ( (*p) >= 9 )
- goto st1;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-tr2:
-#line 235 "parser.rl"
- {
- char *np = JSON_parse_string(json, p, pe, result);
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
+ while (positions->size) {
+ positions->size--;
+ const char *next_position = positions->positions[0];
+ positions->positions++;
+ if (next_position >= pe) {
+ return next_position;
+ }
}
- goto st29;
-tr3:
-#line 240 "parser.rl"
- {
- char *np;
- if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) {
- if (json->allow_nan) {
- *result = CMinusInfinity;
- {p = (( p + 10))-1;}
- p--; {p++; cs = 29; goto _out;}
- } else {
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p);
+
+ if (positions->additional_backslashes) {
+ positions->additional_backslashes--;
+ return memchr(pe, '\\', stringEnd - pe);
+ }
+
+ return NULL;
+}
+
+NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
+{
+ bool intern = is_name || config->freeze;
+ bool symbolize = is_name && config->symbolize_names;
+ size_t bufferSize = stringEnd - string;
+ const char *p = string, *pe = string, *bufferStart;
+ char *buffer;
+
+ VALUE result = rb_str_buf_new(bufferSize);
+ rb_enc_associate_index(result, utf8_encindex);
+ buffer = RSTRING_PTR(result);
+ bufferStart = buffer;
+
+#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
+
+ while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
+ if (pe > p) {
+ MEMCPY(buffer, p, char, pe - p);
+ buffer += pe - p;
+ }
+ switch (*++pe) {
+ case '"':
+ case '/':
+ p = pe; // nothing to unescape just need to skip the backslash
+ break;
+ case '\\':
+ APPEND_CHAR('\\');
+ break;
+ case 'n':
+ APPEND_CHAR('\n');
+ break;
+ case 'r':
+ APPEND_CHAR('\r');
+ break;
+ case 't':
+ APPEND_CHAR('\t');
+ break;
+ case 'b':
+ APPEND_CHAR('\b');
+ break;
+ case 'f':
+ APPEND_CHAR('\f');
+ break;
+ case 'u': {
+ uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
+ pe += 3;
+ /* To handle values above U+FFFF, we take a sequence of
+ * \uXXXX escapes in the U+D800..U+DBFF then
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
+ * to make a 20-bit number, then add 0x10000 to get the
+ * final codepoint.
+ *
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
+ * Area".
+ */
+ if ((ch & 0xFC00) == 0xD800) {
+ pe++;
+ if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
+ uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
+
+ if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
+ raise_parse_error_at("invalid surrogate pair at %s", state, p);
+ }
+
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
+ pe += 5;
+ } else {
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
+ break;
+ }
+ }
+
+ int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
+ buffer += unescape_len;
+ p = ++pe;
+ break;
}
+ default:
+ if ((unsigned char)*pe < 0x20) {
+ if (!config->allow_control_characters) {
+ if (*pe == '\n') {
+ raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
+ }
+ raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
+ }
+ } else {
+ raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
+ }
+ break;
}
- np = JSON_parse_float(json, p, pe, result);
- if (np != NULL) {p = (( np))-1;}
- np = JSON_parse_integer(json, p, pe, result);
- if (np != NULL) {p = (( np))-1;}
- p--; {p++; cs = 29; goto _out;}
}
- goto st29;
-tr7:
-#line 258 "parser.rl"
- {
- char *np;
- np = JSON_parse_array(json, p, pe, result, current_nesting + 1);
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
+#undef APPEND_CHAR
+
+ if (stringEnd > p) {
+ MEMCPY(buffer, p, char, stringEnd - p);
+ buffer += stringEnd - p;
}
- goto st29;
-tr11:
-#line 264 "parser.rl"
- {
- char *np;
- np = JSON_parse_object(json, p, pe, result, current_nesting + 1);
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
+ rb_str_set_len(result, buffer - bufferStart);
+
+ if (symbolize) {
+ result = rb_str_intern(result);
+ } else if (intern) {
+ result = rb_str_to_interned_str(result);
}
- goto st29;
-tr25:
-#line 228 "parser.rl"
- {
- if (json->allow_nan) {
- *result = CInfinity;
- } else {
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8);
+
+ return result;
+}
+
+#define MAX_FAST_INTEGER_SIZE 18
+
+static VALUE json_decode_large_integer(const char *start, long len)
+{
+ VALUE buffer_v;
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
+ MEMCPY(buffer, start, char, len);
+ buffer[len] = '\0';
+ VALUE number = rb_cstr2inum(buffer, 10);
+ RB_ALLOCV_END(buffer_v);
+ return number;
+}
+
+static inline VALUE
+json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
+{
+ if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
+ if (negative) {
+ return INT64T2NUM(-((int64_t)mantissa));
}
+ return UINT64T2NUM(mantissa);
}
- goto st29;
-tr27:
-#line 221 "parser.rl"
- {
- if (json->allow_nan) {
- *result = CNaN;
- } else {
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2);
- }
+
+ return json_decode_large_integer(start, end - start);
+}
+
+static VALUE json_decode_large_float(const char *start, long len)
+{
+ if (RB_LIKELY(len < 64)) {
+ char buffer[64];
+ MEMCPY(buffer, start, char, len);
+ buffer[len] = '\0';
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
}
- goto st29;
-tr31:
-#line 215 "parser.rl"
- {
- *result = Qfalse;
+
+ VALUE buffer_v;
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
+ MEMCPY(buffer, start, char, len);
+ buffer[len] = '\0';
+ VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
+ RB_ALLOCV_END(buffer_v);
+ return number;
+}
+
+/* Ruby JSON optimized float decoder using vendored Ryu algorithm
+ * Accepts pre-extracted mantissa and exponent from first-pass validation
+ */
+static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
+ const char *start, const char *end)
+{
+ if (RB_UNLIKELY(config->decimal_class)) {
+ VALUE text = rb_str_new(start, end - start);
+ return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
}
- goto st29;
-tr34:
-#line 212 "parser.rl"
- {
- *result = Qnil;
+
+ // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
+ // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
+ if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
+ return json_decode_large_float(start, end - start);
}
- goto st29;
-tr37:
-#line 218 "parser.rl"
- {
- *result = Qtrue;
+
+ return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
+}
+
+static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
+{
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count));
+ rvalue_stack_pop(state->stack, count);
+
+ if (config->freeze) {
+ RB_OBJ_FREEZE(array);
}
- goto st29;
-st29:
- if ( ++p == pe )
- goto _test_eof29;
-case 29:
-#line 270 "parser.rl"
- { p--; {p++; cs = 29; goto _out;} }
-#line 629 "parser.c"
- switch( (*p) ) {
- case 13: goto st29;
- case 32: goto st29;
- case 47: goto st2;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st29;
- goto st0;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 42: goto st3;
- case 47: goto st5;
- }
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- if ( (*p) == 42 )
- goto st4;
- goto st3;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- switch( (*p) ) {
- case 42: goto st4;
- case 47: goto st29;
- }
- goto st3;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) == 10 )
- goto st29;
- goto st5;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- switch( (*p) ) {
- case 42: goto st7;
- case 47: goto st9;
- }
- goto st0;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) == 42 )
- goto st8;
- goto st7;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- switch( (*p) ) {
- case 42: goto st8;
- case 47: goto st1;
- }
- goto st7;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
- if ( (*p) == 10 )
- goto st1;
- goto st9;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
- if ( (*p) == 110 )
- goto st11;
- goto st0;
-st11:
- if ( ++p == pe )
- goto _test_eof11;
-case 11:
- if ( (*p) == 102 )
- goto st12;
- goto st0;
-st12:
- if ( ++p == pe )
- goto _test_eof12;
-case 12:
- if ( (*p) == 105 )
- goto st13;
- goto st0;
-st13:
- if ( ++p == pe )
- goto _test_eof13;
-case 13:
- if ( (*p) == 110 )
- goto st14;
- goto st0;
-st14:
- if ( ++p == pe )
- goto _test_eof14;
-case 14:
- if ( (*p) == 105 )
- goto st15;
- goto st0;
-st15:
- if ( ++p == pe )
- goto _test_eof15;
-case 15:
- if ( (*p) == 116 )
- goto st16;
- goto st0;
-st16:
- if ( ++p == pe )
- goto _test_eof16;
-case 16:
- if ( (*p) == 121 )
- goto tr25;
- goto st0;
-st17:
- if ( ++p == pe )
- goto _test_eof17;
-case 17:
- if ( (*p) == 97 )
- goto st18;
- goto st0;
-st18:
- if ( ++p == pe )
- goto _test_eof18;
-case 18:
- if ( (*p) == 78 )
- goto tr27;
- goto st0;
-st19:
- if ( ++p == pe )
- goto _test_eof19;
-case 19:
- if ( (*p) == 97 )
- goto st20;
- goto st0;
-st20:
- if ( ++p == pe )
- goto _test_eof20;
-case 20:
- if ( (*p) == 108 )
- goto st21;
- goto st0;
-st21:
- if ( ++p == pe )
- goto _test_eof21;
-case 21:
- if ( (*p) == 115 )
- goto st22;
- goto st0;
-st22:
- if ( ++p == pe )
- goto _test_eof22;
-case 22:
- if ( (*p) == 101 )
- goto tr31;
- goto st0;
-st23:
- if ( ++p == pe )
- goto _test_eof23;
-case 23:
- if ( (*p) == 117 )
- goto st24;
- goto st0;
-st24:
- if ( ++p == pe )
- goto _test_eof24;
-case 24:
- if ( (*p) == 108 )
- goto st25;
- goto st0;
-st25:
- if ( ++p == pe )
- goto _test_eof25;
-case 25:
- if ( (*p) == 108 )
- goto tr34;
- goto st0;
-st26:
- if ( ++p == pe )
- goto _test_eof26;
-case 26:
- if ( (*p) == 114 )
- goto st27;
- goto st0;
-st27:
- if ( ++p == pe )
- goto _test_eof27;
-case 27:
- if ( (*p) == 117 )
- goto st28;
- goto st0;
-st28:
- if ( ++p == pe )
- goto _test_eof28;
-case 28:
- if ( (*p) == 101 )
- goto tr37;
- goto st0;
- }
- _test_eof1: cs = 1; goto _test_eof;
- _test_eof29: cs = 29; goto _test_eof;
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof11: cs = 11; goto _test_eof;
- _test_eof12: cs = 12; goto _test_eof;
- _test_eof13: cs = 13; goto _test_eof;
- _test_eof14: cs = 14; goto _test_eof;
- _test_eof15: cs = 15; goto _test_eof;
- _test_eof16: cs = 16; goto _test_eof;
- _test_eof17: cs = 17; goto _test_eof;
- _test_eof18: cs = 18; goto _test_eof;
- _test_eof19: cs = 19; goto _test_eof;
- _test_eof20: cs = 20; goto _test_eof;
- _test_eof21: cs = 21; goto _test_eof;
- _test_eof22: cs = 22; goto _test_eof;
- _test_eof23: cs = 23; goto _test_eof;
- _test_eof24: cs = 24; goto _test_eof;
- _test_eof25: cs = 25; goto _test_eof;
- _test_eof26: cs = 26; goto _test_eof;
- _test_eof27: cs = 27; goto _test_eof;
- _test_eof28: cs = 28; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 291 "parser.rl"
-
- if (cs >= JSON_value_first_final) {
- return p;
- } else {
- return NULL;
+
+ return array;
+}
+
+static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
+{
+ VALUE set = rb_hash_new_capa(count / 2);
+ for (size_t index = 0; index < count; index += 2) {
+ size_t before = RHASH_SIZE(set);
+ VALUE key = pairs[index];
+ rb_hash_aset(set, key, Qtrue);
+ if (RHASH_SIZE(set) == before) {
+ if (RB_SYMBOL_P(key)) {
+ return rb_sym2str(key);
+ }
+ return key;
+ }
}
+ return Qfalse;
+}
+
+static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
+{
+ VALUE message = rb_sprintf(
+ "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
+ rb_inspect(duplicate_key)
+ );
+
+ emit_parse_warning(RSTRING_PTR(message), state);
+ RB_GC_GUARD(message);
}
+#ifdef RBIMPL_ATTR_NORETURN
+RBIMPL_ATTR_NORETURN()
+#endif
+static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
+{
+ VALUE message = rb_sprintf(
+ "duplicate key %"PRIsVALUE,
+ rb_inspect(duplicate_key)
+ );
-#line 880 "parser.c"
-enum {JSON_integer_start = 1};
-enum {JSON_integer_first_final = 3};
-enum {JSON_integer_error = 0};
+ raise_parse_error(RSTRING_PTR(message), state);
+ RB_GC_GUARD(message);
+}
-enum {JSON_integer_en_main = 1};
+static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
+{
+ size_t entries_count = count / 2;
+ VALUE object = rb_hash_new_capa(entries_count);
+ const VALUE *pairs = rvalue_stack_peek(state->stack, count);
+ rb_hash_bulk_insert(count, pairs, object);
+
+ if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
+ switch (config->on_duplicate_key) {
+ case JSON_IGNORE:
+ break;
+ case JSON_DEPRECATED:
+ emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
+ break;
+ case JSON_RAISE:
+ raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
+ break;
+ }
+ }
+ rvalue_stack_pop(state->stack, count);
-#line 307 "parser.rl"
+ if (config->freeze) {
+ RB_OBJ_FREEZE(object);
+ }
+ return object;
+}
-static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
+static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
{
- int cs = EVIL;
-
-
-#line 896 "parser.c"
- {
- cs = JSON_integer_start;
- }
-
-#line 314 "parser.rl"
- json->memo = p;
-
-#line 904 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- switch( (*p) ) {
- case 45: goto st2;
- case 48: goto st3;
- }
- if ( 49 <= (*p) && (*p) <= 57 )
- goto st5;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- if ( (*p) == 48 )
- goto st3;
- if ( 49 <= (*p) && (*p) <= 57 )
- goto st5;
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st0;
- goto tr4;
-tr4:
-#line 304 "parser.rl"
- { p--; {p++; cs = 4; goto _out;} }
- goto st4;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
-#line 945 "parser.c"
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st5;
- goto tr4;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 316 "parser.rl"
-
- if (cs >= JSON_integer_first_final) {
- long len = p - json->memo;
- fbuffer_clear(json->fbuffer);
- fbuffer_append(json->fbuffer, json->memo, len);
- fbuffer_append_char(json->fbuffer, '\0');
- *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10);
- return p + 1;
- } else {
- return NULL;
+ if (RB_UNLIKELY(config->on_load_proc)) {
+ value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
}
+ rvalue_stack_push(state->stack, value, &state->stack_handle, &state->stack);
+ return value;
}
+static const bool string_scan_table[256] = {
+ // ASCII Control Characters
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ // ASCII Characters
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '\\'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+#ifdef HAVE_SIMD
+static SIMD_Implementation simd_impl = SIMD_NONE;
+#endif /* HAVE_SIMD */
+
+ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
+{
+#ifdef HAVE_SIMD
+#if defined(HAVE_SIMD_NEON)
+
+ uint64_t mask = 0;
+ if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
+ state->cursor += trailing_zeros64(mask) >> 2;
+ return true;
+ }
-#line 979 "parser.c"
-enum {JSON_float_start = 1};
-enum {JSON_float_first_final = 8};
-enum {JSON_float_error = 0};
+#elif defined(HAVE_SIMD_SSE2)
+ if (simd_impl == SIMD_SSE2) {
+ int mask = 0;
+ if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
+ state->cursor += trailing_zeros(mask);
+ return true;
+ }
+ }
+#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
+#endif /* HAVE_SIMD */
-enum {JSON_float_en_main = 1};
+ while (!eos(state)) {
+ if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
+ return true;
+ }
+ state->cursor++;
+ }
+ return false;
+}
+static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
+{
+ const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
+ JSON_UnescapePositions positions = {
+ .size = 0,
+ .positions = backslashes,
+ .additional_backslashes = 0,
+ };
+
+ do {
+ switch (*state->cursor) {
+ case '"': {
+ VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
+ state->cursor++;
+ return json_push_value(state, config, string);
+ }
+ case '\\': {
+ if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
+ backslashes[positions.size] = state->cursor;
+ positions.size++;
+ } else {
+ positions.additional_backslashes++;
+ }
+ state->cursor++;
+ break;
+ }
+ default:
+ if (!config->allow_control_characters) {
+ raise_parse_error("invalid ASCII control character in string: %s", state);
+ }
+ break;
+ }
-#line 341 "parser.rl"
+ state->cursor++;
+ } while (string_scan(state));
+ raise_parse_error("unexpected end of input, expected closing \"", state);
+ return Qfalse;
+}
-static int is_bigdecimal_class(VALUE obj)
+ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
{
- if (cBigDecimal == Qundef) {
- if (rb_const_defined(rb_cObject, i_BigDecimal)) {
- cBigDecimal = rb_const_get_at(rb_cObject, i_BigDecimal);
+ state->cursor++;
+ const char *start = state->cursor;
+
+ if (RB_UNLIKELY(!string_scan(state))) {
+ raise_parse_error("unexpected end of input, expected closing \"", state);
}
- else {
- return 0;
+
+ if (RB_LIKELY(*state->cursor == '"')) {
+ VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
+ state->cursor++;
+ return json_push_value(state, config, string);
}
- }
- return obj == cBigDecimal;
+ return json_parse_escaped_string(state, config, is_name, start);
+}
+
+#if JSON_CPU_LITTLE_ENDIAN_64BITS
+// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
+// Additional References:
+// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
+// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
+static inline uint64_t decode_8digits_unrolled(uint64_t val) {
+ const uint64_t mask = 0x000000FF000000FF;
+ const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
+ const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
+ val -= 0x3030303030303030;
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
+ val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
+ return val;
+}
+
+static inline uint64_t decode_4digits_unrolled(uint32_t val) {
+ const uint32_t mask = 0x000000FF;
+ const uint32_t mul1 = 100;
+ val -= 0x30303030;
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
+ val = ((val & mask) * mul1) + (((val >> 16) & mask));
+ return val;
}
+#endif
-static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
+static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
{
- int cs = EVIL;
-
-
-#line 1008 "parser.c"
- {
- cs = JSON_float_start;
- }
-
-#line 361 "parser.rl"
- json->memo = p;
-
-#line 1016 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- switch( (*p) ) {
- case 45: goto st2;
- case 48: goto st3;
- }
- if ( 49 <= (*p) && (*p) <= 57 )
- goto st7;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- if ( (*p) == 48 )
- goto st3;
- if ( 49 <= (*p) && (*p) <= 57 )
- goto st7;
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- switch( (*p) ) {
- case 46: goto st4;
- case 69: goto st5;
- case 101: goto st5;
- }
- goto st0;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st8;
- goto st0;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- switch( (*p) ) {
- case 69: goto st5;
- case 101: goto st5;
- }
- if ( (*p) > 46 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st8;
- } else if ( (*p) >= 45 )
- goto st0;
- goto tr9;
-tr9:
-#line 335 "parser.rl"
- { p--; {p++; cs = 9; goto _out;} }
- goto st9;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
-#line 1081 "parser.c"
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- switch( (*p) ) {
- case 43: goto st6;
- case 45: goto st6;
- }
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st10;
- goto st0;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st10;
- goto st0;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
- switch( (*p) ) {
- case 69: goto st0;
- case 101: goto st0;
- }
- if ( (*p) > 46 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st10;
- } else if ( (*p) >= 45 )
- goto st0;
- goto tr9;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- switch( (*p) ) {
- case 46: goto st4;
- case 69: goto st5;
- case 101: goto st5;
- }
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st7;
- goto st0;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 363 "parser.rl"
-
- if (cs >= JSON_float_first_final) {
- long len = p - json->memo;
- fbuffer_clear(json->fbuffer);
- fbuffer_append(json->fbuffer, json->memo, len);
- fbuffer_append_char(json->fbuffer, '\0');
- if (NIL_P(json->decimal_class)) {
- *result = rb_float_new(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1));
- } else {
- VALUE text;
- text = rb_str_new2(FBUFFER_PTR(json->fbuffer));
- if (is_bigdecimal_class(json->decimal_class)) {
- *result = rb_funcall(Qnil, i_BigDecimal, 1, text);
- } else {
- *result = rb_funcall(json->decimal_class, i_new, 1, text);
- }
+ const char *start = state->cursor;
+
+#if JSON_CPU_LITTLE_ENDIAN_64BITS
+ while (rest(state) >= sizeof(uint64_t)) {
+ uint64_t next_8bytes;
+ memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
+
+ // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
+ // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
+ uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
+
+ if (match == 0x3333333333333333) { // 8 consecutive digits
+ *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
+ state->cursor += 8;
+ continue;
}
- return p + 1;
- } else {
- return NULL;
+
+ uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
+
+ if (consecutive_digits >= 4) {
+ *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
+ state->cursor += 4;
+ consecutive_digits -= 4;
+ }
+
+ while (consecutive_digits) {
+ *accumulator = *accumulator * 10 + (*state->cursor - '0');
+ consecutive_digits--;
+ state->cursor++;
+ }
+
+ return (int)(state->cursor - start);
+ }
+#endif
+
+ char next_char;
+ while (rb_isdigit(next_char = peek(state))) {
+ *accumulator = *accumulator * 10 + (next_char - '0');
+ state->cursor++;
}
+ return (int)(state->cursor - start);
}
+static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
+{
+ bool integer = true;
+ const char first_digit = *state->cursor;
+ // Variables for Ryu optimization - extract digits during parsing
+ int32_t exponent = 0;
+ int decimal_point_pos = -1;
+ uint64_t mantissa = 0;
-#line 1168 "parser.c"
-enum {JSON_array_start = 1};
-enum {JSON_array_first_final = 17};
-enum {JSON_array_error = 0};
+ // Parse integer part and extract mantissa digits
+ int mantissa_digits = json_parse_digits(state, &mantissa);
-enum {JSON_array_en_main = 1};
+ if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
+ raise_parse_error_at("invalid number: %s", state, start);
+ }
+ // Parse fractional part
+ if (peek(state) == '.') {
+ integer = false;
+ decimal_point_pos = mantissa_digits; // Remember position of decimal point
+ state->cursor++;
-#line 416 "parser.rl"
+ int fractional_digits = json_parse_digits(state, &mantissa);
+ mantissa_digits += fractional_digits;
+ if (RB_UNLIKELY(!fractional_digits)) {
+ raise_parse_error_at("invalid number: %s", state, start);
+ }
+ }
-static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
-{
- int cs = EVIL;
- VALUE array_class = json->array_class;
+ // Parse exponent
+ if (rb_tolower(peek(state)) == 'e') {
+ integer = false;
+ state->cursor++;
- if (json->max_nesting && current_nesting > json->max_nesting) {
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
- }
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
-
-
-#line 1190 "parser.c"
- {
- cs = JSON_array_start;
- }
-
-#line 429 "parser.rl"
-
-#line 1197 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- if ( (*p) == 91 )
- goto st2;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 13: goto st2;
- case 32: goto st2;
- case 34: goto tr2;
- case 45: goto tr2;
- case 47: goto st13;
- case 73: goto tr2;
- case 78: goto tr2;
- case 91: goto tr2;
- case 93: goto tr4;
- case 102: goto tr2;
- case 110: goto tr2;
- case 116: goto tr2;
- case 123: goto tr2;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr2;
- } else if ( (*p) >= 9 )
- goto st2;
- goto st0;
-tr2:
-#line 393 "parser.rl"
- {
- VALUE v = Qnil;
- char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
- if (np == NULL) {
- p--; {p++; cs = 3; goto _out;}
- } else {
- if (NIL_P(json->array_class)) {
- rb_ary_push(*result, v);
- } else {
- rb_funcall(*result, i_leftshift, 1, v);
- }
- {p = (( np))-1;}
+ bool negative_exponent = false;
+ const char next_char = peek(state);
+ if (next_char == '-' || next_char == '+') {
+ negative_exponent = next_char == '-';
+ state->cursor++;
+ }
+
+ uint64_t abs_exponent = 0;
+ int exponent_digits = json_parse_digits(state, &abs_exponent);
+
+ if (RB_UNLIKELY(!exponent_digits)) {
+ raise_parse_error_at("invalid number: %s", state, start);
}
+
+ exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
}
- goto st3;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
-#line 1256 "parser.c"
- switch( (*p) ) {
- case 13: goto st3;
- case 32: goto st3;
- case 44: goto st4;
- case 47: goto st9;
- case 93: goto tr4;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st3;
- goto st0;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- switch( (*p) ) {
- case 13: goto st4;
- case 32: goto st4;
- case 34: goto tr2;
- case 45: goto tr2;
- case 47: goto st5;
- case 73: goto tr2;
- case 78: goto tr2;
- case 91: goto tr2;
- case 102: goto tr2;
- case 110: goto tr2;
- case 116: goto tr2;
- case 123: goto tr2;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr2;
- } else if ( (*p) >= 9 )
- goto st4;
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- switch( (*p) ) {
- case 42: goto st6;
- case 47: goto st8;
- }
- goto st0;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- if ( (*p) == 42 )
- goto st7;
- goto st6;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- switch( (*p) ) {
- case 42: goto st7;
- case 47: goto st4;
- }
- goto st6;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- if ( (*p) == 10 )
- goto st4;
- goto st8;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
- switch( (*p) ) {
- case 42: goto st10;
- case 47: goto st12;
- }
- goto st0;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
- if ( (*p) == 42 )
- goto st11;
- goto st10;
-st11:
- if ( ++p == pe )
- goto _test_eof11;
-case 11:
- switch( (*p) ) {
- case 42: goto st11;
- case 47: goto st3;
- }
- goto st10;
-st12:
- if ( ++p == pe )
- goto _test_eof12;
-case 12:
- if ( (*p) == 10 )
- goto st3;
- goto st12;
-tr4:
-#line 408 "parser.rl"
- { p--; {p++; cs = 17; goto _out;} }
- goto st17;
-st17:
- if ( ++p == pe )
- goto _test_eof17;
-case 17:
-#line 1363 "parser.c"
- goto st0;
-st13:
- if ( ++p == pe )
- goto _test_eof13;
-case 13:
- switch( (*p) ) {
- case 42: goto st14;
- case 47: goto st16;
- }
- goto st0;
-st14:
- if ( ++p == pe )
- goto _test_eof14;
-case 14:
- if ( (*p) == 42 )
- goto st15;
- goto st14;
-st15:
- if ( ++p == pe )
- goto _test_eof15;
-case 15:
- switch( (*p) ) {
- case 42: goto st15;
- case 47: goto st2;
- }
- goto st14;
-st16:
- if ( ++p == pe )
- goto _test_eof16;
-case 16:
- if ( (*p) == 10 )
- goto st2;
- goto st16;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof11: cs = 11; goto _test_eof;
- _test_eof12: cs = 12; goto _test_eof;
- _test_eof17: cs = 17; goto _test_eof;
- _test_eof13: cs = 13; goto _test_eof;
- _test_eof14: cs = 14; goto _test_eof;
- _test_eof15: cs = 15; goto _test_eof;
- _test_eof16: cs = 16; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 430 "parser.rl"
-
- if(cs >= JSON_array_first_final) {
- return p + 1;
- } else {
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p);
- return NULL;
+
+ if (integer) {
+ return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
+ }
+
+ // Adjust exponent based on decimal point position
+ if (decimal_point_pos >= 0) {
+ exponent -= (mantissa_digits - decimal_point_pos);
}
+
+ return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
}
-static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd)
+static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
- char *p = string, *pe = string, *unescape;
- int unescape_len;
- char buf[4];
-
- while (pe < stringEnd) {
- if (*pe == '\\') {
- unescape = (char *) "?";
- unescape_len = 1;
- if (pe > p) rb_str_buf_cat(result, p, pe - p);
- switch (*++pe) {
- case 'n':
- unescape = (char *) "\n";
- break;
- case 'r':
- unescape = (char *) "\r";
- break;
- case 't':
- unescape = (char *) "\t";
- break;
- case '"':
- unescape = (char *) "\"";
- break;
- case '\\':
- unescape = (char *) "\\";
- break;
- case 'b':
- unescape = (char *) "\b";
- break;
- case 'f':
- unescape = (char *) "\f";
- break;
- case 'u':
- if (pe > stringEnd - 4) {
- rb_enc_raise(
- EXC_ENCODING eParserError,
- "%u: incomplete unicode character escape sequence at '%s'", __LINE__, p
- );
- } else {
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
- pe += 3;
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
- pe++;
- if (pe > stringEnd - 6) {
- rb_enc_raise(
- EXC_ENCODING eParserError,
- "%u: incomplete surrogate pair at '%s'", __LINE__, p
- );
- }
- if (pe[0] == '\\' && pe[1] == 'u') {
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
- | (sur & 0x3FF));
- pe += 5;
- } else {
- unescape = (char *) "?";
- break;
- }
+ return json_parse_number(state, config, false, state->cursor);
+}
+
+static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
+{
+ const char *start = state->cursor;
+ state->cursor++;
+ return json_parse_number(state, config, true, start);
+}
+
+static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
+{
+ json_eat_whitespace(state);
+
+ switch (peek(state)) {
+ case 'n':
+ if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
+ state->cursor += 4;
+ return json_push_value(state, config, Qnil);
+ }
+
+ raise_parse_error("unexpected token %s", state);
+ break;
+ case 't':
+ if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
+ state->cursor += 4;
+ return json_push_value(state, config, Qtrue);
+ }
+
+ raise_parse_error("unexpected token %s", state);
+ break;
+ case 'f':
+ // Note: memcmp with a small power of two compile to an integer comparison
+ if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
+ state->cursor += 5;
+ return json_push_value(state, config, Qfalse);
+ }
+
+ raise_parse_error("unexpected token %s", state);
+ break;
+ case 'N':
+ // Note: memcmp with a small power of two compile to an integer comparison
+ if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
+ state->cursor += 3;
+ return json_push_value(state, config, CNaN);
+ }
+
+ raise_parse_error("unexpected token %s", state);
+ break;
+ case 'I':
+ if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
+ state->cursor += 8;
+ return json_push_value(state, config, CInfinity);
+ }
+
+ raise_parse_error("unexpected token %s", state);
+ break;
+ case '-': {
+ // Note: memcmp with a small power of two compile to an integer comparison
+ if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
+ if (config->allow_nan) {
+ state->cursor += 9;
+ return json_push_value(state, config, CMinusInfinity);
+ } else {
+ raise_parse_error("unexpected token %s", state);
+ }
+ }
+ return json_push_value(state, config, json_parse_negative_number(state, config));
+ break;
+ }
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+ return json_push_value(state, config, json_parse_positive_number(state, config));
+ break;
+ case '"': {
+ // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
+ return json_parse_string(state, config, false);
+ break;
+ }
+ case '[': {
+ state->cursor++;
+ json_eat_whitespace(state);
+ long stack_head = state->stack->head;
+
+ if (peek(state) == ']') {
+ state->cursor++;
+ return json_push_value(state, config, json_decode_array(state, config, 0));
+ } else {
+ state->current_nesting++;
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
+ }
+ state->in_array++;
+ json_parse_any(state, config);
+ }
+
+ while (true) {
+ json_eat_whitespace(state);
+
+ const char next_char = peek(state);
+
+ if (RB_LIKELY(next_char == ',')) {
+ state->cursor++;
+ if (config->allow_trailing_comma) {
+ json_eat_whitespace(state);
+ if (peek(state) == ']') {
+ continue;
}
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
- unescape = buf;
}
- break;
- default:
- p = pe;
+ json_parse_any(state, config);
continue;
+ }
+
+ if (next_char == ']') {
+ state->cursor++;
+ long count = state->stack->head - stack_head;
+ state->current_nesting--;
+ state->in_array--;
+ return json_push_value(state, config, json_decode_array(state, config, count));
+ }
+
+ raise_parse_error("expected ',' or ']' after array value", state);
}
- rb_str_buf_cat(result, unescape, unescape_len);
- p = ++pe;
- } else {
- pe++;
+ break;
}
- }
- rb_str_buf_cat(result, p, pe - p);
- return result;
-}
+ case '{': {
+ const char *object_start_cursor = state->cursor;
+ state->cursor++;
+ json_eat_whitespace(state);
+ long stack_head = state->stack->head;
-#line 1508 "parser.c"
-enum {JSON_string_start = 1};
-enum {JSON_string_first_final = 8};
-enum {JSON_string_error = 0};
+ if (peek(state) == '}') {
+ state->cursor++;
+ return json_push_value(state, config, json_decode_object(state, config, 0));
+ } else {
+ state->current_nesting++;
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
+ }
-enum {JSON_string_en_main = 1};
+ if (peek(state) != '"') {
+ raise_parse_error("expected object key, got %s", state);
+ }
+ json_parse_string(state, config, true);
+ json_eat_whitespace(state);
+ if (peek(state) != ':') {
+ raise_parse_error("expected ':' after object key", state);
+ }
+ state->cursor++;
-#line 537 "parser.rl"
+ json_parse_any(state, config);
+ }
+ while (true) {
+ json_eat_whitespace(state);
-static int
-match_i(VALUE regexp, VALUE klass, VALUE memo)
-{
- if (regexp == Qundef) return ST_STOP;
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) &&
- RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) {
- rb_ary_push(memo, klass);
- return ST_STOP;
- }
- return ST_CONTINUE;
-}
+ const char next_char = peek(state);
+ if (next_char == '}') {
+ state->cursor++;
+ state->current_nesting--;
+ size_t count = state->stack->head - stack_head;
-static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
- VALUE match_string;
-
- *result = rb_str_buf_new(0);
-
-#line 1538 "parser.c"
- {
- cs = JSON_string_start;
- }
-
-#line 558 "parser.rl"
- json->memo = p;
-
-#line 1546 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- if ( (*p) == 34 )
- goto st2;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 34: goto tr2;
- case 92: goto st3;
- }
- if ( 0 <= (*p) && (*p) <= 31 )
- goto st0;
- goto st2;
-tr2:
-#line 523 "parser.rl"
- {
- *result = json_string_unescape(*result, json->memo + 1, p);
- if (NIL_P(*result)) {
- p--;
- {p++; cs = 8; goto _out;}
- } else {
- FORCE_UTF8(*result);
- {p = (( p + 1))-1;}
+ // Temporary rewind cursor in case an error is raised
+ const char *final_cursor = state->cursor;
+ state->cursor = object_start_cursor;
+ VALUE object = json_decode_object(state, config, count);
+ state->cursor = final_cursor;
+
+ return json_push_value(state, config, object);
+ }
+
+ if (next_char == ',') {
+ state->cursor++;
+ json_eat_whitespace(state);
+
+ if (config->allow_trailing_comma) {
+ if (peek(state) == '}') {
+ continue;
+ }
+ }
+
+ if (RB_UNLIKELY(peek(state) != '"')) {
+ raise_parse_error("expected object key, got: %s", state);
+ }
+ json_parse_string(state, config, true);
+
+ json_eat_whitespace(state);
+ if (RB_UNLIKELY(peek(state) != ':')) {
+ raise_parse_error("expected ':' after object key, got: %s", state);
+ }
+ state->cursor++;
+
+ json_parse_any(state, config);
+
+ continue;
+ }
+
+ raise_parse_error("expected ',' or '}' after object value, got: %s", state);
+ }
+ break;
}
- }
-#line 534 "parser.rl"
- { p--; {p++; cs = 8; goto _out;} }
- goto st8;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
-#line 1589 "parser.c"
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- if ( (*p) == 117 )
- goto st4;
- if ( 0 <= (*p) && (*p) <= 31 )
- goto st0;
- goto st2;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- if ( (*p) < 65 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st5;
- } else if ( (*p) > 70 ) {
- if ( 97 <= (*p) && (*p) <= 102 )
- goto st5;
- } else
- goto st5;
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) < 65 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st6;
- } else if ( (*p) > 70 ) {
- if ( 97 <= (*p) && (*p) <= 102 )
- goto st6;
- } else
- goto st6;
- goto st0;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- if ( (*p) < 65 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st7;
- } else if ( (*p) > 70 ) {
- if ( 97 <= (*p) && (*p) <= 102 )
- goto st7;
- } else
- goto st7;
- goto st0;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) < 65 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st2;
- } else if ( (*p) > 70 ) {
- if ( 97 <= (*p) && (*p) <= 102 )
- goto st2;
- } else
- goto st2;
- goto st0;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 560 "parser.rl"
-
- if (json->create_additions && RTEST(match_string = json->match_string)) {
- VALUE klass;
- VALUE memo = rb_ary_new2(2);
- rb_ary_push(memo, *result);
- rb_hash_foreach(match_string, match_i, memo);
- klass = rb_ary_entry(memo, 1);
- if (RTEST(klass)) {
- *result = rb_funcall(klass, i_json_create, 1, *result);
- }
- }
- if (json->symbolize_names && json->parsing_name) {
- *result = rb_str_intern(*result);
- } else {
- if (RB_TYPE_P(*result, T_STRING)) {
- rb_str_resize(*result, RSTRING_LEN(*result));
- }
+ case 0:
+ raise_parse_error("unexpected end of input", state);
+ break;
+
+ default:
+ raise_parse_error("unexpected character: %s", state);
+ break;
}
- if (cs >= JSON_string_first_final) {
- return p + 1;
- } else {
- return NULL;
+
+ raise_parse_error("unreachable: %s", state);
+ return Qundef;
+}
+
+static void json_ensure_eof(JSON_ParserState *state)
+{
+ json_eat_whitespace(state);
+ if (!eos(state)) {
+ raise_parse_error("unexpected token at end of stream %s", state);
}
}
@@ -1702,26 +1411,84 @@ case 7:
static VALUE convert_encoding(VALUE source)
{
-#ifdef HAVE_RUBY_ENCODING_H
- rb_encoding *enc = rb_enc_get(source);
- if (enc == rb_ascii8bit_encoding()) {
- if (OBJ_FROZEN(source)) {
- source = rb_str_dup(source);
- }
- FORCE_UTF8(source);
- } else {
- source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding());
- }
-#endif
+ int encindex = RB_ENCODING_GET(source);
+
+ if (RB_LIKELY(encindex == utf8_encindex)) {
return source;
+ }
+
+ if (encindex == binary_encindex) {
+ // For historical reason, we silently reinterpret binary strings as UTF-8
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
+ }
+
+ return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
+}
+
+static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
+{
+ JSON_ParserConfig *config = (JSON_ParserConfig *)data;
+
+ if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
+ else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
+ else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
+ else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
+ else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
+ else if (key == sym_freeze) { config->freeze = RTEST(val); }
+ else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
+ else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
+ else if (key == sym_decimal_class) {
+ if (RTEST(val)) {
+ if (rb_respond_to(val, i_try_convert)) {
+ config->decimal_class = val;
+ config->decimal_method_id = i_try_convert;
+ } else if (rb_respond_to(val, i_new)) {
+ config->decimal_class = val;
+ config->decimal_method_id = i_new;
+ } else if (RB_TYPE_P(val, T_CLASS)) {
+ VALUE name = rb_class_name(val);
+ const char *name_cstr = RSTRING_PTR(name);
+ const char *last_colon = strrchr(name_cstr, ':');
+ if (last_colon) {
+ const char *mod_path_end = last_colon - 1;
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
+ config->decimal_class = rb_path_to_class(mod_path);
+
+ const char *method_name_beg = last_colon + 1;
+ long before_len = method_name_beg - name_cstr;
+ long len = RSTRING_LEN(name) - before_len;
+ VALUE method_name = rb_str_substr(name, before_len, len);
+ config->decimal_method_id = SYM2ID(rb_str_intern(method_name));
+ } else {
+ config->decimal_class = rb_mKernel;
+ config->decimal_method_id = SYM2ID(rb_str_intern(name));
+ }
+ }
+ }
+ }
+
+ return ST_CONTINUE;
+}
+
+static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
+{
+ config->max_nesting = 100;
+
+ if (!NIL_P(opts)) {
+ Check_Type(opts, T_HASH);
+ if (RHASH_SIZE(opts) > 0) {
+ // We assume in most cases few keys are set so it's faster to go over
+ // the provided keys than to check all possible keys.
+ rb_hash_foreach(opts, parser_config_init_i, (VALUE)config);
+ }
+
+ }
}
/*
- * call-seq: new(source, opts => {})
- *
- * Creates a new JSON::Ext::Parser instance for the string _source_.
+ * call-seq: new(opts => {})
*
- * Creates a new JSON::Ext::Parser instance for the string _source_.
+ * Creates a new JSON::Ext::ParserConfig instance.
*
* It will be configured by the _opts_ hash. _opts_ can have the following
* keys:
@@ -1737,366 +1504,136 @@ static VALUE convert_encoding(VALUE source)
* (keys) in a JSON object. Otherwise strings are returned, which is
* also the default. It's not possible to use this option in
* conjunction with the *create_additions* option.
- * * *create_additions*: If set to false, the Parser doesn't create
- * additions even if a matching class and create_id was found. This option
- * defaults to false.
- * * *object_class*: Defaults to Hash
- * * *array_class*: Defaults to Array
+ * * *decimal_class*: Specifies which class to use instead of the default
+ * (Float) when parsing decimal numbers. This class must accept a single
+ * string argument in its constructor.
*/
-static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
+static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
{
- VALUE source, opts;
- GET_PARSER_INIT;
+ rb_check_frozen(self);
+ GET_PARSER_CONFIG;
+
+ parser_config_init(config, opts);
+
+ RB_OBJ_WRITTEN(self, Qundef, config->decimal_class);
- if (json->Vsource) {
- rb_raise(rb_eTypeError, "already initialized instance");
- }
-#ifdef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH
- rb_scan_args(argc, argv, "1:", &source, &opts);
-#else
- rb_scan_args(argc, argv, "11", &source, &opts);
-#endif
- if (!NIL_P(opts)) {
-#ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH
- opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
- if (NIL_P(opts)) {
- rb_raise(rb_eArgError, "opts needs to be like a hash");
- } else {
-#endif
- VALUE tmp = ID2SYM(i_max_nesting);
- if (option_given_p(opts, tmp)) {
- VALUE max_nesting = rb_hash_aref(opts, tmp);
- if (RTEST(max_nesting)) {
- Check_Type(max_nesting, T_FIXNUM);
- json->max_nesting = FIX2INT(max_nesting);
- } else {
- json->max_nesting = 0;
- }
- } else {
- json->max_nesting = 100;
- }
- tmp = ID2SYM(i_allow_nan);
- if (option_given_p(opts, tmp)) {
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->allow_nan = 0;
- }
- tmp = ID2SYM(i_symbolize_names);
- if (option_given_p(opts, tmp)) {
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->symbolize_names = 0;
- }
- tmp = ID2SYM(i_create_additions);
- if (option_given_p(opts, tmp)) {
- json->create_additions = RTEST(rb_hash_aref(opts, tmp));
- } else {
- json->create_additions = 0;
- }
- if (json->symbolize_names && json->create_additions) {
- rb_raise(rb_eArgError,
- "options :symbolize_names and :create_additions cannot be "
- " used in conjunction");
- }
- tmp = ID2SYM(i_create_id);
- if (option_given_p(opts, tmp)) {
- json->create_id = rb_hash_aref(opts, tmp);
- } else {
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- }
- tmp = ID2SYM(i_object_class);
- if (option_given_p(opts, tmp)) {
- json->object_class = rb_hash_aref(opts, tmp);
- } else {
- json->object_class = Qnil;
- }
- tmp = ID2SYM(i_array_class);
- if (option_given_p(opts, tmp)) {
- json->array_class = rb_hash_aref(opts, tmp);
- } else {
- json->array_class = Qnil;
- }
- tmp = ID2SYM(i_decimal_class);
- if (option_given_p(opts, tmp)) {
- json->decimal_class = rb_hash_aref(opts, tmp);
- } else {
- json->decimal_class = Qnil;
- }
- tmp = ID2SYM(i_match_string);
- if (option_given_p(opts, tmp)) {
- VALUE match_string = rb_hash_aref(opts, tmp);
- json->match_string = RTEST(match_string) ? match_string : Qnil;
- } else {
- json->match_string = Qnil;
- }
-#ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH
- }
-#endif
- } else {
- json->max_nesting = 100;
- json->allow_nan = 0;
- json->create_additions = 1;
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- json->object_class = Qnil;
- json->array_class = Qnil;
- json->decimal_class = Qnil;
- }
- source = convert_encoding(StringValue(source));
- StringValue(source);
- json->len = RSTRING_LEN(source);
- json->source = RSTRING_PTR(source);;
- json->Vsource = source;
return self;
}
+static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
+{
+ Vsource = convert_encoding(StringValue(Vsource));
+ StringValue(Vsource);
+
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
+ rvalue_stack stack = {
+ .type = RVALUE_STACK_STACK_ALLOCATED,
+ .ptr = rvalue_stack_buffer,
+ .capa = RVALUE_STACK_INITIAL_CAPA,
+ };
-#line 1854 "parser.c"
-enum {JSON_start = 1};
-enum {JSON_first_final = 10};
-enum {JSON_error = 0};
+ long len;
+ const char *start;
+ RSTRING_GETMEM(Vsource, start, len);
-enum {JSON_en_main = 1};
+ JSON_ParserState _state = {
+ .start = start,
+ .cursor = start,
+ .end = start + len,
+ .stack = &stack,
+ };
+ JSON_ParserState *state = &_state;
+ VALUE result = json_parse_any(state, config);
-#line 762 "parser.rl"
+ // This may be skipped in case of exception, but
+ // it won't cause a leak.
+ rvalue_stack_eagerly_release(state->stack_handle);
+ json_ensure_eof(state);
+
+ return result;
+}
/*
- * call-seq: parse()
+ * call-seq: parse(source)
*
* Parses the current JSON text _source_ and returns the complete data
* structure as a result.
+ * It raises JSON::ParserError if fail to parse.
*/
-static VALUE cParser_parse(VALUE self)
+static VALUE cParserConfig_parse(VALUE self, VALUE Vsource)
{
- char *p, *pe;
- int cs = EVIL;
- VALUE result = Qnil;
- GET_PARSER;
-
-
-#line 1879 "parser.c"
- {
- cs = JSON_start;
- }
-
-#line 778 "parser.rl"
- p = json->source;
- pe = p + json->len;
-
-#line 1888 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-st1:
- if ( ++p == pe )
- goto _test_eof1;
-case 1:
- switch( (*p) ) {
- case 13: goto st1;
- case 32: goto st1;
- case 34: goto tr2;
- case 45: goto tr2;
- case 47: goto st6;
- case 73: goto tr2;
- case 78: goto tr2;
- case 91: goto tr2;
- case 102: goto tr2;
- case 110: goto tr2;
- case 116: goto tr2;
- case 123: goto tr2;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr2;
- } else if ( (*p) >= 9 )
- goto st1;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-tr2:
-#line 754 "parser.rl"
- {
- char *np = JSON_parse_value(json, p, pe, &result, 0);
- if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
- }
- goto st10;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
-#line 1932 "parser.c"
- switch( (*p) ) {
- case 13: goto st10;
- case 32: goto st10;
- case 47: goto st2;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st10;
- goto st0;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 42: goto st3;
- case 47: goto st5;
- }
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- if ( (*p) == 42 )
- goto st4;
- goto st3;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- switch( (*p) ) {
- case 42: goto st4;
- case 47: goto st10;
- }
- goto st3;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) == 10 )
- goto st10;
- goto st5;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- switch( (*p) ) {
- case 42: goto st7;
- case 47: goto st9;
- }
- goto st0;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) == 42 )
- goto st8;
- goto st7;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- switch( (*p) ) {
- case 42: goto st8;
- case 47: goto st1;
- }
- goto st7;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
- if ( (*p) == 10 )
- goto st1;
- goto st9;
- }
- _test_eof1: cs = 1; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 781 "parser.rl"
-
- if (cs >= JSON_first_final && p == pe) {
- return result;
- } else {
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p);
- return Qnil;
- }
+ GET_PARSER_CONFIG;
+ return cParser_parse(config, Vsource);
+}
+
+static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
+{
+ Vsource = convert_encoding(StringValue(Vsource));
+ StringValue(Vsource);
+
+ JSON_ParserConfig _config = {0};
+ JSON_ParserConfig *config = &_config;
+ parser_config_init(config, opts);
+
+ return cParser_parse(config, Vsource);
}
-static void JSON_mark(void *ptr)
+static void JSON_ParserConfig_mark(void *ptr)
{
- JSON_Parser *json = ptr;
- rb_gc_mark_maybe(json->Vsource);
- rb_gc_mark_maybe(json->create_id);
- rb_gc_mark_maybe(json->object_class);
- rb_gc_mark_maybe(json->array_class);
- rb_gc_mark_maybe(json->decimal_class);
- rb_gc_mark_maybe(json->match_string);
+ JSON_ParserConfig *config = ptr;
+ rb_gc_mark(config->on_load_proc);
+ rb_gc_mark(config->decimal_class);
}
-static void JSON_free(void *ptr)
+static void JSON_ParserConfig_free(void *ptr)
{
- JSON_Parser *json = ptr;
- fbuffer_free(json->fbuffer);
- ruby_xfree(json);
+ JSON_ParserConfig *config = ptr;
+ ruby_xfree(config);
}
-static size_t JSON_memsize(const void *ptr)
+static size_t JSON_ParserConfig_memsize(const void *ptr)
{
- const JSON_Parser *json = ptr;
- return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
+ return sizeof(JSON_ParserConfig);
}
-#ifdef NEW_TYPEDDATA_WRAPPER
-static const rb_data_type_t JSON_Parser_type = {
- "JSON/Parser",
- {JSON_mark, JSON_free, JSON_memsize,},
-#ifdef RUBY_TYPED_FREE_IMMEDIATELY
+static const rb_data_type_t JSON_ParserConfig_type = {
+ "JSON::Ext::Parser/ParserConfig",
+ {
+ JSON_ParserConfig_mark,
+ JSON_ParserConfig_free,
+ JSON_ParserConfig_memsize,
+ },
0, 0,
- RUBY_TYPED_FREE_IMMEDIATELY,
-#endif
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
};
-#endif
static VALUE cJSON_parser_s_allocate(VALUE klass)
{
- JSON_Parser *json;
- VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
- json->fbuffer = fbuffer_alloc(0);
- return obj;
-}
-
-/*
- * call-seq: source()
- *
- * Returns a copy of the current _source_ string, that was used to construct
- * this Parser.
- */
-static VALUE cParser_source(VALUE self)
-{
- GET_PARSER;
- return rb_str_dup(json->Vsource);
+ JSON_ParserConfig *config;
+ return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config);
}
void Init_parser(void)
{
+#ifdef HAVE_RB_EXT_RACTOR_SAFE
+ rb_ext_ractor_safe(true);
+#endif
+
#undef rb_intern
rb_require("json/common");
mJSON = rb_define_module("JSON");
- mExt = rb_define_module_under(mJSON, "Ext");
- cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
- eParserError = rb_path2class("JSON::ParserError");
+ VALUE mExt = rb_define_module_under(mJSON, "Ext");
+ VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);
eNestingError = rb_path2class("JSON::NestingError");
- rb_gc_register_mark_object(eParserError);
rb_gc_register_mark_object(eNestingError);
- rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
- rb_define_method(cParser, "initialize", cParser_initialize, -1);
- rb_define_method(cParser, "parse", cParser_parse, 0);
- rb_define_method(cParser, "source", cParser_source, 0);
+ rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
+ rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
+ rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);
+
+ VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
CNaN = rb_const_get(mJSON, rb_intern("NaN"));
rb_gc_register_mark_object(CNaN);
@@ -2107,32 +1644,29 @@ void Init_parser(void)
CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
rb_gc_register_mark_object(CMinusInfinity);
- i_json_creatable_p = rb_intern("json_creatable?");
- i_json_create = rb_intern("json_create");
- i_create_id = rb_intern("create_id");
- i_create_additions = rb_intern("create_additions");
- i_chr = rb_intern("chr");
- i_max_nesting = rb_intern("max_nesting");
- i_allow_nan = rb_intern("allow_nan");
- i_symbolize_names = rb_intern("symbolize_names");
- i_object_class = rb_intern("object_class");
- i_array_class = rb_intern("array_class");
- i_decimal_class = rb_intern("decimal_class");
- i_match = rb_intern("match");
- i_match_string = rb_intern("match_string");
- i_key_p = rb_intern("key?");
- i_deep_const_get = rb_intern("deep_const_get");
- i_aset = rb_intern("[]=");
- i_aref = rb_intern("[]");
- i_leftshift = rb_intern("<<");
+ rb_global_variable(&Encoding_UTF_8);
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
+
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
+ sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
+ sym_freeze = ID2SYM(rb_intern("freeze"));
+ sym_on_load = ID2SYM(rb_intern("on_load"));
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
+
i_new = rb_intern("new");
- i_BigDecimal = rb_intern("BigDecimal");
-}
+ i_try_convert = rb_intern("try_convert");
+ i_uminus = rb_intern("-@");
+ i_encode = rb_intern("encode");
-/*
- * Local variables:
- * mode: c
- * c-file-style: ruby
- * indent-tabs-mode: nil
- * End:
- */
+ binary_encindex = rb_ascii8bit_encindex();
+ utf8_encindex = rb_utf8_encindex();
+ enc_utf8 = rb_utf8_encoding();
+
+#ifdef HAVE_SIMD
+ simd_impl = find_simd_implementation();
+#endif
+}
diff --git a/ext/json/parser/parser.h b/ext/json/parser/parser.h
deleted file mode 100644
index e6cf779024..0000000000
--- a/ext/json/parser/parser.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef _PARSER_H_
-#define _PARSER_H_
-
-#include "ruby.h"
-
-#ifndef HAVE_RUBY_RE_H
-#include "re.h"
-#endif
-
-#ifdef HAVE_RUBY_ST_H
-#include "ruby/st.h"
-#else
-#include "st.h"
-#endif
-
-#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key))
-
-/* unicode */
-
-typedef unsigned long UTF32; /* at least 32 bits */
-typedef unsigned short UTF16; /* at least 16 bits */
-typedef unsigned char UTF8; /* typically 8 bits */
-
-#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
-#define UNI_SUR_HIGH_START (UTF32)0xD800
-#define UNI_SUR_HIGH_END (UTF32)0xDBFF
-#define UNI_SUR_LOW_START (UTF32)0xDC00
-#define UNI_SUR_LOW_END (UTF32)0xDFFF
-
-typedef struct JSON_ParserStruct {
- VALUE Vsource;
- char *source;
- long len;
- char *memo;
- VALUE create_id;
- int max_nesting;
- int allow_nan;
- int parsing_name;
- int symbolize_names;
- VALUE object_class;
- VALUE array_class;
- VALUE decimal_class;
- int create_additions;
- VALUE match_string;
- FBuffer *fbuffer;
-} JSON_Parser;
-
-#define GET_PARSER \
- GET_PARSER_INIT; \
- if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance")
-#define GET_PARSER_INIT \
- JSON_Parser *json; \
- TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
-
-#define MinusInfinity "-Infinity"
-#define EVIL 0x666
-
-static UTF32 unescape_unicode(const unsigned char *p);
-static int convert_UTF32_to_UTF8(char *buf, UTF32 ch);
-static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
-static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
-static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd);
-static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
-static VALUE convert_encoding(VALUE source);
-static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self);
-static VALUE cParser_parse(VALUE self);
-static void JSON_mark(void *json);
-static void JSON_free(void *json);
-static VALUE cJSON_parser_s_allocate(VALUE klass);
-static VALUE cParser_source(VALUE self);
-#ifndef ZALLOC
-#define ZALLOC(type) ((type *)ruby_zalloc(sizeof(type)))
-static inline void *ruby_zalloc(size_t n)
-{
- void *p = ruby_xmalloc(n);
- memset(p, 0, n);
- return p;
-}
-#endif
-#ifdef TypedData_Make_Struct
-static const rb_data_type_t JSON_Parser_type;
-#define NEW_TYPEDDATA_WRAPPER 1
-#else
-#define TypedData_Make_Struct(klass, type, ignore, json) Data_Make_Struct(klass, type, NULL, JSON_free, json)
-#define TypedData_Get_Struct(self, JSON_Parser, ignore, json) Data_Get_Struct(self, JSON_Parser, json)
-#endif
-
-#endif
diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl
deleted file mode 100644
index d4e7a60e9d..0000000000
--- a/ext/json/parser/parser.rl
+++ /dev/null
@@ -1,891 +0,0 @@
-#include "../fbuffer/fbuffer.h"
-#include "parser.h"
-
-#if defined HAVE_RUBY_ENCODING_H
-# define EXC_ENCODING rb_utf8_encoding(),
-# ifndef HAVE_RB_ENC_RAISE
-static void
-enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...)
-{
- va_list args;
- VALUE mesg;
-
- va_start(args, fmt);
- mesg = rb_enc_vsprintf(enc, fmt, args);
- va_end(args);
-
- rb_exc_raise(rb_exc_new3(exc, mesg));
-}
-# define rb_enc_raise enc_raise
-# endif
-#else
-# define EXC_ENCODING /* nothing */
-# define rb_enc_raise rb_raise
-#endif
-
-/* unicode */
-
-static const char digit_values[256] = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
- -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1
-};
-
-static UTF32 unescape_unicode(const unsigned char *p)
-{
- char b;
- UTF32 result = 0;
- b = digit_values[p[0]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- result = (result << 4) | (unsigned char)b;
- b = digit_values[p[1]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- result = (result << 4) | (unsigned char)b;
- b = digit_values[p[2]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- result = (result << 4) | (unsigned char)b;
- b = digit_values[p[3]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- result = (result << 4) | (unsigned char)b;
- return result;
-}
-
-static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
-{
- int len = 1;
- if (ch <= 0x7F) {
- buf[0] = (char) ch;
- } else if (ch <= 0x07FF) {
- buf[0] = (char) ((ch >> 6) | 0xC0);
- buf[1] = (char) ((ch & 0x3F) | 0x80);
- len++;
- } else if (ch <= 0xFFFF) {
- buf[0] = (char) ((ch >> 12) | 0xE0);
- buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
- buf[2] = (char) ((ch & 0x3F) | 0x80);
- len += 2;
- } else if (ch <= 0x1fffff) {
- buf[0] =(char) ((ch >> 18) | 0xF0);
- buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
- buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
- buf[3] =(char) ((ch & 0x3F) | 0x80);
- len += 3;
- } else {
- buf[0] = '?';
- }
- return len;
-}
-
-static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
-static VALUE CNaN, CInfinity, CMinusInfinity;
-static VALUE cBigDecimal = Qundef;
-
-static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
- i_object_class, i_array_class, i_decimal_class, i_key_p,
- i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
- i_leftshift, i_new, i_BigDecimal;
-
-%%{
- machine JSON_common;
-
- cr = '\n';
- cr_neg = [^\n];
- ws = [ \t\r\n];
- c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/';
- cpp_comment = '//' cr_neg* cr;
- comment = c_comment | cpp_comment;
- ignore = ws | comment;
- name_separator = ':';
- value_separator = ',';
- Vnull = 'null';
- Vfalse = 'false';
- Vtrue = 'true';
- VNaN = 'NaN';
- VInfinity = 'Infinity';
- VMinusInfinity = '-Infinity';
- begin_value = [nft\"\-\[\{NI] | digit;
- begin_object = '{';
- end_object = '}';
- begin_array = '[';
- end_array = ']';
- begin_string = '"';
- begin_name = begin_string;
- begin_number = digit | '-';
-}%%
-
-%%{
- machine JSON_object;
- include JSON_common;
-
- write data;
-
- action parse_value {
- VALUE v = Qnil;
- char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting);
- if (np == NULL) {
- fhold; fbreak;
- } else {
- if (NIL_P(json->object_class)) {
- rb_hash_aset(*result, last_name, v);
- } else {
- rb_funcall(*result, i_aset, 2, last_name, v);
- }
- fexec np;
- }
- }
-
- action parse_name {
- char *np;
- json->parsing_name = 1;
- np = JSON_parse_string(json, fpc, pe, &last_name);
- json->parsing_name = 0;
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- action exit { fhold; fbreak; }
-
- pair = ignore* begin_name >parse_name ignore* name_separator ignore* begin_value >parse_value;
- next_pair = ignore* value_separator pair;
-
- main := (
- begin_object
- (pair (next_pair)*)? ignore*
- end_object
- ) @exit;
-}%%
-
-static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
-{
- int cs = EVIL;
- VALUE last_name = Qnil;
- VALUE object_class = json->object_class;
-
- if (json->max_nesting && current_nesting > json->max_nesting) {
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
- }
-
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
-
- %% write init;
- %% write exec;
-
- if (cs >= JSON_object_first_final) {
- if (json->create_additions) {
- VALUE klassname;
- if (NIL_P(json->object_class)) {
- klassname = rb_hash_aref(*result, json->create_id);
- } else {
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
- }
- if (!NIL_P(klassname)) {
- VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
- *result = rb_funcall(klass, i_json_create, 1, *result);
- }
- }
- }
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-
-%%{
- machine JSON_value;
- include JSON_common;
-
- write data;
-
- action parse_null {
- *result = Qnil;
- }
- action parse_false {
- *result = Qfalse;
- }
- action parse_true {
- *result = Qtrue;
- }
- action parse_nan {
- if (json->allow_nan) {
- *result = CNaN;
- } else {
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2);
- }
- }
- action parse_infinity {
- if (json->allow_nan) {
- *result = CInfinity;
- } else {
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8);
- }
- }
- action parse_string {
- char *np = JSON_parse_string(json, fpc, pe, result);
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- action parse_number {
- char *np;
- if(pe > fpc + 8 && !strncmp(MinusInfinity, fpc, 9)) {
- if (json->allow_nan) {
- *result = CMinusInfinity;
- fexec p + 10;
- fhold; fbreak;
- } else {
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p);
- }
- }
- np = JSON_parse_float(json, fpc, pe, result);
- if (np != NULL) fexec np;
- np = JSON_parse_integer(json, fpc, pe, result);
- if (np != NULL) fexec np;
- fhold; fbreak;
- }
-
- action parse_array {
- char *np;
- np = JSON_parse_array(json, fpc, pe, result, current_nesting + 1);
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- action parse_object {
- char *np;
- np = JSON_parse_object(json, fpc, pe, result, current_nesting + 1);
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- action exit { fhold; fbreak; }
-
-main := ignore* (
- Vnull @parse_null |
- Vfalse @parse_false |
- Vtrue @parse_true |
- VNaN @parse_nan |
- VInfinity @parse_infinity |
- begin_number >parse_number |
- begin_string >parse_string |
- begin_array >parse_array |
- begin_object >parse_object
- ) ignore* %*exit;
-}%%
-
-static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
-{
- int cs = EVIL;
-
- %% write init;
- %% write exec;
-
- if (cs >= JSON_value_first_final) {
- return p;
- } else {
- return NULL;
- }
-}
-
-%%{
- machine JSON_integer;
-
- write data;
-
- action exit { fhold; fbreak; }
-
- main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit);
-}%%
-
-static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
-
- %% write init;
- json->memo = p;
- %% write exec;
-
- if (cs >= JSON_integer_first_final) {
- long len = p - json->memo;
- fbuffer_clear(json->fbuffer);
- fbuffer_append(json->fbuffer, json->memo, len);
- fbuffer_append_char(json->fbuffer, '\0');
- *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10);
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-%%{
- machine JSON_float;
- include JSON_common;
-
- write data;
-
- action exit { fhold; fbreak; }
-
- main := '-'? (
- (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
- | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
- ) (^[0-9Ee.\-]? @exit );
-}%%
-
-static int is_bigdecimal_class(VALUE obj)
-{
- if (cBigDecimal == Qundef) {
- if (rb_const_defined(rb_cObject, i_BigDecimal)) {
- cBigDecimal = rb_const_get_at(rb_cObject, i_BigDecimal);
- }
- else {
- return 0;
- }
- }
- return obj == cBigDecimal;
-}
-
-static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
-
- %% write init;
- json->memo = p;
- %% write exec;
-
- if (cs >= JSON_float_first_final) {
- long len = p - json->memo;
- fbuffer_clear(json->fbuffer);
- fbuffer_append(json->fbuffer, json->memo, len);
- fbuffer_append_char(json->fbuffer, '\0');
- if (NIL_P(json->decimal_class)) {
- *result = rb_float_new(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1));
- } else {
- VALUE text;
- text = rb_str_new2(FBUFFER_PTR(json->fbuffer));
- if (is_bigdecimal_class(json->decimal_class)) {
- *result = rb_funcall(Qnil, i_BigDecimal, 1, text);
- } else {
- *result = rb_funcall(json->decimal_class, i_new, 1, text);
- }
- }
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-
-%%{
- machine JSON_array;
- include JSON_common;
-
- write data;
-
- action parse_value {
- VALUE v = Qnil;
- char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting);
- if (np == NULL) {
- fhold; fbreak;
- } else {
- if (NIL_P(json->array_class)) {
- rb_ary_push(*result, v);
- } else {
- rb_funcall(*result, i_leftshift, 1, v);
- }
- fexec np;
- }
- }
-
- action exit { fhold; fbreak; }
-
- next_element = value_separator ignore* begin_value >parse_value;
-
- main := begin_array ignore*
- ((begin_value >parse_value ignore*)
- (ignore* next_element ignore*)*)?
- end_array @exit;
-}%%
-
-static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
-{
- int cs = EVIL;
- VALUE array_class = json->array_class;
-
- if (json->max_nesting && current_nesting > json->max_nesting) {
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
- }
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
-
- %% write init;
- %% write exec;
-
- if(cs >= JSON_array_first_final) {
- return p + 1;
- } else {
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p);
- return NULL;
- }
-}
-
-static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd)
-{
- char *p = string, *pe = string, *unescape;
- int unescape_len;
- char buf[4];
-
- while (pe < stringEnd) {
- if (*pe == '\\') {
- unescape = (char *) "?";
- unescape_len = 1;
- if (pe > p) rb_str_buf_cat(result, p, pe - p);
- switch (*++pe) {
- case 'n':
- unescape = (char *) "\n";
- break;
- case 'r':
- unescape = (char *) "\r";
- break;
- case 't':
- unescape = (char *) "\t";
- break;
- case '"':
- unescape = (char *) "\"";
- break;
- case '\\':
- unescape = (char *) "\\";
- break;
- case 'b':
- unescape = (char *) "\b";
- break;
- case 'f':
- unescape = (char *) "\f";
- break;
- case 'u':
- if (pe > stringEnd - 4) {
- rb_enc_raise(
- EXC_ENCODING eParserError,
- "%u: incomplete unicode character escape sequence at '%s'", __LINE__, p
- );
- } else {
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
- pe += 3;
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
- pe++;
- if (pe > stringEnd - 6) {
- rb_enc_raise(
- EXC_ENCODING eParserError,
- "%u: incomplete surrogate pair at '%s'", __LINE__, p
- );
- }
- if (pe[0] == '\\' && pe[1] == 'u') {
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
- | (sur & 0x3FF));
- pe += 5;
- } else {
- unescape = (char *) "?";
- break;
- }
- }
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
- unescape = buf;
- }
- break;
- default:
- p = pe;
- continue;
- }
- rb_str_buf_cat(result, unescape, unescape_len);
- p = ++pe;
- } else {
- pe++;
- }
- }
- rb_str_buf_cat(result, p, pe - p);
- return result;
-}
-
-%%{
- machine JSON_string;
- include JSON_common;
-
- write data;
-
- action parse_string {
- *result = json_string_unescape(*result, json->memo + 1, p);
- if (NIL_P(*result)) {
- fhold;
- fbreak;
- } else {
- FORCE_UTF8(*result);
- fexec p + 1;
- }
- }
-
- action exit { fhold; fbreak; }
-
- main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
-}%%
-
-static int
-match_i(VALUE regexp, VALUE klass, VALUE memo)
-{
- if (regexp == Qundef) return ST_STOP;
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) &&
- RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) {
- rb_ary_push(memo, klass);
- return ST_STOP;
- }
- return ST_CONTINUE;
-}
-
-static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
- VALUE match_string;
-
- *result = rb_str_buf_new(0);
- %% write init;
- json->memo = p;
- %% write exec;
-
- if (json->create_additions && RTEST(match_string = json->match_string)) {
- VALUE klass;
- VALUE memo = rb_ary_new2(2);
- rb_ary_push(memo, *result);
- rb_hash_foreach(match_string, match_i, memo);
- klass = rb_ary_entry(memo, 1);
- if (RTEST(klass)) {
- *result = rb_funcall(klass, i_json_create, 1, *result);
- }
- }
-
- if (json->symbolize_names && json->parsing_name) {
- *result = rb_str_intern(*result);
- } else {
- if (RB_TYPE_P(*result, T_STRING)) {
- rb_str_resize(*result, RSTRING_LEN(*result));
- }
- }
- if (cs >= JSON_string_first_final) {
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-/*
- * Document-class: JSON::Ext::Parser
- *
- * This is the JSON parser implemented as a C extension. It can be configured
- * to be used by setting
- *
- * JSON.parser = JSON::Ext::Parser
- *
- * with the method parser= in JSON.
- *
- */
-
-static VALUE convert_encoding(VALUE source)
-{
-#ifdef HAVE_RUBY_ENCODING_H
- rb_encoding *enc = rb_enc_get(source);
- if (enc == rb_ascii8bit_encoding()) {
- if (OBJ_FROZEN(source)) {
- source = rb_str_dup(source);
- }
- FORCE_UTF8(source);
- } else {
- source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding());
- }
-#endif
- return source;
-}
-
-/*
- * call-seq: new(source, opts => {})
- *
- * Creates a new JSON::Ext::Parser instance for the string _source_.
- *
- * Creates a new JSON::Ext::Parser instance for the string _source_.
- *
- * It will be configured by the _opts_ hash. _opts_ can have the following
- * keys:
- *
- * _opts_ can have the following keys:
- * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
- * structures. Disable depth checking with :max_nesting => false|nil|0, it
- * defaults to 100.
- * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
- * defiance of RFC 4627 to be parsed by the Parser. This option defaults to
- * false.
- * * *symbolize_names*: If set to true, returns symbols for the names
- * (keys) in a JSON object. Otherwise strings are returned, which is
- * also the default. It's not possible to use this option in
- * conjunction with the *create_additions* option.
- * * *create_additions*: If set to false, the Parser doesn't create
- * additions even if a matching class and create_id was found. This option
- * defaults to false.
- * * *object_class*: Defaults to Hash
- * * *array_class*: Defaults to Array
- */
-static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
-{
- VALUE source, opts;
- GET_PARSER_INIT;
-
- if (json->Vsource) {
- rb_raise(rb_eTypeError, "already initialized instance");
- }
-#ifdef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH
- rb_scan_args(argc, argv, "1:", &source, &opts);
-#else
- rb_scan_args(argc, argv, "11", &source, &opts);
-#endif
- if (!NIL_P(opts)) {
-#ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH
- opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
- if (NIL_P(opts)) {
- rb_raise(rb_eArgError, "opts needs to be like a hash");
- } else {
-#endif
- VALUE tmp = ID2SYM(i_max_nesting);
- if (option_given_p(opts, tmp)) {
- VALUE max_nesting = rb_hash_aref(opts, tmp);
- if (RTEST(max_nesting)) {
- Check_Type(max_nesting, T_FIXNUM);
- json->max_nesting = FIX2INT(max_nesting);
- } else {
- json->max_nesting = 0;
- }
- } else {
- json->max_nesting = 100;
- }
- tmp = ID2SYM(i_allow_nan);
- if (option_given_p(opts, tmp)) {
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->allow_nan = 0;
- }
- tmp = ID2SYM(i_symbolize_names);
- if (option_given_p(opts, tmp)) {
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->symbolize_names = 0;
- }
- tmp = ID2SYM(i_create_additions);
- if (option_given_p(opts, tmp)) {
- json->create_additions = RTEST(rb_hash_aref(opts, tmp));
- } else {
- json->create_additions = 0;
- }
- if (json->symbolize_names && json->create_additions) {
- rb_raise(rb_eArgError,
- "options :symbolize_names and :create_additions cannot be "
- " used in conjunction");
- }
- tmp = ID2SYM(i_create_id);
- if (option_given_p(opts, tmp)) {
- json->create_id = rb_hash_aref(opts, tmp);
- } else {
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- }
- tmp = ID2SYM(i_object_class);
- if (option_given_p(opts, tmp)) {
- json->object_class = rb_hash_aref(opts, tmp);
- } else {
- json->object_class = Qnil;
- }
- tmp = ID2SYM(i_array_class);
- if (option_given_p(opts, tmp)) {
- json->array_class = rb_hash_aref(opts, tmp);
- } else {
- json->array_class = Qnil;
- }
- tmp = ID2SYM(i_decimal_class);
- if (option_given_p(opts, tmp)) {
- json->decimal_class = rb_hash_aref(opts, tmp);
- } else {
- json->decimal_class = Qnil;
- }
- tmp = ID2SYM(i_match_string);
- if (option_given_p(opts, tmp)) {
- VALUE match_string = rb_hash_aref(opts, tmp);
- json->match_string = RTEST(match_string) ? match_string : Qnil;
- } else {
- json->match_string = Qnil;
- }
-#ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH
- }
-#endif
- } else {
- json->max_nesting = 100;
- json->allow_nan = 0;
- json->create_additions = 1;
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- json->object_class = Qnil;
- json->array_class = Qnil;
- json->decimal_class = Qnil;
- }
- source = convert_encoding(StringValue(source));
- StringValue(source);
- json->len = RSTRING_LEN(source);
- json->source = RSTRING_PTR(source);;
- json->Vsource = source;
- return self;
-}
-
-%%{
- machine JSON;
-
- write data;
-
- include JSON_common;
-
- action parse_value {
- char *np = JSON_parse_value(json, fpc, pe, &result, 0);
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- main := ignore* (
- begin_value >parse_value
- ) ignore*;
-}%%
-
-/*
- * call-seq: parse()
- *
- * Parses the current JSON text _source_ and returns the complete data
- * structure as a result.
- */
-static VALUE cParser_parse(VALUE self)
-{
- char *p, *pe;
- int cs = EVIL;
- VALUE result = Qnil;
- GET_PARSER;
-
- %% write init;
- p = json->source;
- pe = p + json->len;
- %% write exec;
-
- if (cs >= JSON_first_final && p == pe) {
- return result;
- } else {
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p);
- return Qnil;
- }
-}
-
-static void JSON_mark(void *ptr)
-{
- JSON_Parser *json = ptr;
- rb_gc_mark_maybe(json->Vsource);
- rb_gc_mark_maybe(json->create_id);
- rb_gc_mark_maybe(json->object_class);
- rb_gc_mark_maybe(json->array_class);
- rb_gc_mark_maybe(json->decimal_class);
- rb_gc_mark_maybe(json->match_string);
-}
-
-static void JSON_free(void *ptr)
-{
- JSON_Parser *json = ptr;
- fbuffer_free(json->fbuffer);
- ruby_xfree(json);
-}
-
-static size_t JSON_memsize(const void *ptr)
-{
- const JSON_Parser *json = ptr;
- return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
-}
-
-#ifdef NEW_TYPEDDATA_WRAPPER
-static const rb_data_type_t JSON_Parser_type = {
- "JSON/Parser",
- {JSON_mark, JSON_free, JSON_memsize,},
-#ifdef RUBY_TYPED_FREE_IMMEDIATELY
- 0, 0,
- RUBY_TYPED_FREE_IMMEDIATELY,
-#endif
-};
-#endif
-
-static VALUE cJSON_parser_s_allocate(VALUE klass)
-{
- JSON_Parser *json;
- VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
- json->fbuffer = fbuffer_alloc(0);
- return obj;
-}
-
-/*
- * call-seq: source()
- *
- * Returns a copy of the current _source_ string, that was used to construct
- * this Parser.
- */
-static VALUE cParser_source(VALUE self)
-{
- GET_PARSER;
- return rb_str_dup(json->Vsource);
-}
-
-void Init_parser(void)
-{
-#undef rb_intern
- rb_require("json/common");
- mJSON = rb_define_module("JSON");
- mExt = rb_define_module_under(mJSON, "Ext");
- cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
- eParserError = rb_path2class("JSON::ParserError");
- eNestingError = rb_path2class("JSON::NestingError");
- rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
- rb_define_method(cParser, "initialize", cParser_initialize, -1);
- rb_define_method(cParser, "parse", cParser_parse, 0);
- rb_define_method(cParser, "source", cParser_source, 0);
-
- CNaN = rb_const_get(mJSON, rb_intern("NaN"));
- CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
- CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
-
- i_json_creatable_p = rb_intern("json_creatable?");
- i_json_create = rb_intern("json_create");
- i_create_id = rb_intern("create_id");
- i_create_additions = rb_intern("create_additions");
- i_chr = rb_intern("chr");
- i_max_nesting = rb_intern("max_nesting");
- i_allow_nan = rb_intern("allow_nan");
- i_symbolize_names = rb_intern("symbolize_names");
- i_object_class = rb_intern("object_class");
- i_array_class = rb_intern("array_class");
- i_decimal_class = rb_intern("decimal_class");
- i_match = rb_intern("match");
- i_match_string = rb_intern("match_string");
- i_key_p = rb_intern("key?");
- i_deep_const_get = rb_intern("deep_const_get");
- i_aset = rb_intern("[]=");
- i_aref = rb_intern("[]");
- i_leftshift = rb_intern("<<");
- i_new = rb_intern("new");
- i_BigDecimal = rb_intern("BigDecimal");
-}
-
-/*
- * Local variables:
- * mode: c
- * c-file-style: ruby
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/ext/json/parser/prereq.mk b/ext/json/parser/prereq.mk
deleted file mode 100644
index be7bcb4319..0000000000
--- a/ext/json/parser/prereq.mk
+++ /dev/null
@@ -1,10 +0,0 @@
-RAGEL = ragel
-
-.SUFFIXES: .rl
-
-.rl.c:
- $(RAGEL) -G2 $<
- $(BASERUBY) -pli -e '$$_.sub!(/[ \t]+$$/, "")' \
- -e '$$_.sub!(/^static const int (JSON_.*=.*);$$/, "enum {\\1};")' $@
-
-parser.c: