summaryrefslogtreecommitdiff
path: root/ext/socket/sockssocket.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/socket/sockssocket.c')
-rw-r--r--ext/socket/sockssocket.c9
1 files changed, 5 insertions, 4 deletions
diff --git a/ext/socket/sockssocket.c b/ext/socket/sockssocket.c
index b8b7e12998..30860ea257 100644
--- a/ext/socket/sockssocket.c
+++ b/ext/socket/sockssocket.c
@@ -30,11 +30,12 @@ socks_init(VALUE sock, VALUE host, VALUE port)
static int init = 0;
if (init == 0) {
- SOCKSinit("ruby");
- init = 1;
+ char progname[] = "ruby";
+ SOCKSinit(progname);
+ init = 1;
}
- return rsock_init_inetsock(sock, host, port, Qnil, Qnil, INET_SOCKS, Qnil, Qnil);
+ return rsock_init_inetsock(sock, host, port, Qnil, Qnil, INET_SOCKS, Qnil, Qnil, Qnil, Qfalse, Qnil);
}
#ifdef SOCKS5
@@ -48,7 +49,7 @@ socks_s_close(VALUE sock)
rb_io_t *fptr;
GetOpenFile(sock, fptr);
- shutdown(fptr->fd, 2);
+ shutdown(fptr->fd, SHUT_RDWR);
return rb_io_close(sock);
}
#endif
/td>196
-rw-r--r--ext/json/json.gemspec62
-rw-r--r--ext/json/json.h134
-rw-r--r--ext/json/lib/json.rb677
-rw-r--r--ext/json/lib/json/add/bigdecimal.rb58
-rw-r--r--ext/json/lib/json/add/complex.rb51
-rw-r--r--ext/json/lib/json/add/core.rb157
-rw-r--r--ext/json/lib/json/add/date.rb54
-rw-r--r--ext/json/lib/json/add/date_time.rb67
-rw-r--r--ext/json/lib/json/add/exception.rb49
-rw-r--r--ext/json/lib/json/add/ostruct.rb54
-rw-r--r--ext/json/lib/json/add/range.rb54
-rw-r--r--ext/json/lib/json/add/rational.rb49
-rw-r--r--ext/json/lib/json/add/regexp.rb48
-rw-r--r--ext/json/lib/json/add/set.rb48
-rw-r--r--ext/json/lib/json/add/string.rb35
-rw-r--r--ext/json/lib/json/add/struct.rb52
-rw-r--r--ext/json/lib/json/add/symbol.rb52
-rw-r--r--ext/json/lib/json/add/time.rb52
-rw-r--r--ext/json/lib/json/common.rb1329
-rw-r--r--ext/json/lib/json/ext.rb40
-rw-r--r--ext/json/lib/json/ext/generator/state.rb103
-rw-r--r--ext/json/lib/json/generic_object.rb67
-rw-r--r--ext/json/lib/json/version.rb9
-rw-r--r--ext/json/parser/depend183
-rw-r--r--ext/json/parser/extconf.rb20
-rw-r--r--ext/json/parser/parser.c3749
-rw-r--r--ext/json/parser/parser.h74
-rw-r--r--ext/json/parser/parser.rl804
-rw-r--r--ext/json/parser/prereq.mk8
-rw-r--r--ext/json/simd/conf.rb24
-rw-r--r--ext/json/simd/simd.h208
-rw-r--r--ext/json/vendor/fpconv.c480
-rw-r--r--ext/json/vendor/jeaiii-ltoa.h267
-rw-r--r--ext/json/vendor/ryu.h819
41 files changed, 8811 insertions, 4279 deletions
diff --git a/ext/json/depend b/ext/json/depend
new file mode 100644
index 0000000000..0301ce074c
--- /dev/null
+++ b/ext/json/depend
@@ -0,0 +1,2 @@
+# AUTOGENERATED DEPENDENCIES START
+# AUTOGENERATED DEPENDENCIES END
diff --git a/ext/json/extconf.rb b/ext/json/extconf.rb
index 850798c643..8a99b6a5c8 100644
--- a/ext/json/extconf.rb
+++ b/ext/json/extconf.rb
@@ -1,3 +1,3 @@
require 'mkmf'
-create_makefile('json')
+create_makefile('json')
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h
new file mode 100644
index 0000000000..b4f5266ca5
--- /dev/null
+++ b/ext/json/fbuffer/fbuffer.h
@@ -0,0 +1,260 @@
+#ifndef _FBUFFER_H_
+#define _FBUFFER_H_
+
+#include "../json.h"
+#include "../vendor/jeaiii-ltoa.h"
+
+enum fbuffer_type {
+ FBUFFER_HEAP_ALLOCATED = 0,
+ FBUFFER_STACK_ALLOCATED = 1,
+};
+
+typedef struct FBufferStruct {
+ enum fbuffer_type type;
+ size_t initial_length;
+ size_t len;
+ size_t capa;
+#if JSON_DEBUG
+ size_t requested;
+#endif
+ char *ptr;
+ VALUE io;
+} FBuffer;
+
+#define FBUFFER_STACK_SIZE 512
+#define FBUFFER_IO_BUFFER_SIZE (16384 - 1)
+#define FBUFFER_INITIAL_LENGTH_DEFAULT 1024
+
+#define FBUFFER_PTR(fb) ((fb)->ptr)
+#define FBUFFER_LEN(fb) ((fb)->len)
+#define FBUFFER_CAPA(fb) ((fb)->capa)
+#define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb)
+
+static void fbuffer_free(FBuffer *fb);
+static void fbuffer_clear(FBuffer *fb);
+static void fbuffer_append(FBuffer *fb, const char *newstr, size_t len);
+static void fbuffer_append_long(FBuffer *fb, long number);
+static inline void fbuffer_append_char(FBuffer *fb, char newchr);
+static VALUE fbuffer_finalize(FBuffer *fb);
+
+static void fbuffer_stack_init(FBuffer *fb, size_t initial_length, char *stack_buffer, size_t stack_buffer_size)
+{
+ fb->initial_length = (initial_length > 0) ? initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT;
+ if (stack_buffer) {
+ fb->type = FBUFFER_STACK_ALLOCATED;
+ fb->ptr = stack_buffer;
+ fb->capa = stack_buffer_size;
+ }
+#if JSON_DEBUG
+ fb->requested = 0;
+#endif
+}
+
+static inline void fbuffer_consumed(FBuffer *fb, size_t consumed)
+{
+#if JSON_DEBUG
+ if (consumed > fb->requested) {
+ rb_bug("fbuffer: Out of bound write");
+ }
+ fb->requested = 0;
+#endif
+ fb->len += consumed;
+}
+
+static void fbuffer_free(FBuffer *fb)
+{
+ if (fb->ptr && fb->type == FBUFFER_HEAP_ALLOCATED) {
+ JSON_SIZED_FREE_N(fb->ptr, fb->capa);
+ }
+}
+
+static void fbuffer_clear(FBuffer *fb)
+{
+ fb->len = 0;
+}
+
+static void fbuffer_flush(FBuffer *fb)
+{
+ rb_io_write(fb->io, rb_utf8_str_new(fb->ptr, fb->len));
+ fbuffer_clear(fb);
+}
+
+static void fbuffer_realloc(FBuffer *fb, size_t required)
+{
+ if (required > fb->capa) {
+ if (fb->type == FBUFFER_STACK_ALLOCATED) {
+ const char *old_buffer = fb->ptr;
+ fb->ptr = ALLOC_N(char, required);
+ fb->type = FBUFFER_HEAP_ALLOCATED;
+ MEMCPY(fb->ptr, old_buffer, char, fb->len);
+ } else {
+ JSON_SIZED_REALLOC_N(fb->ptr, char, required, fb->capa);
+ }
+ fb->capa = required;
+ }
+}
+
+static void fbuffer_do_inc_capa(FBuffer *fb, size_t requested)
+{
+ if (RB_UNLIKELY(fb->io)) {
+ if (fb->capa < FBUFFER_IO_BUFFER_SIZE) {
+ fbuffer_realloc(fb, FBUFFER_IO_BUFFER_SIZE);
+ } else {
+ fbuffer_flush(fb);
+ }
+
+ if (RB_LIKELY(requested < fb->capa)) {
+ return;
+ }
+ }
+
+ size_t required;
+
+ if (RB_UNLIKELY(!fb->ptr)) {
+ fb->ptr = ALLOC_N(char, fb->initial_length);
+ fb->capa = fb->initial_length;
+ }
+
+ for (required = fb->capa; requested > required - fb->len; required <<= 1);
+
+ fbuffer_realloc(fb, required);
+}
+
+static inline void fbuffer_inc_capa(FBuffer *fb, size_t requested)
+{
+#if JSON_DEBUG
+ fb->requested = requested;
+#endif
+
+ if (RB_UNLIKELY(requested > fb->capa - fb->len)) {
+ fbuffer_do_inc_capa(fb, requested);
+ }
+}
+
+static inline size_t fbuffer_size_mul_or_raise(size_t a, size_t b)
+{
+ size_t result = a * b;
+ if (RB_UNLIKELY(a != 0 && (result / a) != b)) {
+ rb_raise(rb_eArgError, "Buffer overflow, the resulting document is too large to be generated");
+ }
+ return result;
+}
+
+static inline void fbuffer_append_reserved(FBuffer *fb, const char *newstr, size_t len)
+{
+ MEMCPY(fb->ptr + fb->len, newstr, char, len);
+ fbuffer_consumed(fb, len);
+}
+
+static inline void fbuffer_append(FBuffer *fb, const char *newstr, size_t len)
+{
+ if (len > 0) {
+ fbuffer_inc_capa(fb, len);
+ fbuffer_append_reserved(fb, newstr, len);
+ }
+}
+
+/* Appends a character into a buffer. The buffer needs to have sufficient capacity, via fbuffer_inc_capa(...). */
+static inline void fbuffer_append_reserved_char(FBuffer *fb, char chr)
+{
+#if JSON_DEBUG
+ if (fb->requested < 1) {
+ rb_bug("fbuffer: unreserved write");
+ }
+ fb->requested--;
+#endif
+
+ fb->ptr[fb->len] = chr;
+ fb->len++;
+}
+
+static void fbuffer_append_str(FBuffer *fb, VALUE str)
+{
+ const char *ptr;
+ size_t len;
+ RSTRING_GETMEM(str, ptr, len);
+
+ fbuffer_append(fb, ptr, len);
+ RB_GC_GUARD(str);
+}
+
+static void fbuffer_append_str_repeat(FBuffer *fb, VALUE str, size_t repeat)
+{
+ const char *ptr;
+ size_t len;
+ RSTRING_GETMEM(str, ptr, len);
+
+ fbuffer_inc_capa(fb, fbuffer_size_mul_or_raise(repeat, len));
+ while (repeat) {
+#if JSON_DEBUG
+ fb->requested = len;
+#endif
+ fbuffer_append_reserved(fb, ptr, len);
+ repeat--;
+ }
+ RB_GC_GUARD(str);
+}
+
+static inline void fbuffer_append_char(FBuffer *fb, char newchr)
+{
+ fbuffer_inc_capa(fb, 1);
+ *(fb->ptr + fb->len) = newchr;
+ fbuffer_consumed(fb, 1);
+}
+
+static inline char *fbuffer_cursor(FBuffer *fb)
+{
+ return fb->ptr + fb->len;
+}
+
+static inline void fbuffer_advance_to(FBuffer *fb, char *end)
+{
+ fbuffer_consumed(fb, (end - fb->ptr) - fb->len);
+}
+
+/*
+ * Appends the decimal string representation of \a number into the buffer.
+ */
+static void fbuffer_append_long(FBuffer *fb, long number)
+{
+ /*
+ * The jeaiii_ultoa() function produces digits left-to-right,
+ * allowing us to write directly into the buffer, but we don't know
+ * the number of resulting characters.
+ *
+ * We do know, however, that the `number` argument is always in the
+ * range 0xc000000000000000 to 0x3fffffffffffffff, or, in decimal,
+ * -4611686018427387904 to 4611686018427387903. The max number of chars
+ * generated is therefore 20 (including a potential sign character).
+ */
+
+ static const int MAX_CHARS_FOR_LONG = 20;
+
+ fbuffer_inc_capa(fb, MAX_CHARS_FOR_LONG);
+
+ if (number < 0) {
+ fbuffer_append_reserved_char(fb, '-');
+
+ /*
+ * Since number is always > LONG_MIN, `-number` will not overflow
+ * and is always the positive abs() value.
+ */
+ number = -number;
+ }
+
+ char *end = jeaiii_ultoa(fbuffer_cursor(fb), number);
+ fbuffer_advance_to(fb, end);
+}
+
+static VALUE fbuffer_finalize(FBuffer *fb)
+{
+ if (fb->io) {
+ fbuffer_flush(fb);
+ rb_io_flush(fb->io);
+ return fb->io;
+ } else {
+ return rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb));
+ }
+}
+
+#endif // _FBUFFER_H_
diff --git a/ext/json/generator/depend b/ext/json/generator/depend
index bb76ad6400..3ba4acfdd2 100644
--- a/ext/json/generator/depend
+++ b/ext/json/generator/depend
@@ -1 +1,186 @@
-generator.o: generator.c generator.h
+$(OBJS): $(ruby_headers)
+generator.o: generator.c $(srcdir)/../fbuffer/fbuffer.h
+
+# AUTOGENERATED DEPENDENCIES START
+generator.o: $(RUBY_EXTCONF_H)
+generator.o: $(arch_hdrdir)/ruby/config.h
+generator.o: $(hdrdir)/ruby.h
+generator.o: $(hdrdir)/ruby/assert.h
+generator.o: $(hdrdir)/ruby/backward.h
+generator.o: $(hdrdir)/ruby/backward/2/assume.h
+generator.o: $(hdrdir)/ruby/backward/2/attributes.h
+generator.o: $(hdrdir)/ruby/backward/2/bool.h
+generator.o: $(hdrdir)/ruby/backward/2/inttypes.h
+generator.o: $(hdrdir)/ruby/backward/2/limits.h
+generator.o: $(hdrdir)/ruby/backward/2/long_long.h
+generator.o: $(hdrdir)/ruby/backward/2/stdalign.h
+generator.o: $(hdrdir)/ruby/backward/2/stdarg.h
+generator.o: $(hdrdir)/ruby/defines.h
+generator.o: $(hdrdir)/ruby/encoding.h
+generator.o: $(hdrdir)/ruby/intern.h
+generator.o: $(hdrdir)/ruby/internal/abi.h
+generator.o: $(hdrdir)/ruby/internal/anyargs.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/char.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/double.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/int.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/long.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/short.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h
+generator.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h
+generator.o: $(hdrdir)/ruby/internal/assume.h
+generator.o: $(hdrdir)/ruby/internal/attr/alloc_size.h
+generator.o: $(hdrdir)/ruby/internal/attr/artificial.h
+generator.o: $(hdrdir)/ruby/internal/attr/cold.h
+generator.o: $(hdrdir)/ruby/internal/attr/const.h
+generator.o: $(hdrdir)/ruby/internal/attr/constexpr.h
+generator.o: $(hdrdir)/ruby/internal/attr/deprecated.h
+generator.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h
+generator.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h
+generator.o: $(hdrdir)/ruby/internal/attr/error.h
+generator.o: $(hdrdir)/ruby/internal/attr/flag_enum.h
+generator.o: $(hdrdir)/ruby/internal/attr/forceinline.h
+generator.o: $(hdrdir)/ruby/internal/attr/format.h
+generator.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h
+generator.o: $(hdrdir)/ruby/internal/attr/noalias.h
+generator.o: $(hdrdir)/ruby/internal/attr/nodiscard.h
+generator.o: $(hdrdir)/ruby/internal/attr/noexcept.h
+generator.o: $(hdrdir)/ruby/internal/attr/noinline.h
+generator.o: $(hdrdir)/ruby/internal/attr/nonnull.h
+generator.o: $(hdrdir)/ruby/internal/attr/noreturn.h
+generator.o: $(hdrdir)/ruby/internal/attr/packed_struct.h
+generator.o: $(hdrdir)/ruby/internal/attr/pure.h
+generator.o: $(hdrdir)/ruby/internal/attr/restrict.h
+generator.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h
+generator.o: $(hdrdir)/ruby/internal/attr/warning.h
+generator.o: $(hdrdir)/ruby/internal/attr/weakref.h
+generator.o: $(hdrdir)/ruby/internal/cast.h
+generator.o: $(hdrdir)/ruby/internal/compiler_is.h
+generator.o: $(hdrdir)/ruby/internal/compiler_is/apple.h
+generator.o: $(hdrdir)/ruby/internal/compiler_is/clang.h
+generator.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h
+generator.o: $(hdrdir)/ruby/internal/compiler_is/intel.h
+generator.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h
+generator.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h
+generator.o: $(hdrdir)/ruby/internal/compiler_since.h
+generator.o: $(hdrdir)/ruby/internal/config.h
+generator.o: $(hdrdir)/ruby/internal/constant_p.h
+generator.o: $(hdrdir)/ruby/internal/core.h
+generator.o: $(hdrdir)/ruby/internal/core/rarray.h
+generator.o: $(hdrdir)/ruby/internal/core/rbasic.h
+generator.o: $(hdrdir)/ruby/internal/core/rbignum.h
+generator.o: $(hdrdir)/ruby/internal/core/rclass.h
+generator.o: $(hdrdir)/ruby/internal/core/rdata.h
+generator.o: $(hdrdir)/ruby/internal/core/rfile.h
+generator.o: $(hdrdir)/ruby/internal/core/rhash.h
+generator.o: $(hdrdir)/ruby/internal/core/rmatch.h
+generator.o: $(hdrdir)/ruby/internal/core/robject.h
+generator.o: $(hdrdir)/ruby/internal/core/rregexp.h
+generator.o: $(hdrdir)/ruby/internal/core/rstring.h
+generator.o: $(hdrdir)/ruby/internal/core/rstruct.h
+generator.o: $(hdrdir)/ruby/internal/core/rtypeddata.h
+generator.o: $(hdrdir)/ruby/internal/ctype.h
+generator.o: $(hdrdir)/ruby/internal/dllexport.h
+generator.o: $(hdrdir)/ruby/internal/dosish.h
+generator.o: $(hdrdir)/ruby/internal/encoding/coderange.h
+generator.o: $(hdrdir)/ruby/internal/encoding/ctype.h
+generator.o: $(hdrdir)/ruby/internal/encoding/encoding.h
+generator.o: $(hdrdir)/ruby/internal/encoding/pathname.h
+generator.o: $(hdrdir)/ruby/internal/encoding/re.h
+generator.o: $(hdrdir)/ruby/internal/encoding/sprintf.h
+generator.o: $(hdrdir)/ruby/internal/encoding/string.h
+generator.o: $(hdrdir)/ruby/internal/encoding/symbol.h
+generator.o: $(hdrdir)/ruby/internal/encoding/transcode.h
+generator.o: $(hdrdir)/ruby/internal/error.h
+generator.o: $(hdrdir)/ruby/internal/eval.h
+generator.o: $(hdrdir)/ruby/internal/event.h
+generator.o: $(hdrdir)/ruby/internal/fl_type.h
+generator.o: $(hdrdir)/ruby/internal/gc.h
+generator.o: $(hdrdir)/ruby/internal/glob.h
+generator.o: $(hdrdir)/ruby/internal/globals.h
+generator.o: $(hdrdir)/ruby/internal/has/attribute.h
+generator.o: $(hdrdir)/ruby/internal/has/builtin.h
+generator.o: $(hdrdir)/ruby/internal/has/c_attribute.h
+generator.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h
+generator.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h
+generator.o: $(hdrdir)/ruby/internal/has/extension.h
+generator.o: $(hdrdir)/ruby/internal/has/feature.h
+generator.o: $(hdrdir)/ruby/internal/has/warning.h
+generator.o: $(hdrdir)/ruby/internal/intern/array.h
+generator.o: $(hdrdir)/ruby/internal/intern/bignum.h
+generator.o: $(hdrdir)/ruby/internal/intern/class.h
+generator.o: $(hdrdir)/ruby/internal/intern/compar.h
+generator.o: $(hdrdir)/ruby/internal/intern/complex.h
+generator.o: $(hdrdir)/ruby/internal/intern/cont.h
+generator.o: $(hdrdir)/ruby/internal/intern/dir.h
+generator.o: $(hdrdir)/ruby/internal/intern/enum.h
+generator.o: $(hdrdir)/ruby/internal/intern/enumerator.h
+generator.o: $(hdrdir)/ruby/internal/intern/error.h
+generator.o: $(hdrdir)/ruby/internal/intern/eval.h
+generator.o: $(hdrdir)/ruby/internal/intern/file.h
+generator.o: $(hdrdir)/ruby/internal/intern/hash.h
+generator.o: $(hdrdir)/ruby/internal/intern/io.h
+generator.o: $(hdrdir)/ruby/internal/intern/load.h
+generator.o: $(hdrdir)/ruby/internal/intern/marshal.h
+generator.o: $(hdrdir)/ruby/internal/intern/numeric.h
+generator.o: $(hdrdir)/ruby/internal/intern/object.h
+generator.o: $(hdrdir)/ruby/internal/intern/parse.h
+generator.o: $(hdrdir)/ruby/internal/intern/proc.h
+generator.o: $(hdrdir)/ruby/internal/intern/process.h
+generator.o: $(hdrdir)/ruby/internal/intern/random.h
+generator.o: $(hdrdir)/ruby/internal/intern/range.h
+generator.o: $(hdrdir)/ruby/internal/intern/rational.h
+generator.o: $(hdrdir)/ruby/internal/intern/re.h
+generator.o: $(hdrdir)/ruby/internal/intern/ruby.h
+generator.o: $(hdrdir)/ruby/internal/intern/select.h
+generator.o: $(hdrdir)/ruby/internal/intern/select/largesize.h
+generator.o: $(hdrdir)/ruby/internal/intern/set.h
+generator.o: $(hdrdir)/ruby/internal/intern/signal.h
+generator.o: $(hdrdir)/ruby/internal/intern/sprintf.h
+generator.o: $(hdrdir)/ruby/internal/intern/string.h
+generator.o: $(hdrdir)/ruby/internal/intern/struct.h
+generator.o: $(hdrdir)/ruby/internal/intern/thread.h
+generator.o: $(hdrdir)/ruby/internal/intern/time.h
+generator.o: $(hdrdir)/ruby/internal/intern/variable.h
+generator.o: $(hdrdir)/ruby/internal/intern/vm.h
+generator.o: $(hdrdir)/ruby/internal/interpreter.h
+generator.o: $(hdrdir)/ruby/internal/iterator.h
+generator.o: $(hdrdir)/ruby/internal/memory.h
+generator.o: $(hdrdir)/ruby/internal/method.h
+generator.o: $(hdrdir)/ruby/internal/module.h
+generator.o: $(hdrdir)/ruby/internal/newobj.h
+generator.o: $(hdrdir)/ruby/internal/scan_args.h
+generator.o: $(hdrdir)/ruby/internal/special_consts.h
+generator.o: $(hdrdir)/ruby/internal/static_assert.h
+generator.o: $(hdrdir)/ruby/internal/stdalign.h
+generator.o: $(hdrdir)/ruby/internal/stdbool.h
+generator.o: $(hdrdir)/ruby/internal/stdckdint.h
+generator.o: $(hdrdir)/ruby/internal/symbol.h
+generator.o: $(hdrdir)/ruby/internal/value.h
+generator.o: $(hdrdir)/ruby/internal/value_type.h
+generator.o: $(hdrdir)/ruby/internal/variable.h
+generator.o: $(hdrdir)/ruby/internal/warning_push.h
+generator.o: $(hdrdir)/ruby/internal/xmalloc.h
+generator.o: $(hdrdir)/ruby/missing.h
+generator.o: $(hdrdir)/ruby/onigmo.h
+generator.o: $(hdrdir)/ruby/oniguruma.h
+generator.o: $(hdrdir)/ruby/re.h
+generator.o: $(hdrdir)/ruby/regex.h
+generator.o: $(hdrdir)/ruby/ruby.h
+generator.o: $(hdrdir)/ruby/st.h
+generator.o: $(hdrdir)/ruby/subst.h
+generator.o: $(srcdir)/../fbuffer/fbuffer.h
+generator.o: $(srcdir)/../json.h
+generator.o: $(srcdir)/../simd/simd.h
+generator.o: $(srcdir)/../vendor/fpconv.c
+generator.o: $(srcdir)/../vendor/jeaiii-ltoa.h
+generator.o: generator.c
+# AUTOGENERATED DEPENDENCIES END
diff --git a/ext/json/generator/extconf.rb b/ext/json/generator/extconf.rb
index b94f71e8b3..33af03ea30 100644
--- a/ext/json/generator/extconf.rb
+++ b/ext/json/generator/extconf.rb
@@ -1,10 +1,19 @@
require 'mkmf'
-require 'rbconfig'
-if RUBY_VERSION < "1.9"
- have_header("re.h")
+if RUBY_ENGINE == 'truffleruby'
+ # The pure-Ruby generator is faster on TruffleRuby, so skip compiling the generator extension
+ File.write('Makefile', dummy_makefile("").join)
else
- have_header("ruby/re.h")
- have_header("ruby/encoding.h")
+ append_cflags("-std=c99")
+ have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3
+ have_func("ruby_xfree_sized", "ruby.h") # RUBY_VERSION >= 4.1
+
+ $defs << "-DJSON_GENERATOR"
+ $defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0"
+
+ if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
+ load __dir__ + "/../simd/conf.rb"
+ end
+
+ create_makefile 'json/ext/generator'
end
-create_makefile 'json/ext/generator'
diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c
index fac7abb6f2..82853633ba 100644
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@@ -1,1023 +1,1384 @@
-#include "generator.h"
+#include "../json.h"
+#include "../fbuffer/fbuffer.h"
+#include "../vendor/fpconv.c"
-#ifdef HAVE_RUBY_ENCODING_H
-static VALUE CEncoding_UTF_8;
-static ID i_encoding, i_encode;
-#endif
+#include <math.h>
+#include <ctype.h>
-static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
- mHash, mArray, mFixnum, mBignum, mFloat, mString, mString_Extend,
- mTrueClass, mFalseClass, mNilClass, eGeneratorError,
- eNestingError, CRegexp_MULTILINE, CJSON_SAFE_STATE_PROTOTYPE,
- i_SAFE_STATE_PROTOTYPE;
+#include "../simd/simd.h"
-static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
- i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only,
- i_pack, i_unpack, i_create_id, i_extend, i_key_p, i_aref, i_send,
- i_respond_to_p, i_match, i_keys, i_depth, i_dup;
+/* ruby api and some helpers */
-/*
- * Copyright 2001-2004 Unicode, Inc.
- *
- * Disclaimer
- *
- * This source code is provided as is by Unicode, Inc. No claims are
- * made as to fitness for any particular purpose. No warranties of any
- * kind are expressed or implied. The recipient agrees to determine
- * applicability of information provided. If this file has been
- * purchased on magnetic or optical media from Unicode, Inc., the
- * sole remedy for any claim will be exchange of defective media
- * within 90 days of receipt.
- *
- * Limitations on Rights to Redistribute This Code
- *
- * Unicode, Inc. hereby grants the right to freely use the information
- * supplied in this file in the creation of products supporting the
- * Unicode Standard, and to make copies of this file in any form
- * for internal or external distribution as long as this notice
- * remains attached.
- */
+enum duplicate_key_action {
+ JSON_DEPRECATED = 0,
+ JSON_IGNORE,
+ JSON_RAISE,
+};
-/*
- * Index into the table below with the first byte of a UTF-8 sequence to
- * get the number of trailing bytes that are supposed to follow it.
- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
- * left as-is for anyone who may want to do such conversion, which was
- * allowed in earlier algorithms.
- */
-static const char trailingBytesForUTF8[256] = {
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
+typedef struct JSON_Generator_StateStruct {
+ VALUE indent;
+ VALUE space;
+ VALUE space_before;
+ VALUE object_nl;
+ VALUE array_nl;
+ VALUE as_json;
+
+ long max_nesting;
+ long depth;
+ long buffer_initial_length;
+
+ enum duplicate_key_action on_duplicate_key;
+
+ bool as_json_single_arg;
+ bool allow_nan;
+ bool ascii_only;
+ bool script_safe;
+ bool strict;
+} JSON_Generator_State;
+
+static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
+
+static ID i_to_s, i_to_json, i_new, i_encode;
+static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
+ sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
+
+
+#define GET_STATE_TO(self, state) \
+ TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state)
+
+#define GET_STATE(self) \
+ JSON_Generator_State *state; \
+ GET_STATE_TO(self, state)
+
+struct generate_json_data;
+
+typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+
+struct generate_json_data {
+ FBuffer *buffer;
+ VALUE vstate;
+ JSON_Generator_State *state;
+ VALUE obj;
+ generator_func func;
+ long depth;
};
-/*
- * Magic values subtracted from a buffer value during UTF8 conversion.
- * This table contains as many values as there might be trailing bytes
- * in a UTF-8 sequence.
- */
-static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
- 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
+static SIMD_Implementation simd_impl;
-/*
- * Utility routine to tell whether a sequence of bytes is legal UTF-8.
- * This must be called with the length pre-determined by the first byte.
- * If not calling this from ConvertUTF8to*, then the length can be set by:
- * length = trailingBytesForUTF8[*source]+1;
- * and the sequence is illegal right away if there aren't that many bytes
- * available.
- * If presented with a length > 4, this returns 0. The Unicode
- * definition of UTF-8 goes up to 4-byte sequences.
- */
-static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length)
-{
- UTF8 a;
- const UTF8 *srcptr = source+length;
- switch (length) {
- default: return 0;
- /* Everything else falls through when "1"... */
- case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
- case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
- case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
-
- switch (*source) {
- /* no fall-through in this inner switch */
- case 0xE0: if (a < 0xA0) return 0; break;
- case 0xED: if (a > 0x9F) return 0; break;
- case 0xF0: if (a < 0x90) return 0; break;
- case 0xF4: if (a > 0x8F) return 0; break;
- default: if (a < 0x80) return 0;
- }
+static VALUE cState_from_state_s(VALUE self, VALUE opts);
+static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
+static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
- case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
- }
- if (*source > 0xF4) return 0;
- return 1;
-}
+static int usascii_encindex, utf8_encindex, binary_encindex;
-/* Escapes the UTF16 character and stores the result in the buffer buf. */
-static void unicode_escape(char *buf, UTF16 character)
+NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str)
{
- const char *digits = "0123456789abcdef";
-
- buf[2] = digits[character >> 12];
- buf[3] = digits[(character >> 8) & 0xf];
- buf[4] = digits[(character >> 4) & 0xf];
- buf[5] = digits[character & 0xf];
+ rb_enc_associate_index(str, utf8_encindex);
+ VALUE exc = rb_exc_new_str(eGeneratorError, str);
+ rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
+ rb_exc_raise(exc);
}
-/* Escapes the UTF16 character and stores the result in the buffer buf, then
- * the buffer buf іs appended to the FBuffer buffer. */
-static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16
- character)
-{
- unicode_escape(buf, character);
- fbuffer_append(buffer, buf, 6);
+#ifdef RBIMPL_ATTR_FORMAT
+RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
+#endif
+NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ VALUE str = rb_vsprintf(fmt, args);
+ va_end(args);
+ raise_generator_error_str(invalid_object, str);
+}
+
+// 0 - single byte char that doesn't need to be escaped.
+// (x | 8) - char that needs to be escaped.
+static const unsigned char CHAR_LENGTH_MASK = 7;
+static const unsigned char ESCAPE_MASK = 8;
+
+typedef struct _search_state {
+ const char *ptr;
+ const char *end;
+ const char *cursor;
+ FBuffer *buffer;
+
+#ifdef HAVE_SIMD
+ const char *chunk_base;
+ const char *chunk_end;
+ bool has_matches;
+
+#if defined(HAVE_SIMD_NEON)
+ uint64_t matches_mask;
+#elif defined(HAVE_SIMD_SSE2)
+ int matches_mask;
+#else
+#error "Unknown SIMD Implementation."
+#endif /* HAVE_SIMD_NEON */
+#endif /* HAVE_SIMD */
+} search_state;
+
+ALWAYS_INLINE(static) void search_flush(search_state *search)
+{
+ // Do not remove this conditional without profiling, specifically escape-heavy text.
+ // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
+ // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
+ // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
+ // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
+ // nothing needs to be flushed, we can save a few memory references with this conditional.
+ if (search->ptr > search->cursor) {
+ fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
+ search->cursor = search->ptr;
+ }
}
-/* Converts string to a JSON string in FBuffer buffer, where all but the ASCII
- * and control characters are JSON escaped. */
-static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string)
-{
- const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
- const UTF8 *sourceEnd = source + RSTRING_LEN(string);
- char buf[6] = { '\\', 'u' };
+static const unsigned char escape_table_basic[256] = {
+ // ASCII Control Characters
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ // ASCII Characters
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
- while (source < sourceEnd) {
- UTF32 ch = 0;
- unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
- if (source + extraBytesToRead >= sourceEnd) {
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "partial character in source, but hit end");
+static inline unsigned char search_escape_basic(search_state *search)
+{
+ while (search->ptr < search->end) {
+ if (RB_UNLIKELY(escape_table_basic[(const unsigned char)*search->ptr])) {
+ search_flush(search);
+ return 1;
+ } else {
+ search->ptr++;
}
- if (!isLegalUTF8(source, extraBytesToRead+1)) {
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "source sequence is illegal/malformed utf-8");
+ }
+ search_flush(search);
+ return 0;
+}
+
+ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search)
+{
+ const unsigned char ch = (unsigned char)*search->ptr;
+ switch (ch) {
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
+ default: {
+ const char *hexdig = "0123456789abcdef";
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
+ scratch[5] = hexdig[ch & 0xf];
+ fbuffer_append(search->buffer, scratch, 6);
+ break;
}
- /*
- * The cases all fall through. See "Note A" below.
- */
- switch (extraBytesToRead) {
- case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
- case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
- case 3: ch += *source++; ch <<= 6;
- case 2: ch += *source++; ch <<= 6;
- case 1: ch += *source++; ch <<= 6;
- case 0: ch += *source++;
+ }
+ search->ptr++;
+ search->cursor = search->ptr;
+}
+
+/* Converts in_string to a JSON string (without the wrapping '"'
+ * characters) in FBuffer out_buffer.
+ *
+ * Characters are JSON-escaped according to:
+ *
+ * - Always: ASCII control characters (0x00-0x1F), dquote, and
+ * backslash.
+ *
+ * - If out_ascii_only: non-ASCII characters (>0x7F)
+ *
+ * - If script_safe: forwardslash (/), line separator (U+2028), and
+ * paragraph separator (U+2029)
+ *
+ * Everything else (should be UTF-8) is just passed through and
+ * appended to the result.
+ */
+
+
+#if defined(HAVE_SIMD_NEON)
+static inline unsigned char search_escape_basic_neon(search_state *search);
+#elif defined(HAVE_SIMD_SSE2)
+static inline unsigned char search_escape_basic_sse2(search_state *search);
+#endif
+
+static inline unsigned char search_escape_basic(search_state *search);
+
+static inline void convert_UTF8_to_JSON(search_state *search)
+{
+#ifdef HAVE_SIMD
+#if defined(HAVE_SIMD_NEON)
+ while (search_escape_basic_neon(search)) {
+ escape_UTF8_char_basic(search);
+ }
+#elif defined(HAVE_SIMD_SSE2)
+ if (simd_impl == SIMD_SSE2) {
+ while (search_escape_basic_sse2(search)) {
+ escape_UTF8_char_basic(search);
}
- ch -= offsetsFromUTF8[extraBytesToRead];
-
- if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
- /* UTF-16 surrogate values are illegal in UTF-32 */
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-#if UNI_STRICT_CONVERSION
- source -= (extraBytesToRead+1); /* return to the illegal value itself */
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "source sequence is illegal/malformed utf-8");
-#else
- unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
+ return;
+ }
+ while (search_escape_basic(search)) {
+ escape_UTF8_char_basic(search);
+ }
#endif
- } else {
- /* normal case */
- if (ch >= 0x20 && ch <= 0x7f) {
- switch (ch) {
- case '\\':
- fbuffer_append(buffer, "\\\\", 2);
- break;
- case '"':
- fbuffer_append(buffer, "\\\"", 2);
- break;
- default:
- fbuffer_append_char(buffer, (char)ch);
- break;
- }
- } else {
- switch (ch) {
- case '\n':
- fbuffer_append(buffer, "\\n", 2);
- break;
- case '\r':
- fbuffer_append(buffer, "\\r", 2);
- break;
- case '\t':
- fbuffer_append(buffer, "\\t", 2);
- break;
- case '\f':
- fbuffer_append(buffer, "\\f", 2);
- break;
- case '\b':
- fbuffer_append(buffer, "\\b", 2);
- break;
- default:
- unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
- break;
- }
- }
- }
- } else if (ch > UNI_MAX_UTF16) {
-#if UNI_STRICT_CONVERSION
- source -= (extraBytesToRead+1); /* return to the start */
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "source sequence is illegal/malformed utf8");
#else
- unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
-#endif
- } else {
- /* target is a character in range 0xFFFF - 0x10FFFF. */
- ch -= halfBase;
- unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
- unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
- }
+ while (search_escape_basic(search)) {
+ escape_UTF8_char_basic(search);
}
-}
-
-/* Converts string to a JSON string in FBuffer buffer, where only the
- * characters required by the JSON standard are JSON escaped. The remaining
- * characters (should be UTF8) are just passed through and appended to the
- * result. */
-static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string)
-{
- const char *ptr = RSTRING_PTR(string), *p;
- unsigned long len = RSTRING_LEN(string), start = 0, end = 0;
- const char *escape = NULL;
- int escape_len;
- unsigned char c;
- char buf[6] = { '\\', 'u' };
-
- for (start = 0, end = 0; end < len;) {
- p = ptr + end;
- c = (unsigned char) *p;
- if (c < 0x20) {
- switch (c) {
- case '\n':
- escape = "\\n";
- escape_len = 2;
- break;
- case '\r':
- escape = "\\r";
- escape_len = 2;
- break;
- case '\t':
- escape = "\\t";
- escape_len = 2;
- break;
- case '\f':
- escape = "\\f";
- escape_len = 2;
- break;
- case '\b':
- escape = "\\b";
- escape_len = 2;
- break;
- default:
- unicode_escape(buf, (UTF16) *p);
- escape = buf;
- escape_len = 6;
+#endif /* HAVE_SIMD */
+}
+
+static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
+{
+ const unsigned char ch = (unsigned char)*search->ptr;
+ switch (ch_len) {
+ case 1: {
+ switch (ch) {
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
+ default: {
+ const char *hexdig = "0123456789abcdef";
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
+ scratch[5] = hexdig[ch & 0xf];
+ fbuffer_append(search->buffer, scratch, 6);
break;
+ }
}
- } else {
- switch (c) {
- case '\\':
- escape = "\\\\";
- escape_len = 2;
- break;
- case '"':
- escape = "\\\"";
- escape_len = 2;
- break;
- default:
- end++;
- continue;
- break;
+ break;
+ }
+ case 3: {
+ if (search->ptr[2] & 1) {
+ fbuffer_append(search->buffer, "\\u2029", 6);
+ } else {
+ fbuffer_append(search->buffer, "\\u2028", 6);
}
+ break;
}
- fbuffer_append(buffer, ptr + start, end - start);
- fbuffer_append(buffer, escape, escape_len);
- start = ++end;
- escape = NULL;
}
- fbuffer_append(buffer, ptr + start, end - start);
-}
-
-static char *fstrndup(const char *ptr, unsigned long len) {
- char *result;
- if (len <= 0) return NULL;
- result = ALLOC_N(char, len);
- memccpy(result, ptr, 0, len);
- return result;
+ search->cursor = (search->ptr += ch_len);
}
-/* fbuffer implementation */
+#ifdef HAVE_SIMD
-static FBuffer *fbuffer_alloc()
+ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
{
- FBuffer *fb = ALLOC(FBuffer);
- memset((void *) fb, 0, sizeof(FBuffer));
- fb->initial_length = FBUFFER_INITIAL_LENGTH;
- return fb;
-}
+ RBIMPL_ASSERT_OR_ASSUME(len < vec_len);
-static FBuffer *fbuffer_alloc_with_length(unsigned long initial_length)
-{
- FBuffer *fb;
- assert(initial_length > 0);
- fb = ALLOC(FBuffer);
- memset((void *) fb, 0, sizeof(FBuffer));
- fb->initial_length = initial_length;
- return fb;
-}
+ // Flush the buffer so that only the last 'len' characters remain unflushed.
+ search_flush(search);
-static void fbuffer_free(FBuffer *fb)
-{
- if (fb->ptr) ruby_xfree(fb->ptr);
- ruby_xfree(fb);
+ FBuffer *buf = search->buffer;
+ fbuffer_inc_capa(buf, vec_len);
+
+ char *s = (buf->ptr + buf->len);
+
+ // Pad the buffer with dummy characters that won't need escaping.
+ // This seems wasteful at first sight, but a memset of vector length is very fast.
+ // This is a space as it can be directly represented as an immediate on AArch64.
+ memset(s, ' ', vec_len);
+
+ // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
+ // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
+ if (vec_len == 16) {
+ RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD);
+ json_fast_memcpy16(s, search->ptr, len);
+ } else {
+ MEMCPY(s, search->ptr, char, len);
+ }
+
+ return s;
}
-static void fbuffer_clear(FBuffer *fb)
+#ifdef HAVE_SIMD_NEON
+
+ALWAYS_INLINE(static) unsigned char neon_next_match(search_state *search)
{
- fb->len = 0;
+ uint64_t mask = search->matches_mask;
+ uint32_t index = trailing_zeros64(mask) >> 2;
+
+ // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
+ // If we want to use a similar approach for full escaping we'll need to ensure:
+ // search->chunk_base + index >= search->ptr
+ // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
+ // is one byte after the previous match then:
+ // search->chunk_base + index == search->ptr
+ search->ptr = search->chunk_base + index;
+ mask &= mask - 1;
+ search->matches_mask = mask;
+ search_flush(search);
+ return 1;
}
-static void fbuffer_inc_capa(FBuffer *fb, unsigned long requested)
+static inline unsigned char search_escape_basic_neon(search_state *search)
{
- unsigned long required;
+ if (RB_UNLIKELY(search->has_matches)) {
+ // There are more matches if search->matches_mask > 0.
+ if (search->matches_mask > 0) {
+ return neon_next_match(search);
+ } else {
+ // neon_next_match will only advance search->ptr up to the last matching character.
+ // Skip over any characters in the last chunk that occur after the last match.
+ search->has_matches = false;
+ search->ptr = search->chunk_end;
+ }
+ }
- if (!fb->ptr) {
- fb->ptr = ALLOC_N(char, fb->initial_length);
- fb->capa = fb->initial_length;
+ /*
+ * The code below implements an SIMD-based algorithm to determine if N bytes at a time
+ * need to be escaped.
+ *
+ * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
+ *
+ * The explanation will be limited to the first 8 bytes of the string for simplicity. However
+ * the vector instructions may work on larger vectors.
+ *
+ * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' into vector registers.
+ *
+ * lower_bound: [20 20 20 20 20 20 20 20]
+ * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
+ * dblquote: [22 22 22 22 22 22 22 22]
+ *
+ * Next we load the first chunk of the ptr:
+ * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
+ *
+ * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
+ * as no bytes are less than 32 (0x20):
+ * [0 0 0 0 0 0 0 0]
+ *
+ * Next, we check if any byte in chunk is equal to a backslash:
+ * [0 0 0 FF 0 0 0 0]
+ *
+ * Finally we check if any byte in chunk is equal to a double quote:
+ * [FF 0 0 0 0 0 0 0]
+ *
+ * Now we have three vectors where each byte indicates if the corresponding byte in chunk
+ * needs to be escaped. We combine these vectors with a series of logical OR instructions.
+ * This is the needs_escape vector and it is equal to:
+ * [FF 0 0 FF 0 0 0 0]
+ *
+ * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
+ * the values in the vector. This computes how many bytes need to be escaped within this chunk.
+ *
+ * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0, then
+ * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0, then we
+ * have at least one byte that needs to be escaped.
+ */
+
+ if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
+ search->has_matches = true;
+ search->chunk_base = search->ptr;
+ search->chunk_end = search->ptr + sizeof(uint8x16_t);
+ return neon_next_match(search);
}
- for (required = fb->capa; requested > required - fb->len; required <<= 1);
+ // There are fewer than 16 bytes left.
+ unsigned long remaining = (search->end - search->ptr);
+ if (remaining >= SIMD_MINIMUM_THRESHOLD) {
+ char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
+
+ uint64_t mask = compute_chunk_mask_neon(s);
- if (required > fb->capa) {
- REALLOC_N(fb->ptr, char, required);
- fb->capa = required;
+ if (!mask) {
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
+ // search->cursor to search->ptr.
+ fbuffer_consumed(search->buffer, remaining);
+ search->ptr = search->end;
+ search->cursor = search->end;
+ return 0;
+ }
+
+ search->matches_mask = mask;
+ search->has_matches = true;
+ search->chunk_end = search->end;
+ search->chunk_base = search->ptr;
+ return neon_next_match(search);
}
-}
-static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len)
-{
- if (len > 0) {
- fbuffer_inc_capa(fb, len);
- MEMCPY(fb->ptr + fb->len, newstr, char, len);
- fb->len += len;
+ if (search->ptr < search->end) {
+ return search_escape_basic(search);
}
+
+ search_flush(search);
+ return 0;
}
+#endif /* HAVE_SIMD_NEON */
+
+#ifdef HAVE_SIMD_SSE2
-static void fbuffer_append_char(FBuffer *fb, char newchr)
+ALWAYS_INLINE(static) unsigned char sse2_next_match(search_state *search)
{
- fbuffer_inc_capa(fb, 1);
- *(fb->ptr + fb->len) = newchr;
- fb->len++;
+ int mask = search->matches_mask;
+ int index = trailing_zeros(mask);
+
+ // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
+ // If we want to use a similar approach for full escaping we'll need to ensure:
+ // search->chunk_base + index >= search->ptr
+ // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
+ // is one byte after the previous match then:
+ // search->chunk_base + index == search->ptr
+ search->ptr = search->chunk_base + index;
+ mask &= mask - 1;
+ search->matches_mask = mask;
+ search_flush(search);
+ return 1;
}
-static void freverse(char *start, char *end)
+#if defined(__clang__) || defined(__GNUC__)
+#define TARGET_SSE2 __attribute__((target("sse2")))
+#else
+#define TARGET_SSE2
+#endif
+
+ALWAYS_INLINE(static) TARGET_SSE2 unsigned char search_escape_basic_sse2(search_state *search)
{
- char c;
+ if (RB_UNLIKELY(search->has_matches)) {
+ // There are more matches if search->matches_mask > 0.
+ if (search->matches_mask > 0) {
+ return sse2_next_match(search);
+ } else {
+ // sse2_next_match will only advance search->ptr up to the last matching character.
+ // Skip over any characters in the last chunk that occur after the last match.
+ search->has_matches = false;
+ if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
+ search->ptr = search->end;
+ } else {
+ search->ptr = search->chunk_base + sizeof(__m128i);
+ }
+ }
+ }
- while (end > start) {
- c = *end, *end-- = *start, *start++ = c;
+ if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
+ search->has_matches = true;
+ search->chunk_base = search->ptr;
+ search->chunk_end = search->ptr + sizeof(__m128i);
+ return sse2_next_match(search);
}
-}
-static long fltoa(long number, char *buf)
+ // There are fewer than 16 bytes left.
+ unsigned long remaining = (search->end - search->ptr);
+ if (remaining >= SIMD_MINIMUM_THRESHOLD) {
+ char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
+
+ int needs_escape_mask = compute_chunk_mask_sse2(s);
+
+ if (needs_escape_mask == 0) {
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
+ // search->cursor to search->ptr.
+ fbuffer_consumed(search->buffer, remaining);
+ search->ptr = search->end;
+ search->cursor = search->end;
+ return 0;
+ }
+
+ search->has_matches = true;
+ search->matches_mask = needs_escape_mask;
+ search->chunk_base = search->ptr;
+ return sse2_next_match(search);
+ }
+
+ if (search->ptr < search->end) {
+ return search_escape_basic(search);
+ }
+
+ search_flush(search);
+ return 0;
+}
+
+#endif /* HAVE_SIMD_SSE2 */
+
+#endif /* HAVE_SIMD */
+
+static const unsigned char script_safe_escape_table[256] = {
+ // ASCII Control Characters
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ // ASCII Characters
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // Continuation byte
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ // First byte of a 2-byte code point
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ // First byte of a 3-byte code point
+ 3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029
+ //First byte of a 4+ byte code point
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
+};
+
+static inline unsigned char search_script_safe_escape(search_state *search)
{
- static char digits[] = "0123456789";
- long sign = number;
- char* tmp = buf;
+ while (search->ptr < search->end) {
+ unsigned char ch = (unsigned char)*search->ptr;
+ unsigned char ch_len = script_safe_escape_table[ch];
- if (sign < 0) number = -number;
- do *tmp++ = digits[number % 10]; while (number /= 10);
- if (sign < 0) *tmp++ = '-';
- freverse(buf, tmp - 1);
- return tmp - buf;
+ if (RB_UNLIKELY(ch_len)) {
+ if (ch_len & ESCAPE_MASK) {
+ if (RB_UNLIKELY(ch_len == 11)) {
+ const unsigned char *uptr = (const unsigned char *)search->ptr;
+ if (!(uptr[1] == 0x80 && (uptr[2] >> 1) == 0x54)) {
+ search->ptr += 3;
+ continue;
+ }
+ }
+ search_flush(search);
+ return ch_len & CHAR_LENGTH_MASK;
+ } else {
+ search->ptr += ch_len;
+ }
+ } else {
+ search->ptr++;
+ }
+ }
+ search_flush(search);
+ return 0;
}
-static void fbuffer_append_long(FBuffer *fb, long number)
+static void convert_UTF8_to_script_safe_JSON(search_state *search)
{
- char buf[20];
- unsigned long len = fltoa(number, buf);
- fbuffer_append(fb, buf, len);
+ unsigned char ch_len;
+ while ((ch_len = search_script_safe_escape(search))) {
+ escape_UTF8_char(search, ch_len);
+ }
}
-static FBuffer *fbuffer_dup(FBuffer *fb)
+static const unsigned char ascii_only_escape_table[256] = {
+ // ASCII Control Characters
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ // ASCII Characters
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // Continuation byte
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ // First byte of a 2-byte code point
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ // First byte of a 3-byte code point
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ //First byte of a 4+ byte code point
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
+};
+
+static inline unsigned char search_ascii_only_escape(search_state *search, const unsigned char escape_table[256])
{
- unsigned long len = fb->len;
- FBuffer *result;
+ while (search->ptr < search->end) {
+ unsigned char ch = (unsigned char)*search->ptr;
+ unsigned char ch_len = escape_table[ch];
- if (len > 0) {
- result = fbuffer_alloc_with_length(len);
- fbuffer_append(result, FBUFFER_PAIR(fb));
- } else {
- result = fbuffer_alloc();
+ if (RB_UNLIKELY(ch_len)) {
+ search_flush(search);
+ return ch_len & CHAR_LENGTH_MASK;
+ } else {
+ search->ptr++;
+ }
+ }
+ search_flush(search);
+ return 0;
+}
+
+static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
+{
+ const unsigned char ch = (unsigned char)*search->ptr;
+ switch (ch_len) {
+ case 1: {
+ switch (ch) {
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
+ default: {
+ const char *hexdig = "0123456789abcdef";
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
+ scratch[5] = hexdig[ch & 0xf];
+ fbuffer_append(search->buffer, scratch, 6);
+ break;
+ }
+ }
+ break;
+ }
+ default: {
+ const char *hexdig = "0123456789abcdef";
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
+
+ uint32_t wchar = 0;
+
+ switch (ch_len) {
+ case 2:
+ wchar = ch & 0x1F;
+ break;
+ case 3:
+ wchar = ch & 0x0F;
+ break;
+ case 4:
+ wchar = ch & 0x07;
+ break;
+ }
+
+ for (short i = 1; i < ch_len; i++) {
+ wchar = (wchar << 6) | (search->ptr[i] & 0x3F);
+ }
+
+ if (wchar <= 0xFFFF) {
+ scratch[2] = hexdig[wchar >> 12];
+ scratch[3] = hexdig[(wchar >> 8) & 0xf];
+ scratch[4] = hexdig[(wchar >> 4) & 0xf];
+ scratch[5] = hexdig[wchar & 0xf];
+ fbuffer_append(search->buffer, scratch, 6);
+ } else {
+ uint16_t hi, lo;
+ wchar -= 0x10000;
+ hi = 0xD800 + (uint16_t)(wchar >> 10);
+ lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
+
+ scratch[2] = hexdig[hi >> 12];
+ scratch[3] = hexdig[(hi >> 8) & 0xf];
+ scratch[4] = hexdig[(hi >> 4) & 0xf];
+ scratch[5] = hexdig[hi & 0xf];
+
+ scratch[8] = hexdig[lo >> 12];
+ scratch[9] = hexdig[(lo >> 8) & 0xf];
+ scratch[10] = hexdig[(lo >> 4) & 0xf];
+ scratch[11] = hexdig[lo & 0xf];
+
+ fbuffer_append(search->buffer, scratch, 12);
+ }
+
+ break;
+ }
}
- return result;
+ search->cursor = (search->ptr += ch_len);
}
-/*
- * Document-module: JSON::Ext::Generator
- *
- * This is the JSON generator implemented as a C extension. It can be
- * configured to be used by setting
- *
- * JSON.generator = JSON::Ext::Generator
- *
- * with the method generator= in JSON.
- *
- */
+static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned char escape_table[256])
+{
+ unsigned char ch_len;
+ while ((ch_len = search_ascii_only_escape(search, escape_table))) {
+ full_escape_UTF8_char(search, ch_len);
+ }
+}
-/*
- * call-seq: to_json(state = nil)
- *
- * Returns a JSON string containing a JSON object, that is generated from
- * this Hash instance.
- * _state_ is a JSON::State object, that can also be used to configure the
- * produced JSON string output further.
- */
-static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
+static void State_mark(void *ptr)
{
- GENERATE_JSON(object);
+ JSON_Generator_State *state = ptr;
+ rb_gc_mark_movable(state->indent);
+ rb_gc_mark_movable(state->space);
+ rb_gc_mark_movable(state->space_before);
+ rb_gc_mark_movable(state->object_nl);
+ rb_gc_mark_movable(state->array_nl);
+ rb_gc_mark_movable(state->as_json);
}
-/*
- * call-seq: to_json(state = nil)
- *
- * Returns a JSON string containing a JSON array, that is generated from
- * this Array instance.
- * _state_ is a JSON::State object, that can also be used to configure the
- * produced JSON string output further.
- */
-static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
- GENERATE_JSON(array);
+static void State_compact(void *ptr)
+{
+ JSON_Generator_State *state = ptr;
+ state->indent = rb_gc_location(state->indent);
+ state->space = rb_gc_location(state->space);
+ state->space_before = rb_gc_location(state->space_before);
+ state->object_nl = rb_gc_location(state->object_nl);
+ state->array_nl = rb_gc_location(state->array_nl);
+ state->as_json = rb_gc_location(state->as_json);
}
-/*
- * call-seq: to_json(*)
- *
- * Returns a JSON string representation for this Integer number.
- */
-static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self)
+static size_t State_memsize(const void *ptr)
{
- GENERATE_JSON(fixnum);
+#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
+ return 0;
+#else
+ return sizeof(JSON_Generator_State);
+#endif
}
-/*
- * call-seq: to_json(*)
- *
- * Returns a JSON string representation for this Integer number.
- */
-static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self)
+static const rb_data_type_t JSON_Generator_State_type = {
+ .wrap_struct_name = "JSON/Generator/State",
+ .function = {
+ .dmark = State_mark,
+ .dfree = RUBY_DEFAULT_FREE,
+ .dsize = State_memsize,
+ .dcompact = State_compact,
+ },
+ .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
+};
+
+static void state_init(JSON_Generator_State *state)
{
- GENERATE_JSON(bignum);
+ state->max_nesting = 100;
+ state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
}
-/*
- * call-seq: to_json(*)
- *
- * Returns a JSON string representation for this Float number.
- */
-static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
+static VALUE cState_s_allocate(VALUE klass)
{
- GENERATE_JSON(float);
+ JSON_Generator_State *state;
+ VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state);
+ state_init(state);
+ return obj;
}
-/*
- * call-seq: String.included(modul)
- *
- * Extends _modul_ with the String::Extend module.
- */
-static VALUE mString_included_s(VALUE self, VALUE modul) {
- VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
- return result;
+static void vstate_spill(struct generate_json_data *data)
+{
+ VALUE vstate = cState_s_allocate(cState);
+ GET_STATE(vstate);
+ MEMCPY(state, data->state, JSON_Generator_State, 1);
+ data->state = state;
+ data->vstate = vstate;
+ RB_OBJ_WRITTEN(vstate, Qundef, state->indent);
+ RB_OBJ_WRITTEN(vstate, Qundef, state->space);
+ RB_OBJ_WRITTEN(vstate, Qundef, state->space_before);
+ RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl);
+ RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl);
+ RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
}
-/*
- * call-seq: to_json(*)
- *
- * This string should be encoded with UTF-8 A call to this method
- * returns a JSON string encoded with UTF16 big endian characters as
- * \u????.
- */
-static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
+static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj)
{
- GENERATE_JSON(string);
+ if (RB_UNLIKELY(!data->vstate)) {
+ vstate_spill(data);
+ }
+ GET_STATE(data->vstate);
+ state->depth = data->depth;
+ VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate);
+ // no need to restore state->depth, vstate is just a temporary State
+ return tmp;
}
-/*
- * call-seq: to_json_raw_object()
- *
- * This method creates a raw object hash, that can be nested into
- * other data structures and will be generated as a raw string. This
- * method should be used, if you want to convert raw strings to JSON
- * instead of UTF-8 strings, e. g. binary data.
- */
-static VALUE mString_to_json_raw_object(VALUE self)
+static VALUE
+json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
{
- VALUE ary;
- VALUE result = rb_hash_new();
- rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
- ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
- rb_hash_aset(result, rb_str_new2("raw"), ary);
- return result;
+ VALUE proc_args[2] = {object, is_key};
+ return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
}
-/*
- * call-seq: to_json_raw(*args)
- *
- * This method creates a JSON text from the result of a call to
- * to_json_raw_object of this String.
- */
-static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
+static VALUE
+convert_string_subclass(VALUE key)
{
- VALUE obj = mString_to_json_raw_object(self);
- Check_Type(obj, T_HASH);
- return mHash_to_json(argc, argv, obj);
+ VALUE key_to_s = rb_funcall(key, i_to_s, 0);
+
+ if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
+ VALUE cname = rb_obj_class(key);
+ rb_raise(rb_eTypeError,
+ "can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
+ cname, "String", cname, "to_s", rb_obj_class(key_to_s));
+ }
+
+ return key_to_s;
}
-/*
- * call-seq: json_create(o)
- *
- * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
- * key "raw"). The Ruby String can be created by this module method.
- */
-static VALUE mString_Extend_json_create(VALUE self, VALUE o)
+static bool enc_utf8_compatible_p(int enc_idx)
{
- VALUE ary;
- Check_Type(o, T_HASH);
- ary = rb_hash_aref(o, rb_str_new2("raw"));
- return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
+ if (enc_idx == usascii_encindex) return true;
+ if (enc_idx == utf8_encindex) return true;
+ return false;
}
-/*
- * call-seq: to_json(*)
- *
- * Returns a JSON string for true: 'true'.
- */
-static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
+static VALUE encode_json_string_try(VALUE str)
{
- GENERATE_JSON(true);
+ return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
}
-/*
- * call-seq: to_json(*)
- *
- * Returns a JSON string for false: 'false'.
- */
-static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
+static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
{
- GENERATE_JSON(false);
+ raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
+ return Qundef;
}
-/*
- * call-seq: to_json(*)
- *
- */
-static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self)
+static inline int json_str_coderange(VALUE str) {
+ int coderange = RB_ENC_CODERANGE(str);
+ if (coderange == RUBY_ENC_CODERANGE_UNKNOWN) {
+ coderange = rb_enc_str_coderange(str);
+ }
+ return coderange;
+}
+
+static inline bool valid_json_string_p(VALUE str)
{
- GENERATE_JSON(null);
+ int coderange = json_str_coderange(str);
+
+ if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) {
+ return true;
+ }
+
+ if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) {
+ return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str));
+ }
+
+ return false;
}
-/*
- * call-seq: to_json(*)
- *
- * Converts this object to a string (calling #to_s), converts
- * it to a JSON string, and returns the result. This is a fallback, if no
- * special method #to_json was defined for some object.
- */
-static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
+NOINLINE(static) VALUE convert_invalid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key)
{
- VALUE state;
- VALUE string = rb_funcall(self, i_to_s, 0);
- rb_scan_args(argc, argv, "01", &state);
- Check_Type(string, T_STRING);
- state = cState_from_state_s(cState, state);
- return cState_partial_generate(state, string);
+ if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) {
+ VALUE coerced_str = json_call_as_json(data->state, str, Qfalse);
+ if (coerced_str != str) {
+ if (RB_TYPE_P(coerced_str, T_STRING)) {
+ if (!valid_json_string_p(coerced_str)) {
+ raise_generator_error(str, "source sequence is illegal/malformed utf-8");
+ }
+ } else {
+ // as_json could return a type other than T_STRING
+ if (is_key) {
+ raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str));
+ }
+ }
+
+ return coerced_str;
+ }
+ }
+
+ if (RB_ENCODING_GET_INLINED(str) == binary_encindex) {
+ VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
+ switch (rb_enc_str_coderange(utf8_string)) {
+ case ENC_CODERANGE_7BIT:
+ return utf8_string;
+ case ENC_CODERANGE_VALID:
+ // For historical reasons, we silently reinterpret binary strings as UTF-8 when the bytes are valid UTF-8.
+ // TODO: Raise in 3.0.0
+ rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
+ return utf8_string;
+ break;
+ }
+ }
+
+ return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
}
-static void State_free(JSON_Generator_State *state)
+ALWAYS_INLINE(static) VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key)
{
- if (state->indent) ruby_xfree(state->indent);
- if (state->space) ruby_xfree(state->space);
- if (state->space_before) ruby_xfree(state->space_before);
- if (state->object_nl) ruby_xfree(state->object_nl);
- if (state->array_nl) ruby_xfree(state->array_nl);
- if (state->array_delim) fbuffer_free(state->array_delim);
- if (state->object_delim) fbuffer_free(state->object_delim);
- if (state->object_delim2) fbuffer_free(state->object_delim2);
- ruby_xfree(state);
+ if (RB_LIKELY(valid_json_string_p(str))) {
+ return str;
+ }
+ else {
+ return convert_invalid_encoding(data, str, as_json_called, is_key);
+ }
}
-static JSON_Generator_State *State_allocate()
+static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
- JSON_Generator_State *state = ALLOC(JSON_Generator_State);
- MEMZERO(state, JSON_Generator_State, 1);
- return state;
+ fbuffer_append_char(buffer, '"');
+
+ long len;
+ search_state search;
+ search.buffer = buffer;
+ RSTRING_GETMEM(obj, search.ptr, len);
+ search.cursor = search.ptr;
+ search.end = search.ptr + len;
+
+#ifdef HAVE_SIMD
+ search.matches_mask = 0;
+ search.has_matches = false;
+ search.chunk_base = NULL;
+ search.chunk_end = NULL;
+#endif /* HAVE_SIMD */
+
+ switch (json_str_coderange(obj)) {
+ case ENC_CODERANGE_7BIT:
+ case ENC_CODERANGE_VALID:
+ if (RB_UNLIKELY(data->state->ascii_only)) {
+ convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
+ } else if (RB_UNLIKELY(data->state->script_safe)) {
+ convert_UTF8_to_script_safe_JSON(&search);
+ } else {
+ convert_UTF8_to_JSON(&search);
+ }
+ break;
+ default:
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
+ break;
+ }
+ fbuffer_append_char(buffer, '"');
}
-static VALUE cState_s_allocate(VALUE klass)
+static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
- JSON_Generator_State *state = State_allocate();
- return Data_Wrap_Struct(klass, NULL, State_free, state);
+ obj = ensure_valid_encoding(data, obj, false, false);
+ raw_generate_json_string(buffer, data, obj);
}
-/*
- * call-seq: configure(opts)
- *
- * Configure this State instance with the Hash _opts_, and return
- * itself.
- */
-static VALUE cState_configure(VALUE self, VALUE opts)
+struct hash_foreach_arg {
+ VALUE hash;
+ struct generate_json_data *data;
+ int first_key_type;
+ bool first;
+ bool mixed_keys_encountered;
+};
+
+NOINLINE(static) void
+json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
{
- VALUE tmp;
- GET_STATE(self);
- tmp = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
- if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h");
- if (NIL_P(tmp)) {
- rb_raise(rb_eArgError, "opts has to be hash like or convertable into a hash");
+ if (arg->mixed_keys_encountered) {
+ return;
}
- opts = tmp;
- tmp = rb_hash_aref(opts, ID2SYM(i_indent));
- if (RTEST(tmp)) {
- unsigned long len;
- Check_Type(tmp, T_STRING);
- len = RSTRING_LEN(tmp);
- state->indent = fstrndup(RSTRING_PTR(tmp), len);
- state->indent_len = len;
+ arg->mixed_keys_encountered = true;
+
+ JSON_Generator_State *state = arg->data->state;
+ if (state->on_duplicate_key != JSON_IGNORE) {
+ VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
+ rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
}
- tmp = rb_hash_aref(opts, ID2SYM(i_space));
- if (RTEST(tmp)) {
- unsigned long len;
- Check_Type(tmp, T_STRING);
- len = RSTRING_LEN(tmp);
- state->space = fstrndup(RSTRING_PTR(tmp), len);
- state->space_len = len;
+}
+
+static int
+json_object_i(VALUE key, VALUE val, VALUE _arg)
+{
+ struct hash_foreach_arg *arg = (struct hash_foreach_arg *)_arg;
+ struct generate_json_data *data = arg->data;
+
+ FBuffer *buffer = data->buffer;
+ JSON_Generator_State *state = data->state;
+
+ long depth = data->depth;
+ int key_type = rb_type(key);
+
+ if (arg->first) {
+ arg->first = false;
+ arg->first_key_type = key_type;
}
- tmp = rb_hash_aref(opts, ID2SYM(i_space_before));
- if (RTEST(tmp)) {
- unsigned long len;
- Check_Type(tmp, T_STRING);
- len = RSTRING_LEN(tmp);
- state->space_before = fstrndup(RSTRING_PTR(tmp), len);
- state->space_before_len = len;
+ else {
+ fbuffer_append_char(buffer, ',');
}
- tmp = rb_hash_aref(opts, ID2SYM(i_array_nl));
- if (RTEST(tmp)) {
- unsigned long len;
- Check_Type(tmp, T_STRING);
- len = RSTRING_LEN(tmp);
- state->array_nl = fstrndup(RSTRING_PTR(tmp), len);
- state->array_nl_len = len;
+
+ if (RB_UNLIKELY(data->state->object_nl)) {
+ fbuffer_append_str(buffer, data->state->object_nl);
}
- tmp = rb_hash_aref(opts, ID2SYM(i_object_nl));
- if (RTEST(tmp)) {
- unsigned long len;
- Check_Type(tmp, T_STRING);
- len = RSTRING_LEN(tmp);
- state->object_nl = fstrndup(RSTRING_PTR(tmp), len);
- state->object_nl_len = len;
+ if (RB_UNLIKELY(data->state->indent)) {
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
}
- tmp = ID2SYM(i_max_nesting);
- state->max_nesting = 19;
- if (option_given_p(opts, tmp)) {
- VALUE max_nesting = rb_hash_aref(opts, tmp);
- if (RTEST(max_nesting)) {
- Check_Type(max_nesting, T_FIXNUM);
- state->max_nesting = FIX2LONG(max_nesting);
- } else {
- state->max_nesting = 0;
- }
+
+ VALUE key_to_s;
+ bool as_json_called = false;
+
+ start:
+ switch (key_type) {
+ case T_STRING:
+ if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
+ json_inspect_hash_with_mixed_keys(arg);
+ }
+
+ if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
+ key_to_s = key;
+ } else {
+ key_to_s = convert_string_subclass(key);
+ }
+ break;
+ case T_SYMBOL:
+ if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
+ json_inspect_hash_with_mixed_keys(arg);
+ }
+
+ key_to_s = rb_sym2str(key);
+ break;
+ default:
+ if (data->state->strict) {
+ if (RTEST(data->state->as_json) && !as_json_called) {
+ key = json_call_as_json(data->state, key, Qtrue);
+ key_type = rb_type(key);
+ as_json_called = true;
+ goto start;
+ } else {
+ raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
+ }
+ }
+ key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
+ break;
}
- tmp = ID2SYM(i_depth);
- state->depth = 0;
- if (option_given_p(opts, tmp)) {
- VALUE depth = rb_hash_aref(opts, tmp);
- if (RTEST(depth)) {
- Check_Type(depth, T_FIXNUM);
- state->depth = FIX2LONG(depth);
- } else {
- state->depth = 0;
- }
+
+ key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true);
+
+ if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
+ raw_generate_json_string(buffer, data, key_to_s);
+ } else {
+ generate_json(buffer, data, key_to_s);
}
- tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan));
- state->allow_nan = RTEST(tmp);
- tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only));
- state->ascii_only = RTEST(tmp);
- return self;
-}
+ if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before);
+ fbuffer_append_char(buffer, ':');
+ if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
+ generate_json(buffer, data, val);
-/*
- * call-seq: to_h
- *
- * Returns the configuration instance variables as a hash, that can be
- * passed to the configure method.
- */
-static VALUE cState_to_h(VALUE self)
-{
- VALUE result = rb_hash_new();
- GET_STATE(self);
- rb_hash_aset(result, ID2SYM(i_indent), rb_str_new(state->indent, state->indent_len));
- rb_hash_aset(result, ID2SYM(i_space), rb_str_new(state->space, state->space_len));
- rb_hash_aset(result, ID2SYM(i_space_before), rb_str_new(state->space_before, state->space_before_len));
- rb_hash_aset(result, ID2SYM(i_object_nl), rb_str_new(state->object_nl, state->object_nl_len));
- rb_hash_aset(result, ID2SYM(i_array_nl), rb_str_new(state->array_nl, state->array_nl_len));
- rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse);
- rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse);
- rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
- rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth));
- return result;
+ return ST_CONTINUE;
}
-/*
-* call-seq: [](name)
-*
-* Return the value returned by method +name+.
-*/
-static VALUE cState_aref(VALUE self, VALUE name)
+static inline long increase_depth(struct generate_json_data *data)
{
- GET_STATE(self);
- if (RTEST(rb_funcall(self, i_respond_to_p, 1, name))) {
- return rb_funcall(self, i_send, 1, name);
- } else {
- return Qnil;
+ JSON_Generator_State *state = data->state;
+ long depth = ++data->depth;
+ if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
+ rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --data->depth);
}
+ return depth;
}
-static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
-{
- char *object_nl = state->object_nl;
- long object_nl_len = state->object_nl_len;
- char *indent = state->indent;
- long indent_len = state->indent_len;
- long max_nesting = state->max_nesting;
- char *delim = FBUFFER_PTR(state->object_delim);
- long delim_len = FBUFFER_LEN(state->object_delim);
- char *delim2 = FBUFFER_PTR(state->object_delim2);
- long delim2_len = FBUFFER_LEN(state->object_delim2);
- long depth = ++state->depth;
- int i, j;
- VALUE key, key_to_s, keys;
- if (max_nesting != 0 && depth > max_nesting) {
- fbuffer_free(buffer);
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
+static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
+{
+ long depth = increase_depth(data);
+
+ if (RHASH_SIZE(obj) == 0) {
+ fbuffer_append(buffer, "{}", 2);
+ --data->depth;
+ return;
}
+
fbuffer_append_char(buffer, '{');
- keys = rb_funcall(obj, i_keys, 0);
- for(i = 0; i < RARRAY_LEN(keys); i++) {
- if (i > 0) fbuffer_append(buffer, delim, delim_len);
- if (object_nl) {
- fbuffer_append(buffer, object_nl, object_nl_len);
- }
- if (indent) {
- for (j = 0; j < depth; j++) {
- fbuffer_append(buffer, indent, indent_len);
- }
- }
- key = rb_ary_entry(keys, i);
- key_to_s = rb_funcall(key, i_to_s, 0);
- Check_Type(key_to_s, T_STRING);
- generate_json(buffer, Vstate, state, key_to_s);
- fbuffer_append(buffer, delim2, delim2_len);
- generate_json(buffer, Vstate, state, rb_hash_aref(obj, key));
- }
- depth = --state->depth;
- if (object_nl) {
- fbuffer_append(buffer, object_nl, object_nl_len);
- if (indent) {
- for (j = 0; j < depth; j++) {
- fbuffer_append(buffer, indent, indent_len);
- }
+
+ struct hash_foreach_arg arg = {
+ .hash = obj,
+ .data = data,
+ .first = true,
+ };
+ rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
+
+ depth = --data->depth;
+ if (RB_UNLIKELY(data->state->object_nl)) {
+ fbuffer_append_str(buffer, data->state->object_nl);
+ if (RB_UNLIKELY(data->state->indent)) {
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
}
}
fbuffer_append_char(buffer, '}');
}
-static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
-{
- char *array_nl = state->array_nl;
- long array_nl_len = state->array_nl_len;
- char *indent = state->indent;
- long indent_len = state->indent_len;
- long max_nesting = state->max_nesting;
- char *delim = FBUFFER_PTR(state->array_delim);
- long delim_len = FBUFFER_LEN(state->array_delim);
- long depth = ++state->depth;
- int i, j;
- if (max_nesting != 0 && depth > max_nesting) {
- fbuffer_free(buffer);
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
+static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
+{
+ long depth = increase_depth(data);
+
+ if (RARRAY_LEN(obj) == 0) {
+ fbuffer_append(buffer, "[]", 2);
+ --data->depth;
+ return;
}
+
fbuffer_append_char(buffer, '[');
- if (array_nl) fbuffer_append(buffer, array_nl, array_nl_len);
- for(i = 0; i < RARRAY_LEN(obj); i++) {
- if (i > 0) fbuffer_append(buffer, delim, delim_len);
- if (indent) {
- for (j = 0; j < depth; j++) {
- fbuffer_append(buffer, indent, indent_len);
- }
+ if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
+ for (int i = 0; i < RARRAY_LEN(obj); i++) {
+ if (i > 0) {
+ fbuffer_append_char(buffer, ',');
+ if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
+ }
+ if (RB_UNLIKELY(data->state->indent)) {
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
}
- generate_json(buffer, Vstate, state, rb_ary_entry(obj, i));
+ generate_json(buffer, data, RARRAY_AREF(obj, i));
}
- state->depth = --depth;
- if (array_nl) {
- fbuffer_append(buffer, array_nl, array_nl_len);
- if (indent) {
- for (j = 0; j < depth; j++) {
- fbuffer_append(buffer, indent, indent_len);
- }
+ data->depth = --depth;
+ if (RB_UNLIKELY(data->state->array_nl)) {
+ fbuffer_append_str(buffer, data->state->array_nl);
+ if (RB_UNLIKELY(data->state->indent)) {
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
}
}
fbuffer_append_char(buffer, ']');
}
-static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
- fbuffer_append_char(buffer, '"');
-#ifdef HAVE_RUBY_ENCODING_H
- obj = rb_funcall(obj, i_encode, 1, CEncoding_UTF_8);
-#endif
- if (state->ascii_only) {
- convert_UTF8_to_JSON_ASCII(buffer, obj);
+ VALUE tmp;
+ if (rb_respond_to(obj, i_to_json)) {
+ tmp = json_call_to_json(data, obj);
+ Check_Type(tmp, T_STRING);
+ fbuffer_append_str(buffer, tmp);
} else {
- convert_UTF8_to_JSON(buffer, obj);
+ tmp = rb_funcall(obj, i_to_s, 0);
+ Check_Type(tmp, T_STRING);
+ generate_json_string(buffer, data, tmp);
}
- fbuffer_append_char(buffer, '"');
}
-static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
+{
+ if (data->state->strict) {
+ generate_json_string(buffer, data, rb_sym2str(obj));
+ } else {
+ generate_json_fallback(buffer, data, obj);
+ }
+}
+
+static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
fbuffer_append(buffer, "null", 4);
}
-static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
fbuffer_append(buffer, "false", 5);
}
-static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
fbuffer_append(buffer, "true", 4);
}
-static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
fbuffer_append_long(buffer, FIX2LONG(obj));
}
-static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
VALUE tmp = rb_funcall(obj, i_to_s, 0);
- fbuffer_append(buffer, RSTRING_PAIR(tmp));
+ fbuffer_append_str(buffer, StringValue(tmp));
}
-static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
double value = RFLOAT_VALUE(obj);
- char allow_nan = state->allow_nan;
- VALUE tmp = rb_funcall(obj, i_to_s, 0);
- if (!allow_nan) {
- if (isinf(value)) {
- fbuffer_free(buffer);
- rb_raise(eGeneratorError, "%u: %s not allowed in JSON", __LINE__, StringValueCStr(tmp));
- } else if (isnan(value)) {
- fbuffer_free(buffer);
- rb_raise(eGeneratorError, "%u: %s not allowed in JSON", __LINE__, StringValueCStr(tmp));
+ char allow_nan = data->state->allow_nan;
+ if (isinf(value) || isnan(value)) {
+ /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
+ if (!allow_nan) {
+ if (data->state->strict && data->state->as_json) {
+ VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
+ if (casted_obj != obj) {
+ increase_depth(data);
+ generate_json(buffer, data, casted_obj);
+ data->depth--;
+ return;
+ }
+ }
+ raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0));
}
+
+ VALUE tmp = rb_funcall(obj, i_to_s, 0);
+ fbuffer_append_str(buffer, tmp);
+ return;
}
- fbuffer_append(buffer, RSTRING_PAIR(tmp));
+
+ /* This implementation writes directly into the buffer. We reserve
+ * the 32 characters that fpconv_dtoa states as its maximum.
+ */
+ fbuffer_inc_capa(buffer, 32);
+ char* d = buffer->ptr + buffer->len;
+ int len = fpconv_dtoa(value, d);
+ /* fpconv_dtoa converts a float to its shortest string representation,
+ * but it adds a ".0" if this is a plain integer.
+ */
+ fbuffer_consumed(buffer, len);
}
-static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
- VALUE tmp;
- VALUE klass = CLASS_OF(obj);
- if (klass == rb_cHash) {
- generate_json_object(buffer, Vstate, state, obj);
- } else if (klass == rb_cArray) {
- generate_json_array(buffer, Vstate, state, obj);
- } else if (klass == rb_cString) {
- generate_json_string(buffer, Vstate, state, obj);
- } else if (obj == Qnil) {
- generate_json_null(buffer, Vstate, state, obj);
+ VALUE fragment = RSTRUCT_GET(obj, 0);
+ Check_Type(fragment, T_STRING);
+ fbuffer_append_str(buffer, fragment);
+}
+
+static inline void generate_json_general(FBuffer *buffer, struct generate_json_data *data, VALUE obj, bool fallback)
+{
+ bool as_json_called = false;
+start:
+ if (obj == Qnil) {
+ generate_json_null(buffer, data, obj);
} else if (obj == Qfalse) {
- generate_json_false(buffer, Vstate, state, obj);
+ generate_json_false(buffer, data, obj);
} else if (obj == Qtrue) {
- generate_json_true(buffer, Vstate, state, obj);
- } else if (klass == rb_cFixnum) {
- generate_json_fixnum(buffer, Vstate, state, obj);
- } else if (klass == rb_cBignum) {
- generate_json_bignum(buffer, Vstate, state, obj);
- } else if (klass == rb_cFloat) {
- generate_json_float(buffer, Vstate, state, obj);
- } else if (rb_respond_to(obj, i_to_json)) {
- tmp = rb_funcall(obj, i_to_json, 1, Vstate);
- Check_Type(tmp, T_STRING);
- fbuffer_append(buffer, RSTRING_PAIR(tmp));
+ generate_json_true(buffer, data, obj);
+ } else if (RB_SPECIAL_CONST_P(obj)) {
+ if (RB_FIXNUM_P(obj)) {
+ generate_json_fixnum(buffer, data, obj);
+ } else if (RB_FLONUM_P(obj)) {
+ generate_json_float(buffer, data, obj);
+ } else if (RB_STATIC_SYM_P(obj)) {
+ generate_json_symbol(buffer, data, obj);
+ } else {
+ goto general;
+ }
} else {
- tmp = rb_funcall(obj, i_to_s, 0);
- Check_Type(tmp, T_STRING);
- generate_json(buffer, Vstate, state, tmp);
+ VALUE klass = RBASIC_CLASS(obj);
+ switch (RB_BUILTIN_TYPE(obj)) {
+ case T_BIGNUM:
+ generate_json_bignum(buffer, data, obj);
+ break;
+ case T_HASH:
+ if (fallback && klass != rb_cHash) goto general;
+ generate_json_object(buffer, data, obj);
+ break;
+ case T_ARRAY:
+ if (fallback && klass != rb_cArray) goto general;
+ generate_json_array(buffer, data, obj);
+ break;
+ case T_STRING:
+ if (fallback && klass != rb_cString) goto general;
+
+ if (RB_LIKELY(valid_json_string_p(obj))) {
+ raw_generate_json_string(buffer, data, obj);
+ } else if (as_json_called) {
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
+ } else {
+ obj = ensure_valid_encoding(data, obj, false, false);
+ as_json_called = true;
+ goto start;
+ }
+ break;
+ case T_SYMBOL:
+ generate_json_symbol(buffer, data, obj);
+ break;
+ case T_FLOAT:
+ if (fallback && klass != rb_cFloat) goto general;
+ generate_json_float(buffer, data, obj);
+ break;
+ case T_STRUCT:
+ if (klass != cFragment) goto general;
+ generate_json_fragment(buffer, data, obj);
+ break;
+ default:
+ general:
+ if (data->state->strict) {
+ if (RTEST(data->state->as_json) && !as_json_called) {
+ obj = json_call_as_json(data->state, obj, Qfalse);
+ as_json_called = true;
+ goto start;
+ } else {
+ raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
+ }
+ } else {
+ generate_json_fallback(buffer, data, obj);
+ }
+ }
}
}
-static FBuffer *cState_prepare_buffer(VALUE self)
+static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
- FBuffer *buffer = fbuffer_alloc();
- GET_STATE(self);
+ generate_json_general(buffer, data, obj, true);
+}
- if (state->object_delim) {
- fbuffer_clear(state->object_delim);
- } else {
- state->object_delim = fbuffer_alloc_with_length(16);
- }
- fbuffer_append_char(state->object_delim, ',');
- if (state->object_delim2) {
- fbuffer_clear(state->object_delim2);
- } else {
- state->object_delim2 = fbuffer_alloc_with_length(16);
- }
- fbuffer_append_char(state->object_delim2, ':');
- if (state->space) fbuffer_append(state->object_delim2, state->space, state->space_len);
+static void generate_json_no_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
+{
+ generate_json_general(buffer, data, obj, false);
+}
- if (state->array_delim) {
- fbuffer_clear(state->array_delim);
- } else {
- state->array_delim = fbuffer_alloc_with_length(16);
- }
- fbuffer_append_char(state->array_delim, ',');
- if (state->array_nl) fbuffer_append(state->array_delim, state->array_nl, state->array_nl_len);
- return buffer;
+static VALUE generate_json_try(VALUE d)
+{
+ struct generate_json_data *data = (struct generate_json_data *)d;
+
+ data->func(data->buffer, data, data->obj);
+
+ return fbuffer_finalize(data->buffer);
}
-static VALUE fbuffer_to_s(FBuffer *fb)
+static VALUE generate_json_ensure(VALUE d)
{
- VALUE result = rb_str_new(FBUFFER_PAIR(fb));
- fbuffer_free(fb);
- FORCE_UTF8(result);
- return result;
+ struct generate_json_data *data = (struct generate_json_data *)d;
+ fbuffer_free(data->buffer);
+
+ return Qundef;
}
-static VALUE cState_partial_generate(VALUE self, VALUE obj)
+static inline VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io)
{
- FBuffer *buffer = cState_prepare_buffer(self);
GET_STATE(self);
- generate_json(buffer, self, state, obj);
- return fbuffer_to_s(buffer);
-}
-/*
- * call-seq: generate(obj)
+ char stack_buffer[FBUFFER_STACK_SIZE];
+ FBuffer buffer = {
+ .io = RTEST(io) ? io : Qfalse,
+ };
+ fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
+
+ struct generate_json_data data = {
+ .buffer = &buffer,
+ .vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json
+ .state = state,
+ .depth = state->depth,
+ .obj = obj,
+ .func = func
+ };
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
+}
+
+/* call-seq:
+ * generate(obj) -> String
+ * generate(obj, anIO) -> anIO
*
* Generates a valid JSON document from object +obj+ and returns the
* result. If no valid JSON document can be created this method raises a
* GeneratorError exception.
*/
-static VALUE cState_generate(VALUE self, VALUE obj)
-{
- VALUE result = cState_partial_generate(self, obj);
- VALUE re, args[2];
- args[0] = rb_str_new2("\\A\\s*(?:\\[.*\\]|\\{.*\\})\\s*\\Z");
- args[1] = CRegexp_MULTILINE;
- re = rb_class_new_instance(2, args, rb_cRegexp);
- if (NIL_P(rb_funcall(re, i_match, 1, result))) {
- rb_raise(eGeneratorError, "only generation of JSON objects or arrays allowed");
- }
- return result;
+static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
+{
+ rb_check_arity(argc, 1, 2);
+ VALUE obj = argv[0];
+ VALUE io = argc > 1 ? argv[1] : Qnil;
+ return cState_partial_generate(self, obj, generate_json, io);
+}
+
+/* :nodoc: */
+static VALUE cState_generate_no_fallback(int argc, VALUE *argv, VALUE self)
+{
+ rb_check_arity(argc, 1, 2);
+ VALUE obj = argv[0];
+ VALUE io = argc > 1 ? argv[1] : Qnil;
+ return cState_partial_generate(self, obj, generate_json_no_fallback, io);
}
-/*
- * call-seq: new(opts = {})
- *
- * Instantiates a new State object, configured by _opts_.
- *
- * _opts_ can have the following keys:
- *
- * * *indent*: a string used to indent levels (default: ''),
- * * *space*: a string that is put after, a : or , delimiter (default: ''),
- * * *space_before*: a string that is put before a : pair delimiter (default: ''),
- * * *object_nl*: a string that is put at the end of a JSON object (default: ''),
- * * *array_nl*: a string that is put at the end of a JSON array (default: ''),
- * * *allow_nan*: true if NaN, Infinity, and -Infinity should be
- * generated, otherwise an exception is thrown, if these values are
- * encountered. This options defaults to false.
- */
static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
{
- VALUE opts;
- GET_STATE(self);
- state->max_nesting = 19;
- rb_scan_args(argc, argv, "01", &opts);
- if (!NIL_P(opts)) cState_configure(self, opts);
+ rb_warn("The json gem extension was loaded with the stdlib ruby code. You should upgrade rubygems with `gem update --system`");
return self;
}
/*
* call-seq: initialize_copy(orig)
*
- * Initializes this object from orig if it to be duplicated/cloned and returns
+ * Initializes this object from orig if it can be duplicated/cloned and returns
* it.
*/
static VALUE cState_init_copy(VALUE obj, VALUE orig)
{
JSON_Generator_State *objState, *origState;
- Data_Get_Struct(obj, JSON_Generator_State, objState);
- Data_Get_Struct(orig, JSON_Generator_State, origState);
+ if (obj == orig) return obj;
+ GET_STATE_TO(obj, objState);
+ GET_STATE_TO(orig, origState);
if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State");
MEMCPY(objState, origState, JSON_Generator_State, 1);
- objState->indent = fstrndup(origState->indent, origState->indent_len);
- objState->space = fstrndup(origState->space, origState->space_len);
- objState->space_before = fstrndup(origState->space_before, origState->space_before_len);
- objState->object_nl = fstrndup(origState->object_nl, origState->object_nl_len);
- objState->array_nl = fstrndup(origState->array_nl, origState->array_nl_len);
- if (origState->array_delim) objState->array_delim = fbuffer_dup(origState->array_delim);
- if (origState->object_delim) objState->object_delim = fbuffer_dup(origState->object_delim);
- if (origState->object_delim2) objState->object_delim2 = fbuffer_dup(origState->object_delim2);
+
+ RB_OBJ_WRITTEN(obj, Qundef, objState->indent);
+ RB_OBJ_WRITTEN(obj, Qundef, objState->space);
+ RB_OBJ_WRITTEN(obj, Qundef, objState->space_before);
+ RB_OBJ_WRITTEN(obj, Qundef, objState->object_nl);
+ RB_OBJ_WRITTEN(obj, Qundef, objState->array_nl);
+ RB_OBJ_WRITTEN(obj, Qundef, objState->as_json);
+
return obj;
}
@@ -1035,120 +1396,92 @@ static VALUE cState_from_state_s(VALUE self, VALUE opts)
} else if (rb_obj_is_kind_of(opts, rb_cHash)) {
return rb_funcall(self, i_new, 1, opts);
} else {
- if (NIL_P(CJSON_SAFE_STATE_PROTOTYPE)) {
- CJSON_SAFE_STATE_PROTOTYPE = rb_const_get(mJSON, i_SAFE_STATE_PROTOTYPE);
- }
- return rb_funcall(CJSON_SAFE_STATE_PROTOTYPE, i_dup, 0);
+ return rb_class_new_instance(0, NULL, cState);
}
}
/*
* call-seq: indent()
*
- * This string is used to indent levels in the JSON text.
+ * Returns the string that is used to indent levels in the JSON text.
*/
static VALUE cState_indent(VALUE self)
{
GET_STATE(self);
- return state->indent ? rb_str_new2(state->indent) : rb_str_new2("");
+ return state->indent ? state->indent : rb_str_freeze(rb_utf8_str_new("", 0));
+}
+
+static VALUE string_config(VALUE config)
+{
+ if (RTEST(config)) {
+ Check_Type(config, T_STRING);
+ if (RSTRING_LEN(config)) {
+ return rb_str_new_frozen(config);
+ }
+ }
+ return Qfalse;
}
/*
* call-seq: indent=(indent)
*
- * This string is used to indent levels in the JSON text.
+ * Sets the string that is used to indent levels in the JSON text.
*/
static VALUE cState_indent_set(VALUE self, VALUE indent)
{
- unsigned long len;
+ rb_check_frozen(self);
GET_STATE(self);
- Check_Type(indent, T_STRING);
- len = RSTRING_LEN(indent);
- if (len == 0) {
- if (state->indent) {
- ruby_xfree(state->indent);
- state->indent = NULL;
- state->indent_len = 0;
- }
- } else {
- if (state->indent) ruby_xfree(state->indent);
- state->indent = strdup(RSTRING_PTR(indent));
- state->indent_len = len;
- }
+ RB_OBJ_WRITE(self, &state->indent, string_config(indent));
return Qnil;
}
/*
* call-seq: space()
*
- * This string is used to insert a space between the tokens in a JSON
+ * Returns the string that is used to insert a space between the tokens in a JSON
* string.
*/
static VALUE cState_space(VALUE self)
{
GET_STATE(self);
- return state->space ? rb_str_new2(state->space) : rb_str_new2("");
+ return state->space ? state->space : rb_str_freeze(rb_utf8_str_new("", 0));
}
/*
* call-seq: space=(space)
*
- * This string is used to insert a space between the tokens in a JSON
+ * Sets _space_ to the string that is used to insert a space between the tokens in a JSON
* string.
*/
static VALUE cState_space_set(VALUE self, VALUE space)
{
- unsigned long len;
+ rb_check_frozen(self);
GET_STATE(self);
- Check_Type(space, T_STRING);
- len = RSTRING_LEN(space);
- if (len == 0) {
- if (state->space) {
- ruby_xfree(state->space);
- state->space = NULL;
- state->space_len = 0;
- }
- } else {
- if (state->space) ruby_xfree(state->space);
- state->space = strdup(RSTRING_PTR(space));
- state->space_len = len;
- }
+ RB_OBJ_WRITE(self, &state->space, string_config(space));
return Qnil;
}
/*
* call-seq: space_before()
*
- * This string is used to insert a space before the ':' in JSON objects.
+ * Returns the string that is used to insert a space before the ':' in JSON objects.
*/
static VALUE cState_space_before(VALUE self)
{
GET_STATE(self);
- return state->space_before ? rb_str_new2(state->space_before) : rb_str_new2("");
+ return state->space_before ? state->space_before : rb_str_freeze(rb_utf8_str_new("", 0));
}
/*
* call-seq: space_before=(space_before)
*
- * This string is used to insert a space before the ':' in JSON objects.
+ * Sets the string that is used to insert a space before the ':' in JSON objects.
*/
static VALUE cState_space_before_set(VALUE self, VALUE space_before)
{
- unsigned long len;
+ rb_check_frozen(self);
GET_STATE(self);
- Check_Type(space_before, T_STRING);
- len = RSTRING_LEN(space_before);
- if (len == 0) {
- if (state->space_before) {
- ruby_xfree(state->space_before);
- state->space_before = NULL;
- state->space_before_len = 0;
- }
- } else {
- if (state->space_before) ruby_xfree(state->space_before);
- state->space_before = strdup(RSTRING_PTR(space_before));
- state->space_before_len = len;
- }
+ RB_OBJ_WRITE(self, &state->space_before, string_config(space_before));
return Qnil;
}
@@ -1161,7 +1494,7 @@ static VALUE cState_space_before_set(VALUE self, VALUE space_before)
static VALUE cState_object_nl(VALUE self)
{
GET_STATE(self);
- return state->object_nl ? rb_str_new2(state->object_nl) : rb_str_new2("");
+ return state->object_nl ? state->object_nl : rb_str_freeze(rb_utf8_str_new("", 0));
}
/*
@@ -1172,20 +1505,9 @@ static VALUE cState_object_nl(VALUE self)
*/
static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
{
- unsigned long len;
+ rb_check_frozen(self);
GET_STATE(self);
- Check_Type(object_nl, T_STRING);
- len = RSTRING_LEN(object_nl);
- if (len == 0) {
- if (state->object_nl) {
- ruby_xfree(state->object_nl);
- state->object_nl = NULL;
- }
- } else {
- if (state->object_nl) ruby_xfree(state->object_nl);
- state->object_nl = strdup(RSTRING_PTR(object_nl));
- state->object_nl_len = len;
- }
+ RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl));
return Qnil;
}
@@ -1197,7 +1519,7 @@ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
static VALUE cState_array_nl(VALUE self)
{
GET_STATE(self);
- return state->array_nl ? rb_str_new2(state->array_nl) : rb_str_new2("");
+ return state->array_nl ? state->array_nl : rb_str_freeze(rb_utf8_str_new("", 0));
}
/*
@@ -1207,23 +1529,35 @@ static VALUE cState_array_nl(VALUE self)
*/
static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
{
- unsigned long len;
+ rb_check_frozen(self);
GET_STATE(self);
- Check_Type(array_nl, T_STRING);
- len = RSTRING_LEN(array_nl);
- if (len == 0) {
- if (state->array_nl) {
- ruby_xfree(state->array_nl);
- state->array_nl = NULL;
- }
- } else {
- if (state->array_nl) ruby_xfree(state->array_nl);
- state->array_nl = strdup(RSTRING_PTR(array_nl));
- state->array_nl_len = len;
- }
+ RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl));
return Qnil;
}
+/*
+ * call-seq: as_json()
+ *
+ * Returns the Proc stored by as_json=, which strict mode calls to convert objects it cannot serialize directly.
+ */
+static VALUE cState_as_json(VALUE self)
+{
+ GET_STATE(self);
+ return state->as_json;
+}
+
+/*
+ * call-seq: as_json=(as_json)
+ *
+ * Sets the Proc (anything convertible via to_proc) that strict mode calls to convert objects it cannot serialize directly.
+ */
+static VALUE cState_as_json_set(VALUE self, VALUE as_json)
+{
+ rb_check_frozen(self);
+ GET_STATE(self);
+ RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
+ return Qnil;
+}
/*
* call-seq: check_circular?
@@ -1249,6 +1583,25 @@ static VALUE cState_max_nesting(VALUE self)
return LONG2FIX(state->max_nesting);
}
+static long long_config(VALUE num)
+{
+ return RTEST(num) ? NUM2LONG(num) : 0;
+}
+
+// depth must never be negative; reject early with a clear error.
+static long depth_config(VALUE num)
+{
+ if (!RTEST(num)) return 0;
+ long d = NUM2LONG(num);
+ if (RB_UNLIKELY(d < 0)) {
+ rb_raise(rb_eArgError, "depth must be >= 0 (got %ld)", d);
+ }
+ if (RB_UNLIKELY(d > INT_MAX)) {
+ rb_raise(rb_eArgError, "depth is too large (got %ld)", d);
+ }
+ return d;
+}
+
/*
* call-seq: max_nesting=(depth)
*
@@ -1257,9 +1610,68 @@ static VALUE cState_max_nesting(VALUE self)
*/
static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
{
+ rb_check_frozen(self);
GET_STATE(self);
- Check_Type(depth, T_FIXNUM);
- return state->max_nesting = FIX2LONG(depth);
+ state->max_nesting = long_config(depth);
+ return Qnil;
+}
+
+/*
+ * call-seq: script_safe
+ *
+ * If this boolean is true, the forward slashes will be escaped in
+ * the json output.
+ */
+static VALUE cState_script_safe(VALUE self)
+{
+ GET_STATE(self);
+ return state->script_safe ? Qtrue : Qfalse;
+}
+
+/*
+ * call-seq: script_safe=(enable)
+ *
+ * This sets whether or not the forward slashes will be escaped in
+ * the json output.
+ */
+static VALUE cState_script_safe_set(VALUE self, VALUE enable)
+{
+ rb_check_frozen(self);
+ GET_STATE(self);
+ state->script_safe = RTEST(enable);
+ return Qnil;
+}
+
+/*
+ * call-seq: strict
+ *
+ * If this boolean is false, types unsupported by the JSON format will
+ * be serialized as strings.
+ * If this boolean is true, types unsupported by the JSON format will
+ * raise a JSON::GeneratorError.
+ */
+static VALUE cState_strict(VALUE self)
+{
+ GET_STATE(self);
+ return state->strict ? Qtrue : Qfalse;
+}
+
+/*
+ * call-seq: strict=(enable)
+ *
+ * This sets whether or not to serialize types unsupported by the
+ * JSON format as strings.
+ * If this boolean is false, types unsupported by the JSON format will
+ * be serialized as strings.
+ * If this boolean is true, types unsupported by the JSON format will
+ * raise a JSON::GeneratorError.
+ */
+static VALUE cState_strict_set(VALUE self, VALUE enable)
+{
+ rb_check_frozen(self);
+ GET_STATE(self);
+ state->strict = RTEST(enable);
+ return Qnil;
}
/*
@@ -1275,9 +1687,22 @@ static VALUE cState_allow_nan_p(VALUE self)
}
/*
+ * call-seq: allow_nan=(enable)
+ *
+ * This sets whether or not to serialize NaN, Infinity, and -Infinity.
+ */
+static VALUE cState_allow_nan_set(VALUE self, VALUE enable)
+{
+ rb_check_frozen(self);
+ GET_STATE(self);
+ state->allow_nan = RTEST(enable);
+ return Qnil;
+}
+
+/*
* call-seq: ascii_only?
*
- * Returns true, if NaN, Infinity, and -Infinity should be generated, otherwise
+ * Returns true, if only ASCII characters should be generated. Otherwise
* returns false.
*/
static VALUE cState_ascii_only_p(VALUE self)
@@ -1287,6 +1712,32 @@ static VALUE cState_ascii_only_p(VALUE self)
}
/*
+ * call-seq: ascii_only=(enable)
+ *
+ * This sets whether only ASCII characters should be generated.
+ */
+static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
+{
+ rb_check_frozen(self);
+ GET_STATE(self);
+ state->ascii_only = RTEST(enable);
+ return Qnil;
+}
+
+static VALUE cState_allow_duplicate_key_p(VALUE self)
+{
+ GET_STATE(self);
+ switch (state->on_duplicate_key) {
+ case JSON_IGNORE:
+ return Qtrue;
+ case JSON_DEPRECATED:
+ return Qnil;
+ default:
+ return Qfalse;
+ }
+}
+
+/*
* call-seq: depth
*
* This integer returns the current depth of data structure nesting.
@@ -1305,29 +1756,176 @@ static VALUE cState_depth(VALUE self)
*/
static VALUE cState_depth_set(VALUE self, VALUE depth)
{
+ rb_check_frozen(self);
GET_STATE(self);
- Check_Type(depth, T_FIXNUM);
- return state->depth = FIX2LONG(depth);
+ state->depth = depth_config(depth);
+ return Qnil;
}
/*
+ * call-seq: buffer_initial_length
*
+ * This integer returns the current initial length of the buffer.
*/
-void Init_generator()
+static VALUE cState_buffer_initial_length(VALUE self)
+{
+ GET_STATE(self);
+ return LONG2FIX(state->buffer_initial_length);
+}
+
+static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_initial_length)
+{
+ Check_Type(buffer_initial_length, T_FIXNUM);
+ long initial_length = FIX2LONG(buffer_initial_length);
+ if (initial_length > 0) {
+ state->buffer_initial_length = initial_length;
+ }
+}
+
+/*
+ * call-seq: buffer_initial_length=(length)
+ *
+ * This sets the initial length of the buffer to +length+, if +length+ > 0,
+ * otherwise its value isn't changed.
+ */
+static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length)
+{
+ rb_check_frozen(self);
+ GET_STATE(self);
+ buffer_initial_length_set(state, buffer_initial_length);
+ return Qnil;
+}
+
+struct configure_state_data {
+ JSON_Generator_State *state;
+ VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated
+};
+
+static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value)
+{
+ if (RTEST(data->vstate)) {
+ RB_OBJ_WRITE(data->vstate, field, value);
+ } else {
+ *field = value;
+ }
+}
+
+static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
+{
+ struct configure_state_data *data = (struct configure_state_data *)_arg;
+ JSON_Generator_State *state = data->state;
+
+ if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); }
+ else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); }
+ else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); }
+ else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); }
+ else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); }
+ else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
+ else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
+ else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
+ else if (key == sym_depth) { state->depth = depth_config(val); }
+ else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); }
+ else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
+ else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
+ else if (key == sym_strict) { state->strict = RTEST(val); }
+ else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
+ else if (key == sym_as_json) {
+ VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
+ state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
+ state_write_value(data, &state->as_json, proc);
+ }
+ return ST_CONTINUE;
+}
+
+static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config)
+{
+ if (!RTEST(config)) return;
+
+ Check_Type(config, T_HASH);
+
+ if (!RHASH_SIZE(config)) return;
+
+ struct configure_state_data data = {
+ .state = state,
+ .vstate = vstate
+ };
+
+ // We assume in most cases few keys are set so it's faster to go over
+ // the provided keys than to check all possible keys.
+ rb_hash_foreach(config, configure_state_i, (VALUE)&data);
+}
+
+static VALUE cState_configure(VALUE self, VALUE opts)
{
+ rb_check_frozen(self);
+ GET_STATE(self);
+ configure_state(state, self, opts);
+ return self;
+}
+
+static VALUE cState_m_do_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io, generator_func func)
+{
+ JSON_Generator_State state = {0};
+ state_init(&state);
+ configure_state(&state, Qfalse, opts);
+
+ char stack_buffer[FBUFFER_STACK_SIZE];
+ FBuffer buffer = {
+ .io = RTEST(io) ? io : Qfalse,
+ };
+ fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
+
+ struct generate_json_data data = {
+ .buffer = &buffer,
+ .vstate = Qfalse,
+ .state = &state,
+ .depth = state.depth,
+ .obj = obj,
+ .func = func,
+ };
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
+}
+
+static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
+{
+ return cState_m_do_generate(klass, obj, opts, io, generate_json);
+}
+
+static VALUE cState_m_generate_no_fallback(VALUE klass, VALUE obj, VALUE opts, VALUE io)
+{
+ return cState_m_do_generate(klass, obj, opts, io, generate_json_no_fallback);
+}
+
+void Init_generator(void)
+{
+#ifdef HAVE_RB_EXT_RACTOR_SAFE
+ rb_ext_ractor_safe(true);
+#endif
+
+#undef rb_intern
rb_require("json/common");
mJSON = rb_define_module("JSON");
- mExt = rb_define_module_under(mJSON, "Ext");
- mGenerator = rb_define_module_under(mExt, "Generator");
+ rb_global_variable(&cFragment);
+ cFragment = rb_const_get(mJSON, rb_intern("Fragment"));
+
+ VALUE mExt = rb_define_module_under(mJSON, "Ext");
+ VALUE mGenerator = rb_define_module_under(mExt, "Generator");
+
+ rb_global_variable(&eGeneratorError);
eGeneratorError = rb_path2class("JSON::GeneratorError");
+
+ rb_global_variable(&eNestingError);
eNestingError = rb_path2class("JSON::NestingError");
cState = rb_define_class_under(mGenerator, "State", rb_cObject);
rb_define_alloc_func(cState, cState_s_allocate);
rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
rb_define_method(cState, "initialize", cState_initialize, -1);
+ rb_define_alias(cState, "initialize", "initialize"); // avoid method redefinition warnings
+ rb_define_private_method(cState, "_configure", cState_configure, 1);
+
rb_define_method(cState, "initialize_copy", cState_init_copy, 1);
rb_define_method(cState, "indent", cState_indent, 0);
rb_define_method(cState, "indent=", cState_indent_set, 1);
@@ -1339,74 +1937,65 @@ void Init_generator()
rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
rb_define_method(cState, "array_nl", cState_array_nl, 0);
rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
+ rb_define_method(cState, "as_json", cState_as_json, 0);
+ rb_define_method(cState, "as_json=", cState_as_json_set, 1);
rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
+ rb_define_method(cState, "script_safe", cState_script_safe, 0);
+ rb_define_method(cState, "script_safe?", cState_script_safe, 0);
+ rb_define_method(cState, "script_safe=", cState_script_safe_set, 1);
+ rb_define_alias(cState, "escape_slash", "script_safe");
+ rb_define_alias(cState, "escape_slash?", "script_safe?");
+ rb_define_alias(cState, "escape_slash=", "script_safe=");
+ rb_define_method(cState, "strict", cState_strict, 0);
+ rb_define_method(cState, "strict?", cState_strict, 0);
+ rb_define_method(cState, "strict=", cState_strict_set, 1);
rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
+ rb_define_method(cState, "allow_nan=", cState_allow_nan_set, 1);
rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0);
+ rb_define_method(cState, "ascii_only=", cState_ascii_only_set, 1);
rb_define_method(cState, "depth", cState_depth, 0);
rb_define_method(cState, "depth=", cState_depth_set, 1);
- rb_define_method(cState, "configure", cState_configure, 1);
- rb_define_method(cState, "to_h", cState_to_h, 0);
- rb_define_method(cState, "[]", cState_aref, 1);
- rb_define_method(cState, "generate", cState_generate, 1);
-
- mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
- mObject = rb_define_module_under(mGeneratorMethods, "Object");
- rb_define_method(mObject, "to_json", mObject_to_json, -1);
- mHash = rb_define_module_under(mGeneratorMethods, "Hash");
- rb_define_method(mHash, "to_json", mHash_to_json, -1);
- mArray = rb_define_module_under(mGeneratorMethods, "Array");
- rb_define_method(mArray, "to_json", mArray_to_json, -1);
- mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
- rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1);
- mBignum = rb_define_module_under(mGeneratorMethods, "Bignum");
- rb_define_method(mBignum, "to_json", mBignum_to_json, -1);
- mFloat = rb_define_module_under(mGeneratorMethods, "Float");
- rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
- mString = rb_define_module_under(mGeneratorMethods, "String");
- rb_define_singleton_method(mString, "included", mString_included_s, 1);
- rb_define_method(mString, "to_json", mString_to_json, -1);
- rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
- rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
- mString_Extend = rb_define_module_under(mString, "Extend");
- rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
- mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
- rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
- mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
- rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1);
- mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
- rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
-
- CRegexp_MULTILINE = rb_const_get(rb_cRegexp, rb_intern("MULTILINE"));
+ rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
+ rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
+ rb_define_method(cState, "generate", cState_generate, -1);
+ rb_define_method(cState, "_generate_no_fallback", cState_generate_no_fallback, -1);
+
+ rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
+
+ rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
+ rb_define_singleton_method(cState, "_generate_no_fallback", cState_m_generate_no_fallback, 3);
+
+ rb_global_variable(&Encoding_UTF_8);
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
+
i_to_s = rb_intern("to_s");
i_to_json = rb_intern("to_json");
i_new = rb_intern("new");
- i_indent = rb_intern("indent");
- i_space = rb_intern("space");
- i_space_before = rb_intern("space_before");
- i_object_nl = rb_intern("object_nl");
- i_array_nl = rb_intern("array_nl");
- i_max_nesting = rb_intern("max_nesting");
- i_allow_nan = rb_intern("allow_nan");
- i_ascii_only = rb_intern("ascii_only");
- i_depth = rb_intern("depth");
- i_pack = rb_intern("pack");
- i_unpack = rb_intern("unpack");
- i_create_id = rb_intern("create_id");
- i_extend = rb_intern("extend");
- i_key_p = rb_intern("key?");
- i_aref = rb_intern("[]");
- i_send = rb_intern("__send__");
- i_respond_to_p = rb_intern("respond_to?");
- i_match = rb_intern("match");
- i_keys = rb_intern("keys");
- i_dup = rb_intern("dup");
-#ifdef HAVE_RUBY_ENCODING_H
- CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
- i_encoding = rb_intern("encoding");
i_encode = rb_intern("encode");
-#endif
- i_SAFE_STATE_PROTOTYPE = rb_intern("SAFE_STATE_PROTOTYPE");
- CJSON_SAFE_STATE_PROTOTYPE = Qnil;
+
+ sym_indent = ID2SYM(rb_intern("indent"));
+ sym_space = ID2SYM(rb_intern("space"));
+ sym_space_before = ID2SYM(rb_intern("space_before"));
+ sym_object_nl = ID2SYM(rb_intern("object_nl"));
+ sym_array_nl = ID2SYM(rb_intern("array_nl"));
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
+ sym_ascii_only = ID2SYM(rb_intern("ascii_only"));
+ sym_depth = ID2SYM(rb_intern("depth"));
+ sym_buffer_initial_length = ID2SYM(rb_intern("buffer_initial_length"));
+ sym_script_safe = ID2SYM(rb_intern("script_safe"));
+ sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
+ sym_strict = ID2SYM(rb_intern("strict"));
+ sym_as_json = ID2SYM(rb_intern("as_json"));
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
+
+ usascii_encindex = rb_usascii_encindex();
+ utf8_encindex = rb_utf8_encindex();
+ binary_encindex = rb_ascii8bit_encindex();
+
+ rb_require("json/ext/generator/state");
+
+ simd_impl = find_simd_implementation();
}
diff --git a/ext/json/generator/generator.h b/ext/json/generator/generator.h
deleted file mode 100644
index b31218ac2e..0000000000
--- a/ext/json/generator/generator.h
+++ /dev/null
@@ -1,196 +0,0 @@
-#ifndef _GENERATOR_H_
-#define _GENERATOR_H_
-
-#include <string.h>
-#include <assert.h>
-#include <math.h>
-
-#include "ruby.h"
-
-#if HAVE_RUBY_RE_H
-#include "ruby/re.h"
-#endif
-
-#if HAVE_RE_H
-#include "re.h"
-#endif
-
-#ifdef HAVE_RUBY_ENCODING_H
-#include "ruby/encoding.h"
-#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
-#else
-#define FORCE_UTF8(obj)
-#endif
-
-#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key))
-
-#ifndef RHASH_SIZE
-#define RHASH_SIZE(hsh) (RHASH(hsh)->tbl->num_entries)
-#endif
-
-#ifndef RFLOAT_VALUE
-#define RFLOAT_VALUE(val) (RFLOAT(val)->value)
-#endif
-
-#ifndef RARRAY_PTR
-#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
-#endif
-#ifndef RARRAY_LEN
-#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len
-#endif
-#ifndef RSTRING_PTR
-#define RSTRING_PTR(string) RSTRING(string)->ptr
-#endif
-#ifndef RSTRING_LEN
-#define RSTRING_LEN(string) RSTRING(string)->len
-#endif
-
-#define RSTRING_PAIR(string) RSTRING_PTR(string), RSTRING_LEN(string)
-
-/* fbuffer implementation */
-
-typedef struct FBufferStruct {
- unsigned long initial_length;
- char *ptr;
- unsigned long len;
- unsigned long capa;
-} FBuffer;
-
-#define FBUFFER_INITIAL_LENGTH 4096
-
-#define FBUFFER_PTR(fb) (fb->ptr)
-#define FBUFFER_LEN(fb) (fb->len)
-#define FBUFFER_CAPA(fb) (fb->capa)
-#define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb)
-
-static char *fstrndup(const char *ptr, unsigned long len);
-static FBuffer *fbuffer_alloc();
-static FBuffer *fbuffer_alloc_with_length(unsigned long initial_length);
-static void fbuffer_free(FBuffer *fb);
-static void fbuffer_clear(FBuffer *fb);
-static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len);
-static void fbuffer_append_long(FBuffer *fb, long number);
-static void fbuffer_append_char(FBuffer *fb, char newchr);
-static FBuffer *fbuffer_dup(FBuffer *fb);
-static VALUE fbuffer_to_s(FBuffer *fb);
-
-/* unicode defintions */
-
-#define UNI_STRICT_CONVERSION 1
-
-typedef unsigned long UTF32; /* at least 32 bits */
-typedef unsigned short UTF16; /* at least 16 bits */
-typedef unsigned char UTF8; /* typically 8 bits */
-
-#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
-#define UNI_MAX_BMP (UTF32)0x0000FFFF
-#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
-#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
-#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
-
-#define UNI_SUR_HIGH_START (UTF32)0xD800
-#define UNI_SUR_HIGH_END (UTF32)0xDBFF
-#define UNI_SUR_LOW_START (UTF32)0xDC00
-#define UNI_SUR_LOW_END (UTF32)0xDFFF
-
-static const int halfShift = 10; /* used for shifting by 10 bits */
-
-static const UTF32 halfBase = 0x0010000UL;
-static const UTF32 halfMask = 0x3FFUL;
-
-static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length);
-static void unicode_escape(char *buf, UTF16 character);
-static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character);
-static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string);
-static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string);
-
-/* ruby api and some helpers */
-
-typedef struct JSON_Generator_StateStruct {
- char *indent;
- long indent_len;
- char *space;
- long space_len;
- char *space_before;
- long space_before_len;
- char *object_nl;
- long object_nl_len;
- char *array_nl;
- long array_nl_len;
- FBuffer *array_delim;
- FBuffer *object_delim;
- FBuffer *object_delim2;
- long max_nesting;
- char allow_nan;
- char ascii_only;
- long depth;
-} JSON_Generator_State;
-
-#define GET_STATE(self) \
- JSON_Generator_State *state; \
- Data_Get_Struct(self, JSON_Generator_State, state)
-
-#define GENERATE_JSON(type) \
- FBuffer *buffer; \
- VALUE Vstate; \
- JSON_Generator_State *state; \
- \
- rb_scan_args(argc, argv, "01", &Vstate); \
- Vstate = cState_from_state_s(cState, Vstate); \
- Data_Get_Struct(Vstate, JSON_Generator_State, state); \
- buffer = cState_prepare_buffer(Vstate); \
- generate_json_##type(buffer, Vstate, state, self); \
- return fbuffer_to_s(buffer)
-
-static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mString_included_s(VALUE self, VALUE modul);
-static VALUE mString_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mString_to_json_raw_object(VALUE self);
-static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self);
-static VALUE mString_Extend_json_create(VALUE self, VALUE o);
-static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self);
-static void State_free(JSON_Generator_State *state);
-static JSON_Generator_State *State_allocate();
-static VALUE cState_s_allocate(VALUE klass);
-static VALUE cState_configure(VALUE self, VALUE opts);
-static VALUE cState_to_h(VALUE self);
-static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj);
-static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj);
-static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj);
-static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj);
-static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj);
-static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj);
-static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj);
-static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj);
-static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj);
-static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj);
-static VALUE cState_partial_generate(VALUE self, VALUE obj);
-static VALUE cState_generate(VALUE self, VALUE obj);
-static VALUE cState_initialize(int argc, VALUE *argv, VALUE self);
-static VALUE cState_from_state_s(VALUE self, VALUE opts);
-static VALUE cState_indent(VALUE self);
-static VALUE cState_indent_set(VALUE self, VALUE indent);
-static VALUE cState_space(VALUE self);
-static VALUE cState_space_set(VALUE self, VALUE space);
-static VALUE cState_space_before(VALUE self);
-static VALUE cState_space_before_set(VALUE self, VALUE space_before);
-static VALUE cState_object_nl(VALUE self);
-static VALUE cState_object_nl_set(VALUE self, VALUE object_nl);
-static VALUE cState_array_nl(VALUE self);
-static VALUE cState_array_nl_set(VALUE self, VALUE array_nl);
-static VALUE cState_max_nesting(VALUE self);
-static VALUE cState_max_nesting_set(VALUE self, VALUE depth);
-static VALUE cState_allow_nan_p(VALUE self);
-static VALUE cState_ascii_only_p(VALUE self);
-static VALUE cState_depth(VALUE self);
-static VALUE cState_depth_set(VALUE self, VALUE depth);
-static FBuffer *cState_prepare_buffer(VALUE self);
-
-#endif
diff --git a/ext/json/json.gemspec b/ext/json/json.gemspec
new file mode 100644
index 0000000000..5575731025
--- /dev/null
+++ b/ext/json/json.gemspec
@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+
+version = File.foreach(File.join(__dir__, "lib/json/version.rb")) do |line|
+ /^\s*VERSION\s*=\s*'(.*)'/ =~ line and break $1
+end rescue nil
+
+spec = Gem::Specification.new do |s|
+ java_ext = Gem::Platform === s.platform && s.platform =~ 'java' || RUBY_ENGINE == 'jruby'
+
+ s.name = "json"
+ s.version = version
+
+ s.summary = "JSON Implementation for Ruby"
+ s.homepage = "https://github.com/ruby/json"
+ s.metadata = {
+ 'bug_tracker_uri' => 'https://github.com/ruby/json/issues',
+ 'changelog_uri' => 'https://github.com/ruby/json/blob/master/CHANGES.md',
+ 'documentation_uri' => 'https://docs.ruby-lang.org/en/master/JSON.html',
+ 'homepage_uri' => s.homepage,
+ 'source_code_uri' => 'https://github.com/ruby/json',
+ }
+
+ s.required_ruby_version = Gem::Requirement.new(">= 2.7")
+
+ if java_ext
+ s.description = "A JSON implementation as a JRuby extension."
+ s.author = "Daniel Luz"
+ s.email = "dev+ruby@mernen.com"
+ else
+ s.description = "This is a JSON implementation as a Ruby extension in C."
+ s.authors = ["Florian Frank"]
+ s.email = "flori@ping.de"
+ end
+
+ s.licenses = ["Ruby"]
+
+ s.extra_rdoc_files = ["README.md"]
+ s.rdoc_options = ["--title", "JSON implementation for Ruby", "--main", "README.md"]
+
+ s.files = [
+ "CHANGES.md",
+ "COPYING",
+ "BSDL",
+ "LEGAL",
+ "README.md",
+ "json.gemspec",
+ ] + Dir.glob("lib/**/*.rb", base: File.expand_path("..", __FILE__))
+
+ if java_ext
+ s.platform = 'java'
+ s.files += Dir["lib/json/ext/**/*.jar"]
+ else
+ s.extensions = Dir["ext/json/**/extconf.rb"]
+ s.files += Dir["ext/json/**/*.{c,h,rb}"]
+ end
+end
+
+if RUBY_ENGINE == 'jruby' && $0 == __FILE__
+ Gem::Builder.new(spec).build
+else
+ spec
+end
diff --git a/ext/json/json.h b/ext/json/json.h
new file mode 100644
index 0000000000..cf9420d4dd
--- /dev/null
+++ b/ext/json/json.h
@@ -0,0 +1,134 @@
+#ifndef _JSON_H_
+#define _JSON_H_
+
+#include "ruby.h"
+#include "ruby/encoding.h"
+#include <stdint.h>
+
+#ifndef RBIMPL_ASSERT_OR_ASSUME
+# define RBIMPL_ASSERT_OR_ASSUME(x)
+#endif
+
+#if defined(RUBY_DEBUG) && RUBY_DEBUG
+# define JSON_ASSERT RUBY_ASSERT
+#else
+# ifdef JSON_DEBUG
+# include <assert.h>
+# define JSON_ASSERT(x) assert(x)
+# else
+# define JSON_ASSERT(x)
+# endif
+#endif
+
+/* shims */
+
+#if SIZEOF_UINT64_T == SIZEOF_LONG_LONG
+# define INT64T2NUM(x) LL2NUM(x)
+# define UINT64T2NUM(x) ULL2NUM(x)
+#elif SIZEOF_UINT64_T == SIZEOF_LONG
+# define INT64T2NUM(x) LONG2NUM(x)
+# define UINT64T2NUM(x) ULONG2NUM(x)
+#else
+# error No uint64_t conversion
+#endif
+
+/* This is the fallback definition from Ruby 3.4 */
+#ifndef RBIMPL_STDBOOL_H
+#if defined(__cplusplus)
+# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
+# include <cstdbool>
+# endif
+#elif defined(HAVE_STDBOOL_H)
+# include <stdbool.h>
+#elif !defined(HAVE__BOOL)
+typedef unsigned char _Bool;
+# define bool _Bool
+# define true ((_Bool)+1)
+# define false ((_Bool)+0)
+# define __bool_true_false_are_defined
+#endif
+#endif
+
+#ifndef HAVE_RUBY_XFREE_SIZED
+static inline void ruby_xfree_sized(void *ptr, size_t oldsize)
+{
+ ruby_xfree(ptr);
+}
+
+static inline void *ruby_xrealloc2_sized(void *ptr, size_t new_elems, size_t elem_size, size_t old_elems)
+{
+ return ruby_xrealloc2(ptr, new_elems, elem_size);
+}
+#endif
+
+# define JSON_SIZED_REALLOC_N(v, T, m, n) \
+ ((v) = (T *)ruby_xrealloc2_sized((void *)(v), (m), sizeof(T), (n)))
+
+# define JSON_SIZED_FREE(v) ruby_xfree_sized((void *)(v), sizeof(*(v)))
+# define JSON_SIZED_FREE_N(v, n) ruby_xfree_sized((void *)(v), sizeof(*(v)) * (n))
+
+#ifndef HAVE_RB_EXT_RACTOR_SAFE
+# undef RUBY_TYPED_FROZEN_SHAREABLE
+# define RUBY_TYPED_FROZEN_SHAREABLE 0
+#endif
+
+#ifdef RUBY_TYPED_EMBEDDABLE
+# define HAVE_RUBY_TYPED_EMBEDDABLE 1
+#else
+# ifdef HAVE_CONST_RUBY_TYPED_EMBEDDABLE
+# define RUBY_TYPED_EMBEDDABLE RUBY_TYPED_EMBEDDABLE
+# define HAVE_RUBY_TYPED_EMBEDDABLE 1
+# else
+# define RUBY_TYPED_EMBEDDABLE 0
+# endif
+#endif
+
+#ifndef NORETURN
+#if defined(__has_attribute) && __has_attribute(noreturn)
+#define NORETURN(x) __attribute__((noreturn)) x
+#else
+#define NORETURN(x) x
+#endif
+#endif
+
+#ifndef NOINLINE
+#if defined(__has_attribute) && __has_attribute(noinline)
+#define NOINLINE(x) __attribute__((noinline)) x
+#else
+#define NOINLINE(x) x
+#endif
+#endif
+
+#ifndef ALWAYS_INLINE
+#if defined(__has_attribute) && __has_attribute(always_inline)
+#define ALWAYS_INLINE(x) inline __attribute__((always_inline)) x
+#else
+#define ALWAYS_INLINE(x) inline x
+#endif
+#endif
+
+#ifndef RB_UNLIKELY
+#define RB_UNLIKELY(expr) expr
+#endif
+
+#ifndef RB_LIKELY
+#define RB_LIKELY(expr) expr
+#endif
+
+#ifndef MAYBE_UNUSED
+# define MAYBE_UNUSED(x) x
+#endif
+
+#ifdef RUBY_DEBUG
+#ifndef JSON_DEBUG
+#define JSON_DEBUG RUBY_DEBUG
+#endif
+#endif
+
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && INTPTR_MAX == INT64_MAX
+#define JSON_CPU_LITTLE_ENDIAN_64BITS 1
+#else
+#define JSON_CPU_LITTLE_ENDIAN_64BITS 0
+#endif
+
+#endif // _JSON_H_
diff --git a/ext/json/lib/json.rb b/ext/json/lib/json.rb
index 789b0de546..26d601926f 100644
--- a/ext/json/lib/json.rb
+++ b/ext/json/lib/json.rb
@@ -1,10 +1,675 @@
+# frozen_string_literal: true
require 'json/common'
+
+##
+# = JavaScript \Object Notation (\JSON)
+#
+# \JSON is a lightweight data-interchange format.
+#
+# \JSON is easy for us humans to read and write,
+# and equally simple for machines to read (parse) and write (generate).
+#
+# \JSON is language-independent, making it an ideal interchange format
+# for applications in differing programming languages
+# and on differing operating systems.
+#
+# == \JSON Values
+#
+# A \JSON value is one of the following:
+# - Double-quoted text: <tt>"foo"</tt>.
+# - Number: +1+, +1.0+, +2.0e2+.
+# - Boolean: +true+, +false+.
+# - Null: +null+.
+# - \Array: an ordered list of values, enclosed by square brackets:
+# ["foo", 1, 1.0, 2.0e2, true, false, null]
+#
+# - \Object: a collection of name/value pairs, enclosed by curly braces;
+# each name is double-quoted text;
+# the values may be any \JSON values:
+# {"a": "foo", "b": 1, "c": 1.0, "d": 2.0e2, "e": true, "f": false, "g": null}
+#
+# A \JSON array or object may contain nested arrays, objects, and scalars
+# to any depth:
+# {"foo": {"bar": 1, "baz": 2}, "bat": [0, 1, 2]}
+# [{"foo": 0, "bar": 1}, ["baz", 2]]
+#
+# == Using \Module \JSON
+#
+# To make module \JSON available in your code, begin with:
+# require 'json'
+#
+# All examples here assume that this has been done.
+#
+# === Parsing \JSON
+#
+# You can parse a \String containing \JSON data using
+# either of two methods:
+# - <tt>JSON.parse(source, opts)</tt>
+# - <tt>JSON.parse!(source, opts)</tt>
+#
+# where
+# - +source+ is a Ruby object.
+# - +opts+ is a \Hash object containing options
+# that control both input allowed and output formatting.
+#
+# The difference between the two methods
+# is that JSON.parse! omits some checks
+# and may not be safe for some +source+ data;
+# use it only for data from trusted sources.
+# Use the safer method JSON.parse for less trusted sources.
+#
+# ==== Parsing \JSON Arrays
+#
+# When +source+ is a \JSON array, JSON.parse by default returns a Ruby \Array:
+# json = '["foo", 1, 1.0, 2.0e2, true, false, null]'
+# ruby = JSON.parse(json)
+# ruby # => ["foo", 1, 1.0, 200.0, true, false, nil]
+# ruby.class # => Array
+#
+# The \JSON array may contain nested arrays, objects, and scalars
+# to any depth:
+# json = '[{"foo": 0, "bar": 1}, ["baz", 2]]'
+# JSON.parse(json) # => [{"foo"=>0, "bar"=>1}, ["baz", 2]]
+#
+# ==== Parsing \JSON \Objects
+#
+# When the source is a \JSON object, JSON.parse by default returns a Ruby \Hash:
+# json = '{"a": "foo", "b": 1, "c": 1.0, "d": 2.0e2, "e": true, "f": false, "g": null}'
+# ruby = JSON.parse(json)
+# ruby # => {"a"=>"foo", "b"=>1, "c"=>1.0, "d"=>200.0, "e"=>true, "f"=>false, "g"=>nil}
+# ruby.class # => Hash
+#
+# The \JSON object may contain nested arrays, objects, and scalars
+# to any depth:
+# json = '{"foo": {"bar": 1, "baz": 2}, "bat": [0, 1, 2]}'
+# JSON.parse(json) # => {"foo"=>{"bar"=>1, "baz"=>2}, "bat"=>[0, 1, 2]}
+#
+# ==== Parsing \JSON Scalars
+#
+# When the source is a \JSON scalar (not an array or object),
+# JSON.parse returns a Ruby scalar.
+#
+# \String:
+# ruby = JSON.parse('"foo"')
+# ruby # => 'foo'
+# ruby.class # => String
+# \Integer:
+# ruby = JSON.parse('1')
+# ruby # => 1
+# ruby.class # => Integer
+# \Float:
+# ruby = JSON.parse('1.0')
+# ruby # => 1.0
+# ruby.class # => Float
+# ruby = JSON.parse('2.0e2')
+# ruby # => 200.0
+# ruby.class # => Float
+# Boolean:
+# ruby = JSON.parse('true')
+# ruby # => true
+# ruby.class # => TrueClass
+# ruby = JSON.parse('false')
+# ruby # => false
+# ruby.class # => FalseClass
+# Null:
+# ruby = JSON.parse('null')
+# ruby # => nil
+# ruby.class # => NilClass
+#
+# ==== Parsing Options
+#
+# ====== Input Options
+#
+# Option +max_nesting+ (\Integer) specifies the maximum nesting depth allowed;
+# defaults to +100+; specify +false+ to disable depth checking.
+#
+# With the default, +100+:
+# source = '[0, [1, [2, [3]]]]'
+# ruby = JSON.parse(source)
+# ruby # => [0, [1, [2, [3]]]]
+# Too deep:
+# # Raises JSON::NestingError (nesting of 2 is too deep):
+# JSON.parse(source, {max_nesting: 1})
+# Bad value:
+# # Raises TypeError (wrong argument type Symbol (expected Fixnum)):
+# JSON.parse(source, {max_nesting: :foo})
+#
+# ---
+#
+# Option +allow_duplicate_key+ specifies whether duplicate keys in objects
+# should be ignored or cause an error to be raised:
+#
+# When not specified:
+# # The last value is used and a deprecation warning is emitted.
+# JSON.parse('{"a": 1, "a":2}') # => {"a" => 2}
+# # warning: detected duplicate keys in JSON object.
+# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`
+#
+# When set to +true+:
+# # The last value is used.
+# JSON.parse('{"a": 1, "a":2}') # => {"a" => 2}
+#
+# When set to +false+, the future default:
+# JSON.parse('{"a": 1, "a":2}') # duplicate key at line 1 column 1 (JSON::ParserError)
+#
+# ---
+#
+# Option +allow_nan+ (boolean) specifies whether to allow
+# NaN, Infinity, and MinusInfinity in +source+;
+# defaults to +false+.
+#
+# With the default, +false+:
+# # Raises JSON::ParserError (225: unexpected token at '[NaN]'):
+# JSON.parse('[NaN]')
+# # Raises JSON::ParserError (232: unexpected token at '[Infinity]'):
+# JSON.parse('[Infinity]')
+# # Raises JSON::ParserError (248: unexpected token at '[-Infinity]'):
+# JSON.parse('[-Infinity]')
+# Allow:
+# source = '[NaN, Infinity, -Infinity]'
+# ruby = JSON.parse(source, {allow_nan: true})
+# ruby # => [NaN, Infinity, -Infinity]
+#
+# ---
+#
+# Option +allow_trailing_comma+ (boolean) specifies whether to allow
+# trailing commas in objects and arrays;
+# defaults to +false+.
+#
+# With the default, +false+:
+# JSON.parse('[1,]') # unexpected character: ']' at line 1 column 4 (JSON::ParserError)
+#
+# When enabled:
+# JSON.parse('[1,]', allow_trailing_comma: true) # => [1]
+#
+# ---
+#
+# Option +allow_control_characters+ (boolean) specifies whether to allow
+# unescaped ASCII control characters, such as newlines, in strings;
+# defaults to +false+.
+#
+# With the default, +false+:
+# JSON.parse(%{"Hello\nWorld"}) # invalid ASCII control character in string (JSON::ParserError)
+#
+# When enabled:
+# JSON.parse(%{"Hello\nWorld"}, allow_control_characters: true) # => "Hello\nWorld"
+#
+# ---
+#
+# Option +allow_invalid_escape+ (boolean) specifies whether to ignore backslashes that are followed
+# by an invalid escape character in strings;
+# defaults to +false+.
+#
+# With the default, +false+:
+# JSON.parse('"Hell\o"') # invalid escape character in string (JSON::ParserError)
+#
+# When enabled:
+# JSON.parse('"Hell\o"', allow_invalid_escape: true) # => "Hello"
+#
+# ====== Output Options
+#
+# Option +freeze+ (boolean) specifies whether the returned objects will be frozen;
+# defaults to +false+.
+#
+# Option +symbolize_names+ (boolean) specifies whether returned \Hash keys
+# should be Symbols;
+# defaults to +false+ (use Strings).
+#
+# With the default, +false+:
+# source = '{"a": "foo", "b": 1.0, "c": true, "d": false, "e": null}'
+# ruby = JSON.parse(source)
+# ruby # => {"a"=>"foo", "b"=>1.0, "c"=>true, "d"=>false, "e"=>nil}
+# Use Symbols:
+# ruby = JSON.parse(source, {symbolize_names: true})
+# ruby # => {:a=>"foo", :b=>1.0, :c=>true, :d=>false, :e=>nil}
+#
+# ---
+#
+# Option +object_class+ (\Class) specifies the Ruby class to be used
+# for each \JSON object;
+# defaults to \Hash.
+#
+# With the default, \Hash:
+# source = '{"a": "foo", "b": 1.0, "c": true, "d": false, "e": null}'
+# ruby = JSON.parse(source)
+# ruby.class # => Hash
+# Use class \OpenStruct:
+# ruby = JSON.parse(source, {object_class: OpenStruct})
+# ruby # => #<OpenStruct a="foo", b=1.0, c=true, d=false, e=nil>
+#
+# ---
+#
+# Option +array_class+ (\Class) specifies the Ruby class to be used
+# for each \JSON array;
+# defaults to \Array.
+#
+# With the default, \Array:
+# source = '["foo", 1.0, true, false, null]'
+# ruby = JSON.parse(source)
+# ruby.class # => Array
+# Use class \Set:
+# ruby = JSON.parse(source, {array_class: Set})
+# ruby # => #<Set: {"foo", 1.0, true, false, nil}>
+#
+# ---
+#
+# Option +create_additions+ (boolean) specifies whether to use \JSON additions in parsing.
+# See {\JSON Additions}[#module-JSON-label-JSON+Additions].
+#
+# === Generating \JSON
+#
+# To generate a Ruby \String containing \JSON data,
+# use method <tt>JSON.generate(source, opts)</tt>, where
+# - +source+ is a Ruby object.
+# - +opts+ is a \Hash object containing options
+# that control both input allowed and output formatting.
+#
+# ==== Generating \JSON from Arrays
+#
+# When the source is a Ruby \Array, JSON.generate returns
+# a \String containing a \JSON array:
+# ruby = [0, 's', :foo]
+# json = JSON.generate(ruby)
+# json # => '[0,"s","foo"]'
+#
+# The Ruby \Array array may contain nested arrays, hashes, and scalars
+# to any depth:
+# ruby = [0, [1, 2], {foo: 3, bar: 4}]
+# json = JSON.generate(ruby)
+# json # => '[0,[1,2],{"foo":3,"bar":4}]'
+#
+# ==== Generating \JSON from Hashes
+#
+# When the source is a Ruby \Hash, JSON.generate returns
+# a \String containing a \JSON object:
+# ruby = {foo: 0, bar: 's', baz: :bat}
+# json = JSON.generate(ruby)
+# json # => '{"foo":0,"bar":"s","baz":"bat"}'
+#
+# The Ruby \Hash may contain nested arrays, hashes, and scalars
+# to any depth:
+# ruby = {foo: [0, 1], bar: {baz: 2, bat: 3}, bam: :bad}
+# json = JSON.generate(ruby)
+# json # => '{"foo":[0,1],"bar":{"baz":2,"bat":3},"bam":"bad"}'
+#
+# ==== Generating \JSON from Other Objects
+#
+# When the source is neither an \Array nor a \Hash,
+# the generated \JSON data depends on the class of the source.
+#
+# When the source is a Ruby \Integer or \Float, JSON.generate returns
+# a \String containing a \JSON number:
+# JSON.generate(42) # => '42'
+# JSON.generate(0.42) # => '0.42'
+#
+# When the source is a Ruby \String, JSON.generate returns
+# a \String containing a \JSON string (with double-quotes):
+# JSON.generate('A string') # => '"A string"'
+#
+# When the source is +true+, +false+ or +nil+, JSON.generate returns
+# a \String containing the corresponding \JSON token:
+# JSON.generate(true) # => 'true'
+# JSON.generate(false) # => 'false'
+# JSON.generate(nil) # => 'null'
+#
+# When the source is none of the above, JSON.generate returns
+# a \String containing a \JSON string representation of the source:
+# JSON.generate(:foo) # => '"foo"'
+# JSON.generate(Complex(0, 0)) # => '"0+0i"'
+# JSON.generate(Dir.new('.')) # => '"#<Dir>"'
+#
+# ==== Generating Options
+#
+# ====== Input Options
+#
+# Option +allow_nan+ (boolean) specifies whether
+# +NaN+, +Infinity+, and <tt>-Infinity</tt> may be generated;
+# defaults to +false+.
+#
+# With the default, +false+:
+# # Raises JSON::GeneratorError (920: NaN not allowed in JSON):
+# JSON.generate(JSON::NaN)
+# # Raises JSON::GeneratorError (917: Infinity not allowed in JSON):
+# JSON.generate(JSON::Infinity)
+# # Raises JSON::GeneratorError (917: -Infinity not allowed in JSON):
+# JSON.generate(JSON::MinusInfinity)
+#
+# Allow:
+# ruby = [Float::NAN, Float::INFINITY, JSON::NaN, JSON::Infinity, JSON::MinusInfinity]
+# JSON.generate(ruby, allow_nan: true) # => '[NaN,Infinity,NaN,Infinity,-Infinity]'
+#
+# ---
+#
+# Option +allow_duplicate_key+ (boolean) specifies whether
+# hashes with duplicate keys should be allowed or produce an error;
+# when not set, a deprecation warning is emitted.
+#
+# With the default, (not set):
+# Warning[:deprecated] = true
+# JSON.generate({ foo: 1, "foo" => 2 })
+# # warning: detected duplicate key "foo" in {foo: 1, "foo" => 2}.
+# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`
+# # => '{"foo":1,"foo":2}'
+#
+# With <tt>false</tt>
+# JSON.generate({ foo: 1, "foo" => 2 }, allow_duplicate_key: false)
+# # detected duplicate key "foo" in {foo: 1, "foo" => 2} (JSON::GeneratorError)
+#
+# In version 3.0, <tt>false</tt> will become the default.
+#
+# ---
+#
+# Option +max_nesting+ (\Integer) specifies the maximum nesting depth
+# in +obj+; defaults to +100+.
+#
+# With the default, +100+:
+# obj = [[[[[[0]]]]]]
+# JSON.generate(obj) # => '[[[[[[0]]]]]]'
+#
+# Too deep:
+# # Raises JSON::NestingError (nesting of 2 is too deep):
+# JSON.generate(obj, max_nesting: 2)
+#
+# ====== Escaping Options
+#
+# Option +script_safe+ (boolean) specifies whether <tt>'\u2028'</tt>, <tt>'\u2029'</tt>
+# and <tt>'/'</tt> should be escaped so as to make the JSON object safe to interpolate in script
+# tags.
+#
+# Option +ascii_only+ (boolean) specifies whether all characters outside the ASCII range
+# should be escaped.
+#
+# ====== Output Options
+#
+# The default formatting options generate the most compact
+# \JSON data, all on one line and with no whitespace.
+#
+# You can use these formatting options to generate
+# \JSON data in a more open format, using whitespace.
+# See also JSON.pretty_generate.
+#
+# - Option +array_nl+ (\String) specifies a string (usually a newline)
+# to be inserted after each \JSON array; defaults to the empty \String, <tt>''</tt>.
+# - Option +object_nl+ (\String) specifies a string (usually a newline)
+# to be inserted after each \JSON object; defaults to the empty \String, <tt>''</tt>.
+# - Option +indent+ (\String) specifies the string (usually spaces) to be
+# used for indentation;
+# defaults to the empty \String, <tt>''</tt>;
+# has no effect unless options +array_nl+ or +object_nl+ specify newlines.
+# - Option +space+ (\String) specifies a string (usually a space) to be
+# inserted after the colon in each \JSON object's pair;
+# defaults to the empty \String, <tt>''</tt>.
+# - Option +space_before+ (\String) specifies a string (usually a space) to be
+# inserted before the colon in each \JSON object's pair;
+# defaults to the empty \String, <tt>''</tt>.
+#
+# In this example, +obj+ is used first to generate the shortest
+# \JSON data (no whitespace), then again with all formatting options
+# specified:
+#
+# obj = {foo: [:bar, :baz], bat: {bam: 0, bad: 1}}
+# json = JSON.generate(obj)
+# puts 'Compact:', json
+# opts = {
+# array_nl: "\n",
+# object_nl: "\n",
+# indent: ' ',
+# space_before: ' ',
+# space: ' '
+# }
+# puts 'Open:', JSON.generate(obj, opts)
+#
+# Output:
+# Compact:
+# {"foo":["bar","baz"],"bat":{"bam":0,"bad":1}}
+# Open:
+# {
+# "foo" : [
+# "bar",
+# "baz"
+# ],
+# "bat" : {
+# "bam" : 0,
+# "bad" : 1
+# }
+# }
+#
+# == \JSON Additions
+#
+# Note that JSON Additions must only be used with trusted data, and are
+# deprecated.
+#
+# When you "round trip" a non-\String object from Ruby to \JSON and back,
+# you have a new \String, instead of the object you began with:
+# ruby0 = Range.new(0, 2)
+# json = JSON.generate(ruby0)
+# json # => '"0..2"'
+# ruby1 = JSON.parse(json)
+# ruby1 # => '0..2'
+# ruby1.class # => String
+#
+# You can use \JSON _additions_ to preserve the original object.
+# The addition is an extension of a ruby class, so that:
+# - \JSON.generate stores more information in the \JSON string.
+# - \JSON.parse, called with option +create_additions+,
+# uses that information to create a proper Ruby object.
+#
+# This example shows a \Range being generated into \JSON
+# and parsed back into Ruby, both without and with
+# the addition for \Range:
+# ruby = Range.new(0, 2)
+# # This passage does not use the addition for Range.
+# json0 = JSON.generate(ruby)
+# ruby0 = JSON.parse(json0)
+# # This passage uses the addition for Range.
+# require 'json/add/range'
+# json1 = JSON.generate(ruby)
+# ruby1 = JSON.parse(json1, create_additions: true)
+# # Make a nice display.
+# display = <<~EOT
+# Generated JSON:
+# Without addition: #{json0} (#{json0.class})
+# With addition: #{json1} (#{json1.class})
+# Parsed JSON:
+# Without addition: #{ruby0.inspect} (#{ruby0.class})
+# With addition: #{ruby1.inspect} (#{ruby1.class})
+# EOT
+# puts display
+#
+# This output shows the different results:
+# Generated JSON:
+# Without addition: "0..2" (String)
+# With addition: {"json_class":"Range","a":[0,2,false]} (String)
+# Parsed JSON:
+# Without addition: "0..2" (String)
+# With addition: 0..2 (Range)
+#
+# The \JSON module includes additions for certain classes.
+# You can also craft custom additions.
+# See {Custom \JSON Additions}[#module-JSON-label-Custom+JSON+Additions].
+#
+# === Built-in Additions
+#
+# The \JSON module includes additions for certain classes.
+# To use an addition, +require+ its source:
+# - BigDecimal: <tt>require 'json/add/bigdecimal'</tt>
+# - Complex: <tt>require 'json/add/complex'</tt>
+# - Date: <tt>require 'json/add/date'</tt>
+# - DateTime: <tt>require 'json/add/date_time'</tt>
+# - Exception: <tt>require 'json/add/exception'</tt>
+# - OpenStruct: <tt>require 'json/add/ostruct'</tt>
+# - Range: <tt>require 'json/add/range'</tt>
+# - Rational: <tt>require 'json/add/rational'</tt>
+# - Regexp: <tt>require 'json/add/regexp'</tt>
+# - Set: <tt>require 'json/add/set'</tt>
+# - Struct: <tt>require 'json/add/struct'</tt>
+# - Symbol: <tt>require 'json/add/symbol'</tt>
+# - Time: <tt>require 'json/add/time'</tt>
+#
+# To reduce punctuation clutter, the examples below
+# show the generated \JSON via +puts+, rather than the usual +inspect+.
+#
+# \BigDecimal:
+# require 'json/add/bigdecimal'
+# ruby0 = BigDecimal(0) # 0.0
+# json = JSON.generate(ruby0) # {"json_class":"BigDecimal","b":"27:0.0"}
+# ruby1 = JSON.parse(json, create_additions: true) # 0.0
+# ruby1.class # => BigDecimal
+#
+# \Complex:
+# require 'json/add/complex'
+# ruby0 = Complex(1+0i) # 1+0i
+# json = JSON.generate(ruby0) # {"json_class":"Complex","r":1,"i":0}
+# ruby1 = JSON.parse(json, create_additions: true) # 1+0i
+# ruby1.class # Complex
+#
+# \Date:
+# require 'json/add/date'
+# ruby0 = Date.today # 2020-05-02
+# json = JSON.generate(ruby0) # {"json_class":"Date","y":2020,"m":5,"d":2,"sg":2299161.0}
+# ruby1 = JSON.parse(json, create_additions: true) # 2020-05-02
+# ruby1.class # Date
+#
+# \DateTime:
+# require 'json/add/date_time'
+# ruby0 = DateTime.now # 2020-05-02T10:38:13-05:00
+# json = JSON.generate(ruby0) # {"json_class":"DateTime","y":2020,"m":5,"d":2,"H":10,"M":38,"S":13,"of":"-5/24","sg":2299161.0}
+# ruby1 = JSON.parse(json, create_additions: true) # 2020-05-02T10:38:13-05:00
+# ruby1.class # DateTime
+#
+# \Exception (and its subclasses including \RuntimeError):
+# require 'json/add/exception'
+# ruby0 = Exception.new('A message') # A message
+# json = JSON.generate(ruby0) # {"json_class":"Exception","m":"A message","b":null}
+# ruby1 = JSON.parse(json, create_additions: true) # A message
+# ruby1.class # Exception
+# ruby0 = RuntimeError.new('Another message') # Another message
+# json = JSON.generate(ruby0) # {"json_class":"RuntimeError","m":"Another message","b":null}
+# ruby1 = JSON.parse(json, create_additions: true) # Another message
+# ruby1.class # RuntimeError
+#
+# \OpenStruct:
+# require 'json/add/ostruct'
+# ruby0 = OpenStruct.new(name: 'Matz', language: 'Ruby') # #<OpenStruct name="Matz", language="Ruby">
+# json = JSON.generate(ruby0) # {"json_class":"OpenStruct","t":{"name":"Matz","language":"Ruby"}}
+# ruby1 = JSON.parse(json, create_additions: true) # #<OpenStruct name="Matz", language="Ruby">
+# ruby1.class # OpenStruct
+#
+# \Range:
+# require 'json/add/range'
+# ruby0 = Range.new(0, 2) # 0..2
+# json = JSON.generate(ruby0) # {"json_class":"Range","a":[0,2,false]}
+# ruby1 = JSON.parse(json, create_additions: true) # 0..2
+# ruby1.class # Range
+#
+# \Rational:
+# require 'json/add/rational'
+# ruby0 = Rational(1, 3) # 1/3
+# json = JSON.generate(ruby0) # {"json_class":"Rational","n":1,"d":3}
+# ruby1 = JSON.parse(json, create_additions: true) # 1/3
+# ruby1.class # Rational
+#
+# \Regexp:
+# require 'json/add/regexp'
+# ruby0 = Regexp.new('foo') # (?-mix:foo)
+# json = JSON.generate(ruby0) # {"json_class":"Regexp","o":0,"s":"foo"}
+# ruby1 = JSON.parse(json, create_additions: true) # (?-mix:foo)
+# ruby1.class # Regexp
+#
+# \Set:
+# require 'json/add/set'
+# ruby0 = Set.new([0, 1, 2]) # #<Set: {0, 1, 2}>
+# json = JSON.generate(ruby0) # {"json_class":"Set","a":[0,1,2]}
+# ruby1 = JSON.parse(json, create_additions: true) # #<Set: {0, 1, 2}>
+# ruby1.class # Set
+#
+# \Struct:
+# require 'json/add/struct'
+# Customer = Struct.new(:name, :address) # Customer
+# ruby0 = Customer.new("Dave", "123 Main") # #<struct Customer name="Dave", address="123 Main">
+# json = JSON.generate(ruby0) # {"json_class":"Customer","v":["Dave","123 Main"]}
+# ruby1 = JSON.parse(json, create_additions: true) # #<struct Customer name="Dave", address="123 Main">
+# ruby1.class # Customer
+#
+# \Symbol:
+# require 'json/add/symbol'
+# ruby0 = :foo # foo
+# json = JSON.generate(ruby0) # {"json_class":"Symbol","s":"foo"}
+# ruby1 = JSON.parse(json, create_additions: true) # foo
+# ruby1.class # Symbol
+#
+# \Time:
+# require 'json/add/time'
+# ruby0 = Time.now # 2020-05-02 11:28:26 -0500
+# json = JSON.generate(ruby0) # {"json_class":"Time","s":1588436906,"n":840560000}
+# ruby1 = JSON.parse(json, create_additions: true) # 2020-05-02 11:28:26 -0500
+# ruby1.class # Time
+#
+#
+# === Custom \JSON Additions
+#
+# In addition to the \JSON additions provided,
+# you can craft \JSON additions of your own,
+# either for Ruby built-in classes or for user-defined classes.
+#
+# Here's a user-defined class +Foo+:
+# class Foo
+# attr_accessor :bar, :baz
+# def initialize(bar, baz)
+# self.bar = bar
+# self.baz = baz
+# end
+# end
+#
+# Here's the \JSON addition for it:
+# # Extend class Foo with JSON addition.
+# class Foo
+# # Serialize Foo object with its class name and arguments
+# def to_json(*args)
+# {
+# JSON.create_id => self.class.name,
+# 'a' => [ bar, baz ]
+# }.to_json(*args)
+# end
+# # Deserialize JSON string by constructing new Foo object with arguments.
+# def self.json_create(object)
+# new(*object['a'])
+# end
+# end
+#
+# Demonstration:
+# require 'json'
+# # This Foo object has no custom addition.
+# foo0 = Foo.new(0, 1)
+# json0 = JSON.generate(foo0)
+# obj0 = JSON.parse(json0)
+# # Load the custom addition.
+# require_relative 'foo_addition'
+# # This foo has the custom addition.
+# foo1 = Foo.new(0, 1)
+# json1 = JSON.generate(foo1)
+# obj1 = JSON.parse(json1, create_additions: true)
+# # Make a nice display.
+# display = <<~EOT
+# Generated JSON:
+# Without custom addition: #{json0} (#{json0.class})
+# With custom addition: #{json1} (#{json1.class})
+# Parsed JSON:
+# Without custom addition: #{obj0.inspect} (#{obj0.class})
+# With custom addition: #{obj1.inspect} (#{obj1.class})
+# EOT
+# puts display
+#
+# Output:
+#
+# Generated JSON:
+# Without custom addition: "#<Foo:0x0000000006534e80>" (String)
+# With custom addition: {"json_class":"Foo","a":[0,1]} (String)
+# Parsed JSON:
+# Without custom addition: "#<Foo:0x0000000006534e80>" (String)
+# With custom addition: #<Foo:0x0000000006473bb8 @bar=0, @baz=1> (Foo)
+#
module JSON
require 'json/version'
-
- begin
- require 'json/ext'
- rescue LoadError
- require 'json/pure'
- end
+ require 'json/ext'
end
diff --git a/ext/json/lib/json/add/bigdecimal.rb b/ext/json/lib/json/add/bigdecimal.rb
new file mode 100644
index 0000000000..dc84572f31
--- /dev/null
+++ b/ext/json/lib/json/add/bigdecimal.rb
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+begin
+ require 'bigdecimal'
+rescue LoadError
+end
+
+class BigDecimal
+
+ # Rebuilds a \BigDecimal by passing the 'b' entry of +object+ to BigDecimal._load; see #as_json.
+ def self.json_create(object)
+ BigDecimal._load object['b']
+ end
+
+ # Methods <tt>BigDecimal#as_json</tt> and +BigDecimal.json_create+ may be used
+ # to serialize and deserialize a \BigDecimal object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>BigDecimal#as_json</tt> serializes +self+,
+ # returning a 2-element hash representing +self+:
+ #
+ # require 'json/add/bigdecimal'
+ # x = BigDecimal(2).as_json # => {"json_class"=>"BigDecimal", "b"=>"27:0.2e1"}
+ # y = BigDecimal(2.0, 4).as_json # => {"json_class"=>"BigDecimal", "b"=>"36:0.2e1"}
+ # z = BigDecimal(Complex(2, 0)).as_json # => {"json_class"=>"BigDecimal", "b"=>"27:0.2e1"}
+ #
+ # \Method +BigDecimal.json_create+ deserializes such a hash, returning a \BigDecimal object:
+ #
+ # BigDecimal.json_create(x) # => 0.2e1
+ # BigDecimal.json_create(y) # => 0.2e1
+ # BigDecimal.json_create(z) # => 0.2e1
+ #
+ def as_json(*)
+ {
+ JSON.create_id => self.class.name,
+ 'b' => _dump.force_encoding(Encoding::UTF_8),
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/bigdecimal'
+ # puts BigDecimal(2).to_json
+ # puts BigDecimal(2.0, 4).to_json
+ # puts BigDecimal(Complex(2, 0)).to_json
+ #
+ # Output:
+ #
+ # {"json_class":"BigDecimal","b":"27:0.2e1"}
+ # {"json_class":"BigDecimal","b":"36:0.2e1"}
+ # {"json_class":"BigDecimal","b":"27:0.2e1"}
+ #
+ def to_json(*args)
+ as_json.to_json(*args)
+ end
+end if defined?(::BigDecimal)
diff --git a/ext/json/lib/json/add/complex.rb b/ext/json/lib/json/add/complex.rb
new file mode 100644
index 0000000000..9e3c6f2d0a
--- /dev/null
+++ b/ext/json/lib/json/add/complex.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+
+class Complex
+
+ # Rebuilds a \Complex from the 'r' and 'i' entries of +object+; see #as_json.
+ def self.json_create(object)
+ Complex(object['r'], object['i'])
+ end
+
+ # Methods <tt>Complex#as_json</tt> and +Complex.json_create+ may be used
+ # to serialize and deserialize a \Complex object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>Complex#as_json</tt> serializes +self+,
+ # returning a 3-element hash representing +self+:
+ #
+ # require 'json/add/complex'
+ # x = Complex(2).as_json # => {"json_class"=>"Complex", "r"=>2, "i"=>0}
+ # y = Complex(2.0, 4).as_json # => {"json_class"=>"Complex", "r"=>2.0, "i"=>4}
+ #
+ # \Method +Complex.json_create+ deserializes such a hash, returning a \Complex object:
+ #
+ # Complex.json_create(x) # => (2+0i)
+ # Complex.json_create(y) # => (2.0+4i)
+ #
+ def as_json(*)
+ {
+ JSON.create_id => self.class.name,
+ 'r' => real,
+ 'i' => imag,
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/complex'
+ # puts Complex(2).to_json
+ # puts Complex(2.0, 4).to_json
+ #
+ # Output:
+ #
+ # {"json_class":"Complex","r":2,"i":0}
+ # {"json_class":"Complex","r":2.0,"i":4}
+ #
+ def to_json(*args)
+ as_json.to_json(*args)
+ end
+end
diff --git a/ext/json/lib/json/add/core.rb b/ext/json/lib/json/add/core.rb
index 03a00dded4..61ff454212 100644
--- a/ext/json/lib/json/add/core.rb
+++ b/ext/json/lib/json/add/core.rb
@@ -1,148 +1,13 @@
-# This file contains implementations of ruby core's custom objects for
+# frozen_string_literal: true
+# This file requires the implementations of ruby core's custom objects for
# serialisation/deserialisation.
-unless Object.const_defined?(:JSON) and ::JSON.const_defined?(:JSON_LOADED) and
- ::JSON::JSON_LOADED
- require 'json'
-end
-require 'date'
-
-class Symbol
- def to_json(*a)
- {
- JSON.create_id => self.class.name,
- 's' => to_s,
- }.to_json(*a)
- end
-
- def self.json_create(o)
- o['s'].to_sym
- end
-end
-
-class Time
- def self.json_create(object)
- if usec = object.delete('u') # used to be tv_usec -> tv_nsec
- object['n'] = usec * 1000
- end
- if respond_to?(:tv_nsec)
- at(*object.values_at('s', 'n'))
- else
- at(object['s'], object['n'] / 1000)
- end
- end
-
- def to_json(*args)
- {
- JSON.create_id => self.class.name,
- 's' => tv_sec,
- 'n' => respond_to?(:tv_nsec) ? tv_nsec : tv_usec * 1000
- }.to_json(*args)
- end
-end
-
-class Date
- def self.json_create(object)
- civil(*object.values_at('y', 'm', 'd', 'sg'))
- end
-
- alias start sg unless method_defined?(:start)
-
- def to_json(*args)
- {
- JSON.create_id => self.class.name,
- 'y' => year,
- 'm' => month,
- 'd' => day,
- 'sg' => start,
- }.to_json(*args)
- end
-end
-
-class DateTime
- def self.json_create(object)
- args = object.values_at('y', 'm', 'd', 'H', 'M', 'S')
- of_a, of_b = object['of'].split('/')
- if of_b and of_b != '0'
- args << Rational(of_a.to_i, of_b.to_i)
- else
- args << of_a
- end
- args << object['sg']
- civil(*args)
- end
-
- alias start sg unless method_defined?(:start)
-
- def to_json(*args)
- {
- JSON.create_id => self.class.name,
- 'y' => year,
- 'm' => month,
- 'd' => day,
- 'H' => hour,
- 'M' => min,
- 'S' => sec,
- 'of' => offset.to_s,
- 'sg' => start,
- }.to_json(*args)
- end
-end
-
-class Range
- def self.json_create(object)
- new(*object['a'])
- end
-
- def to_json(*args)
- {
- JSON.create_id => self.class.name,
- 'a' => [ first, last, exclude_end? ]
- }.to_json(*args)
- end
-end
-
-class Struct
- def self.json_create(object)
- new(*object['v'])
- end
-
- def to_json(*args)
- klass = self.class.name
- klass.to_s.empty? and raise JSON::JSONError, "Only named structs are supported!"
- {
- JSON.create_id => klass,
- 'v' => values,
- }.to_json(*args)
- end
-end
-
-class Exception
- def self.json_create(object)
- result = new(object['m'])
- result.set_backtrace object['b']
- result
- end
-
- def to_json(*args)
- {
- JSON.create_id => self.class.name,
- 'm' => message,
- 'b' => backtrace,
- }.to_json(*args)
- end
-end
-
-class Regexp
- def self.json_create(object)
- new(object['s'], object['o'])
- end
-
- def to_json(*)
- {
- JSON.create_id => self.class.name,
- 'o' => options,
- 's' => source,
- }.to_json
- end
-end
+require 'json/add/date'
+require 'json/add/date_time'
+require 'json/add/exception'
+require 'json/add/range'
+require 'json/add/regexp'
+require 'json/add/string'
+require 'json/add/struct'
+require 'json/add/symbol'
+require 'json/add/time'
diff --git a/ext/json/lib/json/add/date.rb b/ext/json/lib/json/add/date.rb
new file mode 100644
index 0000000000..88a098b637
--- /dev/null
+++ b/ext/json/lib/json/add/date.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+require 'date'
+
+class Date
+
+ # Rebuilds a \Date via +civil+ from the 'y', 'm', 'd' and 'sg' entries of +object+; see #as_json.
+ def self.json_create(object)
+ civil(*object.values_at('y', 'm', 'd', 'sg'))
+ end
+
+ alias start sg unless method_defined?(:start) # only fills in #start when it is not already defined
+
+ # Methods <tt>Date#as_json</tt> and +Date.json_create+ may be used
+ # to serialize and deserialize a \Date object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>Date#as_json</tt> serializes +self+,
+ # returning a 5-element hash representing +self+:
+ #
+ # require 'json/add/date'
+ # x = Date.today.as_json
+ # # => {"json_class"=>"Date", "y"=>2023, "m"=>11, "d"=>21, "sg"=>2299161.0}
+ #
+ # \Method +Date.json_create+ deserializes such a hash, returning a \Date object:
+ #
+ # Date.json_create(x)
+ # # => #<Date: 2023-11-21 ((2460270j,0s,0n),+0s,2299161j)>
+ #
+ def as_json(*)
+ {
+ JSON.create_id => self.class.name,
+ 'y' => year,
+ 'm' => month,
+ 'd' => day,
+ 'sg' => start,
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/date'
+ # puts Date.today.to_json
+ #
+ # Output:
+ #
+ # {"json_class":"Date","y":2023,"m":11,"d":21,"sg":2299161.0}
+ #
+ def to_json(*args)
+ as_json.to_json(*args)
+ end
+end
diff --git a/ext/json/lib/json/add/date_time.rb b/ext/json/lib/json/add/date_time.rb
new file mode 100644
index 0000000000..8b0bb5d181
--- /dev/null
+++ b/ext/json/lib/json/add/date_time.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+require 'date'
+
+class DateTime
+
+ # See #as_json.
+ def self.json_create(object)
+ args = object.values_at('y', 'm', 'd', 'H', 'M', 'S')
+ of_a, of_b = object['of'].split('/')
+ if of_b and of_b != '0'
+ args << Rational(of_a.to_i, of_b.to_i)
+ else
+ args << of_a
+ end
+ args << object['sg']
+ civil(*args)
+ end
+
+ alias start sg unless method_defined?(:start)
+
+ # Methods <tt>DateTime#as_json</tt> and +DateTime.json_create+ may be used
+ # to serialize and deserialize a \DateTime object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>DateTime#as_json</tt> serializes +self+,
+ # returning a 9-element hash representing +self+:
+ #
+ # require 'json/add/date_time'
+ # x = DateTime.now.as_json
+ # # => {"json_class"=>"DateTime", "y"=>2023, "m"=>11, "d"=>21, "H"=>13, "M"=>5, "S"=>42, "of"=>"-1/4", "sg"=>2299161.0}
+ #
+ # \Method +DateTime.json_create+ deserializes such a hash, returning a \DateTime object:
+ #
+ # DateTime.json_create(x) # BUG? Raises Date::Error "invalid date"
+ #
+ def as_json(*)
+ {
+ JSON.create_id => self.class.name,
+ 'y' => year,
+ 'm' => month,
+ 'd' => day,
+ 'H' => hour,
+ 'M' => min,
+ 'S' => sec,
+ 'of' => offset.to_s,
+ 'sg' => start,
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/date_time'
+ # puts DateTime.now.to_json
+ #
+ # Output:
+ #
+ # {"json_class":"DateTime","y":2023,"m":11,"d":21,"H":13,"M":5,"S":42,"of":"-1/4","sg":2299161.0}
+ #
+ def to_json(*args)
+ as_json.to_json(*args)
+ end
+end
+
+
diff --git a/ext/json/lib/json/add/exception.rb b/ext/json/lib/json/add/exception.rb
new file mode 100644
index 0000000000..e85d404982
--- /dev/null
+++ b/ext/json/lib/json/add/exception.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+
+class Exception
+
+ # See #as_json.
+ def self.json_create(object)
+ result = new(object['m'])
+ result.set_backtrace object['b']
+ result
+ end
+
+ # Methods <tt>Exception#as_json</tt> and +Exception.json_create+ may be used
+ # to serialize and deserialize an \Exception object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>Exception#as_json</tt> serializes +self+,
+ # returning a 3-element hash representing +self+:
+ #
+ # require 'json/add/exception'
+ # x = Exception.new('Foo').as_json # => {"json_class"=>"Exception", "m"=>"Foo", "b"=>nil}
+ #
+ # \Method +Exception.json_create+ deserializes such a hash, returning an \Exception object:
+ #
+ # Exception.json_create(x) # => #<Exception: Foo>
+ #
+ def as_json(*)
+ {
+ JSON.create_id => self.class.name,
+ 'm' => message,
+ 'b' => backtrace,
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/exception'
+ # puts Exception.new('Foo').to_json
+ #
+ # Output:
+ #
+ # {"json_class":"Exception","m":"Foo","b":null}
+ #
+ def to_json(*args)
+ as_json.to_json(*args)
+ end
+end
diff --git a/ext/json/lib/json/add/ostruct.rb b/ext/json/lib/json/add/ostruct.rb
new file mode 100644
index 0000000000..7750498144
--- /dev/null
+++ b/ext/json/lib/json/add/ostruct.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+begin
+ require 'ostruct'
+rescue LoadError
+end
+
+class OpenStruct
+
+ # See #as_json.
+ def self.json_create(object)
+ new(object['t'] || object[:t])
+ end
+
+ # Methods <tt>OpenStruct#as_json</tt> and +OpenStruct.json_create+ may be used
+ # to serialize and deserialize an \OpenStruct object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>OpenStruct#as_json</tt> serializes +self+,
+ # returning a 2-element hash representing +self+:
+ #
+ # require 'json/add/ostruct'
+ # x = OpenStruct.new('name' => 'Rowdy', :age => nil).as_json
+ # # => {"json_class"=>"OpenStruct", "t"=>{:name=>'Rowdy', :age=>nil}}
+ #
+ # \Method +OpenStruct.json_create+ deserializes such a hash, returning an \OpenStruct object:
+ #
+ # OpenStruct.json_create(x)
+ # # => #<OpenStruct name='Rowdy', age=nil>
+ #
+ def as_json(*)
+ klass = self.class.name
+ klass.to_s.empty? and raise JSON::JSONError, "Only named structs are supported!"
+ {
+ JSON.create_id => klass,
+ 't' => table,
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/ostruct'
+ # puts OpenStruct.new('name' => 'Rowdy', :age => nil).to_json
+ #
+ # Output:
+ #
+ # {"json_class":"OpenStruct","t":{"name":"Rowdy","age":null}}
+ #
+ def to_json(*args)
+ as_json.to_json(*args)
+ end
+end if defined?(::OpenStruct)
diff --git a/ext/json/lib/json/add/range.rb b/ext/json/lib/json/add/range.rb
new file mode 100644
index 0000000000..408d2c32f6
--- /dev/null
+++ b/ext/json/lib/json/add/range.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+
+class Range
+
+ # See #as_json.
+ def self.json_create(object)
+ new(*object['a'])
+ end
+
+ # Methods <tt>Range#as_json</tt> and +Range.json_create+ may be used
+ # to serialize and deserialize a \Range object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>Range#as_json</tt> serializes +self+,
+ # returning a 2-element hash representing +self+:
+ #
+ # require 'json/add/range'
+ # x = (1..4).as_json # => {"json_class"=>"Range", "a"=>[1, 4, false]}
+ # y = (1...4).as_json # => {"json_class"=>"Range", "a"=>[1, 4, true]}
+ # z = ('a'..'d').as_json # => {"json_class"=>"Range", "a"=>["a", "d", false]}
+ #
+ # \Method +Range.json_create+ deserializes such a hash, returning a \Range object:
+ #
+ # Range.json_create(x) # => 1..4
+ # Range.json_create(y) # => 1...4
+ # Range.json_create(z) # => "a".."d"
+ #
+ def as_json(*)
+ {
+ JSON.create_id => self.class.name,
+ 'a' => [ first, last, exclude_end? ]
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/range'
+ # puts (1..4).to_json
+ # puts (1...4).to_json
+ # puts ('a'..'d').to_json
+ #
+ # Output:
+ #
+ # {"json_class":"Range","a":[1,4,false]}
+ # {"json_class":"Range","a":[1,4,true]}
+ # {"json_class":"Range","a":["a","d",false]}
+ #
+ def to_json(*args)
+ as_json.to_json(*args)
+ end
+end
diff --git a/ext/json/lib/json/add/rational.rb b/ext/json/lib/json/add/rational.rb
new file mode 100644
index 0000000000..c95812ea8e
--- /dev/null
+++ b/ext/json/lib/json/add/rational.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+
+class Rational
+
+ # See #as_json.
+ def self.json_create(object)
+ Rational(object['n'], object['d'])
+ end
+
+ # Methods <tt>Rational#as_json</tt> and +Rational.json_create+ may be used
+ # to serialize and deserialize a \Rational object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>Rational#as_json</tt> serializes +self+,
+ # returning a 3-element hash representing +self+:
+ #
+ # require 'json/add/rational'
+ # x = Rational(2, 3).as_json
+ # # => {"json_class"=>"Rational", "n"=>2, "d"=>3}
+ #
+ # \Method +Rational.json_create+ deserializes such a hash, returning a \Rational object:
+ #
+ # Rational.json_create(x)
+ # # => (2/3)
+ #
+ def as_json(*)
+ {
+ JSON.create_id => self.class.name,
+ 'n' => numerator,
+ 'd' => denominator,
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/rational'
+ # puts Rational(2, 3).to_json
+ #
+ # Output:
+ #
+ # {"json_class":"Rational","n":2,"d":3}
+ #
+ def to_json(*args)
+ as_json.to_json(*args)
+ end
+end
diff --git a/ext/json/lib/json/add/regexp.rb b/ext/json/lib/json/add/regexp.rb
new file mode 100644
index 0000000000..aebfb2db5c
--- /dev/null
+++ b/ext/json/lib/json/add/regexp.rb
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+
+class Regexp
+
+ # See #as_json.
+ def self.json_create(object)
+ new(object['s'], object['o'])
+ end
+
+ # Methods <tt>Regexp#as_json</tt> and +Regexp.json_create+ may be used
+ # to serialize and deserialize a \Regexp object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>Regexp#as_json</tt> serializes +self+,
+ # returning a 3-element hash representing +self+:
+ #
+ # require 'json/add/regexp'
+ # x = /foo/.as_json
+ # # => {"json_class"=>"Regexp", "o"=>0, "s"=>"foo"}
+ #
+ # \Method +Regexp.json_create+ deserializes such a hash, returning a \Regexp object:
+ #
+ # Regexp.json_create(x) # => /foo/
+ #
+ def as_json(*)
+ {
+ JSON.create_id => self.class.name,
+ 'o' => options,
+ 's' => source,
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/regexp'
+ # puts /foo/.to_json
+ #
+ # Output:
+ #
+ # {"json_class":"Regexp","o":0,"s":"foo"}
+ #
+ def to_json(*args)
+ as_json.to_json(*args)
+ end
+end
diff --git a/ext/json/lib/json/add/set.rb b/ext/json/lib/json/add/set.rb
new file mode 100644
index 0000000000..1918353187
--- /dev/null
+++ b/ext/json/lib/json/add/set.rb
@@ -0,0 +1,48 @@
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+defined?(::Set) or require 'set'
+
+class Set
+
+ # See #as_json.
+ def self.json_create(object)
+ new object['a']
+ end
+
+ # Methods <tt>Set#as_json</tt> and +Set.json_create+ may be used
+ # to serialize and deserialize a \Set object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>Set#as_json</tt> serializes +self+,
+ # returning a 2-element hash representing +self+:
+ #
+ # require 'json/add/set'
+ # x = Set.new(%w/foo bar baz/).as_json
+ # # => {"json_class"=>"Set", "a"=>["foo", "bar", "baz"]}
+ #
+ # \Method +Set.json_create+ deserializes such a hash, returning a \Set object:
+ #
+ # Set.json_create(x) # => #<Set: {"foo", "bar", "baz"}>
+ #
+ def as_json(*)
+ {
+ JSON.create_id => self.class.name,
+ 'a' => to_a,
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/set'
+ # puts Set.new(%w/foo bar baz/).to_json
+ #
+ # Output:
+ #
+ # {"json_class":"Set","a":["foo","bar","baz"]}
+ #
+ def to_json(*args)
+ as_json.to_json(*args)
+ end
+end
+
diff --git a/ext/json/lib/json/add/string.rb b/ext/json/lib/json/add/string.rb
new file mode 100644
index 0000000000..9c3bde27fb
--- /dev/null
+++ b/ext/json/lib/json/add/string.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+
+class String
+ # call-seq: json_create(o)
+ #
+ # Raw Strings are JSON Objects (the raw bytes are stored in an array for the
+ # key "raw"). The Ruby String can be created by this class method.
+ def self.json_create(object)
+ object["raw"].pack("C*")
+ end
+
+ # call-seq: to_json_raw_object()
+ #
+ # This method creates a raw object hash, that can be nested into
+ # other data structures and will be generated as a raw string. This
+ # method should be used, if you want to convert raw strings to JSON
+ # instead of UTF-8 strings, e. g. binary data.
+ def to_json_raw_object
+ {
+ JSON.create_id => self.class.name,
+ "raw" => unpack("C*"),
+ }
+ end
+
+ # call-seq: to_json_raw(*args)
+ #
+ # This method creates a JSON text from the result of a call to
+ # to_json_raw_object of this String.
+ def to_json_raw(...)
+ to_json_raw_object.to_json(...)
+ end
+end
diff --git a/ext/json/lib/json/add/struct.rb b/ext/json/lib/json/add/struct.rb
new file mode 100644
index 0000000000..6760c3d86c
--- /dev/null
+++ b/ext/json/lib/json/add/struct.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+
+class Struct
+
+ # See #as_json.
+ def self.json_create(object)
+ new(*object['v'])
+ end
+
+ # Methods <tt>Struct#as_json</tt> and +Struct.json_create+ may be used
+ # to serialize and deserialize a \Struct object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>Struct#as_json</tt> serializes +self+,
+ # returning a 2-element hash representing +self+:
+ #
+ # require 'json/add/struct'
+ # Customer = Struct.new('Customer', :name, :address, :zip)
+ # x = Struct::Customer.new.as_json
+ # # => {"json_class"=>"Struct::Customer", "v"=>[nil, nil, nil]}
+ #
+ # \Method +Struct.json_create+ deserializes such a hash, returning a \Struct object:
+ #
+ # Struct::Customer.json_create(x)
+ # # => #<struct Struct::Customer name=nil, address=nil, zip=nil>
+ #
+ def as_json(*)
+ klass = self.class.name
+ klass.to_s.empty? and raise JSON::JSONError, "Only named structs are supported!"
+ {
+ JSON.create_id => klass,
+ 'v' => values,
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/struct'
+ # Customer = Struct.new('Customer', :name, :address, :zip)
+ # puts Struct::Customer.new.to_json
+ #
+ # Output:
+ #
+ # {"json_class":"Struct::Customer","v":[null,null,null]}
+ #
+ def to_json(*args)
+ as_json.to_json(*args)
+ end
+end
diff --git a/ext/json/lib/json/add/symbol.rb b/ext/json/lib/json/add/symbol.rb
new file mode 100644
index 0000000000..806be4f025
--- /dev/null
+++ b/ext/json/lib/json/add/symbol.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+
+class Symbol
+
+ # Methods <tt>Symbol#as_json</tt> and +Symbol.json_create+ may be used
+ # to serialize and deserialize a \Symbol object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>Symbol#as_json</tt> serializes +self+,
+ # returning a 2-element hash representing +self+:
+ #
+ # require 'json/add/symbol'
+ # x = :foo.as_json
+ # # => {"json_class"=>"Symbol", "s"=>"foo"}
+ #
+ # \Method +Symbol.json_create+ deserializes such a hash, returning a \Symbol object:
+ #
+ # Symbol.json_create(x) # => :foo
+ #
+ def as_json(*)
+ {
+ JSON.create_id => self.class.name,
+ 's' => to_s,
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/symbol'
+ # puts :foo.to_json
+ #
+ # Output:
+ #
+ # {"json_class":"Symbol","s":"foo"}
+ #
+ def to_json(state = nil, *a)
+ state = ::JSON::State.from_state(state)
+ if state.strict?
+ super
+ else
+ as_json.to_json(state, *a)
+ end
+ end
+
+ # See #as_json.
+ def self.json_create(o)
+ o['s'].to_sym
+ end
+end
diff --git a/ext/json/lib/json/add/time.rb b/ext/json/lib/json/add/time.rb
new file mode 100644
index 0000000000..b03d4ff251
--- /dev/null
+++ b/ext/json/lib/json/add/time.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED
+ require 'json'
+end
+
+class Time
+
+ # See #as_json.
+ def self.json_create(object)
+ if usec = object.delete('u') # used to be tv_usec -> tv_nsec
+ object['n'] = usec * 1000
+ end
+ at(object['s'], Rational(object['n'], 1000))
+ end
+
+ # Methods <tt>Time#as_json</tt> and +Time.json_create+ may be used
+ # to serialize and deserialize a \Time object;
+ # see Marshal[rdoc-ref:Marshal].
+ #
+ # \Method <tt>Time#as_json</tt> serializes +self+,
+ # returning a 3-element hash representing +self+:
+ #
+ # require 'json/add/time'
+ # x = Time.now.as_json
+ # # => {"json_class"=>"Time", "s"=>1700931656, "n"=>472846644}
+ #
+ # \Method +Time.json_create+ deserializes such a hash, returning a \Time object:
+ #
+ # Time.json_create(x)
+ # # => 2023-11-25 11:00:56.472846644 -0600
+ #
+ def as_json(*)
+ {
+ JSON.create_id => self.class.name,
+ 's' => tv_sec,
+ 'n' => tv_nsec,
+ }
+ end
+
+ # Returns a JSON string representing +self+:
+ #
+ # require 'json/add/time'
+ # puts Time.now.to_json
+ #
+ # Output:
+ #
+ # {"json_class":"Time","s":1700931678,"n":980650786}
+ #
+ def to_json(*args)
+ as_json.to_json(*args)
+ end
+end
diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb
index 91cb3c2190..230bf08012 100644
--- a/ext/json/lib/json/common.rb
+++ b/ext/json/lib/json/common.rb
@@ -1,354 +1,1125 @@
+# frozen_string_literal: true
+
require 'json/version'
module JSON
+ autoload :GenericObject, 'json/generic_object'
+
+ module ParserOptions # :nodoc:
+ class << self
+ def prepare(opts)
+ if opts[:object_class] || opts[:array_class]
+ opts = opts.dup
+ on_load = opts[:on_load]
+
+ on_load = object_class_proc(opts[:object_class], on_load) if opts[:object_class]
+ on_load = array_class_proc(opts[:array_class], on_load) if opts[:array_class]
+ opts[:on_load] = on_load
+ end
+
+ if opts.fetch(:create_additions, false) != false
+ opts = create_additions_proc(opts)
+ end
+
+ opts
+ end
+
+ private
+
+ def object_class_proc(object_class, on_load)
+ ->(obj) do
+ if Hash === obj
+ object = object_class.new
+ obj.each { |k, v| object[k] = v }
+ obj = object
+ end
+ on_load.nil? ? obj : on_load.call(obj)
+ end
+ end
+
+ def array_class_proc(array_class, on_load)
+ ->(obj) do
+ if Array === obj
+ array = array_class.new
+ obj.each { |v| array << v }
+ obj = array
+ end
+ on_load.nil? ? obj : on_load.call(obj)
+ end
+ end
+
+ # TODO: extract :create_additions support to another gem for version 3.0
+ def create_additions_proc(opts)
+ if opts[:symbolize_names]
+ raise ArgumentError, "options :symbolize_names and :create_additions cannot be used in conjunction"
+ end
+
+ opts = opts.dup
+ create_additions = opts.fetch(:create_additions, false)
+ on_load = opts[:on_load]
+ object_class = opts[:object_class] || Hash
+
+ opts[:on_load] = ->(object) do
+ case object
+ when String
+ opts[:match_string]&.each do |pattern, klass|
+ if match = pattern.match(object)
+ create_additions_warning if create_additions.nil?
+ object = klass.json_create(object)
+ break
+ end
+ end
+ when object_class
+ if opts[:create_additions] != false
+ if class_path = object[JSON.create_id]
+ klass = begin
+ Object.const_get(class_path)
+ rescue NameError => e
+ raise ArgumentError, "can't get const #{class_path}: #{e}"
+ end
+
+ if klass.respond_to?(:json_creatable?) ? klass.json_creatable? : klass.respond_to?(:json_create)
+ create_additions_warning if create_additions.nil?
+ object = klass.json_create(object)
+ end
+ end
+ end
+ end
+
+ on_load.nil? ? object : on_load.call(object)
+ end
+
+ opts
+ end
+
+ def create_additions_warning
+ JSON.deprecation_warning "JSON.load implicit support for `create_additions: true` is deprecated " \
+ "and will be removed in 3.0, use JSON.unsafe_load or explicitly " \
+ "pass `create_additions: true`"
+ end
+ end
+ end
+
class << self
- # If _object_ is string-like parse the string and return the parsed result
- # as a Ruby data structure. Otherwise generate a JSON text from the Ruby
- # data structure object and return it.
- #
- # The _opts_ argument is passed through to generate/parse respectively, see
- # generate and parse for their documentation.
- def [](object, opts = {})
- if object.respond_to? :to_str
- JSON.parse(object.to_str, opts)
+ def deprecation_warning(message, uplevel = 3) # :nodoc:
+ gem_root = File.expand_path("..", __dir__) + "/"
+ caller_locations(uplevel, 10).each do |frame|
+ if frame.path.nil? || frame.path.start_with?(gem_root) || frame.path.end_with?("/truffle/cext_ruby.rb", ".c")
+ uplevel += 1
+ else
+ break
+ end
+ end
+
+ if RUBY_VERSION >= "3.0"
+ warn(message, uplevel: uplevel, category: :deprecated)
else
- JSON.generate(object, opts)
+ warn(message, uplevel: uplevel)
end
end
- # Returns the JSON parser class, that is used by JSON. This might be either
- # JSON::Ext::Parser or JSON::Pure::Parser.
+ # :call-seq:
+ # JSON[object] -> new_array or new_string
+ #
+ # If +object+ is a \String,
+ # calls JSON.parse with +object+ and +opts+ (see method #parse):
+ # json = '[0, 1, null]'
+ # JSON[json] # => [0, 1, nil]
+ #
+ # Otherwise, calls JSON.generate with +object+ and +opts+ (see method #generate):
+ # ruby = [0, 1, nil]
+ # JSON[ruby] # => '[0,1,null]'
+ def [](object, opts = nil)
+ if object.is_a?(String)
+ return JSON.parse(object, opts)
+ elsif object.respond_to?(:to_str)
+ str = object.to_str
+ if str.is_a?(String)
+ return JSON.parse(str, opts)
+ end
+ end
+
+ JSON.generate(object, opts)
+ end
+
+ # Returns the JSON parser class that is used by JSON.
attr_reader :parser
# Set the JSON parser class _parser_ to be used by JSON.
def parser=(parser) # :nodoc:
@parser = parser
- remove_const :Parser if const_defined? :Parser
+ remove_const :Parser if const_defined?(:Parser, false)
const_set :Parser, parser
end
- # Return the constant located at _path_. The format of _path_ has to be
- # either ::A::B::C or A::B::C. In any case A has to be located at the top
- # level (absolute namespace path?). If there doesn't exist a constant at
- # the given path, an ArgumentError is raised.
- def deep_const_get(path) # :nodoc:
- path.to_s.split(/::/).inject(Object) do |p, c|
- case
- when c.empty? then p
- when p.const_defined?(c) then p.const_get(c)
- else
- begin
- p.const_missing(c)
- rescue NameError => e
- raise ArgumentError, "can't get const #{path}: #{e}"
- end
- end
- end
- end
-
# Set the module _generator_ to be used by JSON.
def generator=(generator) # :nodoc:
old, $VERBOSE = $VERBOSE, nil
@generator = generator
- generator_methods = generator::GeneratorMethods
- for const in generator_methods.constants
- klass = deep_const_get(const)
- modul = generator_methods.const_get(const)
- klass.class_eval do
- instance_methods(false).each do |m|
- m.to_s == 'to_json' and remove_method m
+ if generator.const_defined?(:GeneratorMethods)
+ generator_methods = generator::GeneratorMethods
+ for const in generator_methods.constants
+ klass = const_get(const)
+ modul = generator_methods.const_get(const)
+ klass.class_eval do
+ instance_methods(false).each do |m|
+ m.to_s == 'to_json' and remove_method m
+ end
+ include modul
end
- include modul
end
end
self.state = generator::State
- const_set :State, self.state
- const_set :SAFE_STATE_PROTOTYPE, State.new
- const_set :FAST_STATE_PROTOTYPE, State.new(
- :indent => '',
- :space => '',
- :object_nl => "",
- :array_nl => "",
- :max_nesting => false
- )
- const_set :PRETTY_STATE_PROTOTYPE, State.new(
- :indent => ' ',
- :space => ' ',
- :object_nl => "\n",
- :array_nl => "\n"
- )
+ const_set :State, state
ensure
$VERBOSE = old
end
- # Returns the JSON generator modul, that is used by JSON. This might be
- # either JSON::Ext::Generator or JSON::Pure::Generator.
+ # Returns the JSON generator module that is used by JSON.
attr_reader :generator
- # Returns the JSON generator state class, that is used by JSON. This might
- # be either JSON::Ext::Generator::State or JSON::Pure::Generator::State.
+ # Sets or returns the JSON generator state class that is used by JSON.
attr_accessor :state
- # This is create identifier, that is used to decide, if the _json_create_
- # hook of a class should be called. It defaults to 'json_class'.
- attr_accessor :create_id
+ private
+
+ # Called from the extension when a hash has both string and symbol keys
+ def on_mixed_keys_hash(hash, do_raise)
+ set = {}
+ hash.each_key do |key|
+ key_str = key.to_s
+
+ if set[key_str]
+ message = "detected duplicate key #{key_str.inspect} in #{hash.inspect}"
+ if do_raise
+ raise GeneratorError, message
+ else
+ deprecation_warning("#{message}.\nThis will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`")
+ end
+ else
+ set[key_str] = true
+ end
+ end
+ end
+
+ def deprecated_singleton_attr_accessor(*attrs)
+ args = RUBY_VERSION >= "3.0" ? ", category: :deprecated" : ""
+ attrs.each do |attr|
+ singleton_class.class_eval <<~RUBY
+ def #{attr}
+ warn "JSON.#{attr} is deprecated and will be removed in json 3.0.0", uplevel: 1 #{args}
+ @#{attr}
+ end
+
+ def #{attr}=(val)
+ warn "JSON.#{attr}= is deprecated and will be removed in json 3.0.0", uplevel: 1 #{args}
+ @#{attr} = val
+ end
+
+ def _#{attr}
+ @#{attr}
+ end
+ RUBY
+ end
+ end
+ end
+
+ # Sets create identifier, which is used to decide if the _json_create_
+ # hook of a class should be called; initial value is +json_class+:
+ # JSON.create_id # => 'json_class'
+ def self.create_id=(new_value)
+ Thread.current[:"JSON.create_id"] = new_value.dup.freeze
+ end
+
+ # Returns the current create identifier.
+ # See also JSON.create_id=.
+ def self.create_id
+ Thread.current[:"JSON.create_id"] || 'json_class'
end
- self.create_id = 'json_class'
- NaN = 0.0/0
+ NaN = Float::NAN
- Infinity = 1.0/0
+ Infinity = Float::INFINITY
MinusInfinity = -Infinity
# The base exception for JSON errors.
class JSONError < StandardError; end
- # This exception is raised, if a parser error occurs.
- class ParserError < JSONError; end
+ # This exception is raised if a parser error occurs.
+ class ParserError < JSONError
+ attr_reader :line, :column
+ end
- # This exception is raised, if the nesting of parsed datastructures is too
+ # This exception is raised if the nesting of parsed data structures is too
# deep.
class NestingError < ParserError; end
- # :stopdoc:
- class CircularDatastructure < NestingError; end
- # :startdoc:
+ # This exception is raised if a generator or unparser error occurs.
+ class GeneratorError < JSONError
+ attr_reader :invalid_object
+
+ def initialize(message, invalid_object = nil)
+ super(message)
+ @invalid_object = invalid_object
+ end
+
+ def detailed_message(...)
+ # Exception#detailed_message doesn't exist until Ruby 3.2
+ super_message = defined?(super) ? super : message
+
+ if @invalid_object.nil?
+ super_message
+ else
+ "#{super_message}\nInvalid object: #{@invalid_object.inspect}"
+ end
+ end
+ end
- # This exception is raised, if a generator or unparser error occurs.
- class GeneratorError < JSONError; end
- # For backwards compatibility
- UnparserError = GeneratorError
+ # Fragment of JSON document that is to be included as is:
+ # fragment = JSON::Fragment.new("[1, 2, 3]")
+ # JSON.generate({ count: 3, items: fragment })
+ #
+ # This makes it easy to assemble multiple JSON fragments that have
+ # been persisted somewhere without having to parse them nor resorting
+ # to string interpolation.
+ #
+ # Note: no validation is performed on the provided string. It is the
+ # responsibility of the caller to ensure the string contains valid JSON.
+ Fragment = Struct.new(:json) do
+ def initialize(json)
+ unless string = String.try_convert(json)
+ raise TypeError, " no implicit conversion of #{json.class} into String"
+ end
- # This exception is raised, if the required unicode support is missing on the
- # system. Usually this means, that the iconv library is not installed.
- class MissingUnicodeSupport < JSONError; end
+ super(string)
+ end
+
+ def to_json(state = nil, *)
+ json
+ end
+ end
module_function
- # Parse the JSON document _source_ into a Ruby data structure and return it.
- #
- # _opts_ can have the following
- # keys:
- # * *max_nesting*: The maximum depth of nesting allowed in the parsed data
- # structures. Disable depth checking with :max_nesting => false, it defaults
- # to 19.
- # * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
- # defiance of RFC 4627 to be parsed by the Parser. This option defaults
- # to false.
- # * *symbolize_names*: If set to true, returns symbols for the names
- # (keys) in a JSON object. Otherwise strings are returned, which is also
- # the default.
- # * *create_additions*: If set to false, the Parser doesn't create
- # additions even if a matchin class and create_id was found. This option
- # defaults to true.
- # * *object_class*: Defaults to Hash
- # * *array_class*: Defaults to Array
- def parse(source, opts = {})
- Parser.new(source, opts).parse
+ # :call-seq:
+ # JSON.parse(source, opts) -> object
+ #
+ # Returns the Ruby objects created by parsing the given +source+.
+ #
+ # Argument +source+ contains the \String to be parsed.
+ #
+ # Argument +opts+, if given, contains a \Hash of options for the parsing.
+ # See {Parsing Options}[#module-JSON-label-Parsing+Options].
+ #
+ # ---
+ #
+ # When +source+ is a \JSON array, returns a Ruby \Array:
+ # source = '["foo", 1.0, true, false, null]'
+ # ruby = JSON.parse(source)
+ # ruby # => ["foo", 1.0, true, false, nil]
+ # ruby.class # => Array
+ #
+ # When +source+ is a \JSON object, returns a Ruby \Hash:
+ # source = '{"a": "foo", "b": 1.0, "c": true, "d": false, "e": null}'
+ # ruby = JSON.parse(source)
+ # ruby # => {"a"=>"foo", "b"=>1.0, "c"=>true, "d"=>false, "e"=>nil}
+ # ruby.class # => Hash
+ #
+ # For examples of parsing for all \JSON data types, see
+ # {Parsing \JSON}[#module-JSON-label-Parsing+JSON].
+ #
+ # Parses nested JSON objects:
+ # source = <<~JSON
+ # {
+ # "name": "Dave",
+ # "age" :40,
+ # "hats": [
+ # "Cattleman's",
+ # "Panama",
+ # "Tophat"
+ # ]
+ # }
+ # JSON
+ # ruby = JSON.parse(source)
+ # ruby # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]}
+ #
+ # ---
+ #
+ # Raises an exception if +source+ is not valid JSON:
+ # # Raises JSON::ParserError (783: unexpected token at ''):
+ # JSON.parse('')
+ #
+ def parse(source, opts = nil)
+ opts = ParserOptions.prepare(opts) unless opts.nil?
+ Parser.parse(source, opts)
end
- # Parse the JSON document _source_ into a Ruby data structure and return it.
- # The bang version of the parse method, defaults to the more dangerous values
- # for the _opts_ hash, so be sure only to parse trusted _source_ documents.
- #
- # _opts_ can have the following keys:
- # * *max_nesting*: The maximum depth of nesting allowed in the parsed data
- # structures. Enable depth checking with :max_nesting => anInteger. The parse!
- # methods defaults to not doing max depth checking: This can be dangerous,
- # if someone wants to fill up your stack.
- # * *allow_nan*: If set to true, allow NaN, Infinity, and -Infinity in
- # defiance of RFC 4627 to be parsed by the Parser. This option defaults
- # to true.
- # * *create_additions*: If set to false, the Parser doesn't create
- # additions even if a matchin class and create_id was found. This option
- # defaults to true.
- def parse!(source, opts = {})
- opts = {
- :max_nesting => false,
- :allow_nan => true
- }.update(opts)
- Parser.new(source, opts).parse
+ PARSE_L_OPTIONS = {
+ max_nesting: false,
+ allow_nan: true,
+ }.freeze
+ private_constant :PARSE_L_OPTIONS
+
+ # :call-seq:
+ # JSON.parse!(source, opts) -> object
+ #
+ # Calls
+ # parse(source, opts)
+ # with +source+ and possibly modified +opts+.
+ #
+ # Differences from JSON.parse:
+ # - Option +max_nesting+, if not provided, defaults to +false+,
+ # which disables checking for nesting depth.
+ # - Option +allow_nan+, if not provided, defaults to +true+.
+ def parse!(source, opts = nil)
+ if opts.nil?
+ parse(source, PARSE_L_OPTIONS)
+ else
+ parse(source, PARSE_L_OPTIONS.merge(opts))
+ end
end
- # Generate a JSON document from the Ruby data structure _obj_ and return
- # it. _state_ is * a JSON::State object,
- # * or a Hash like object (responding to to_hash),
- # * an object convertible into a hash by a to_h method,
- # that is used as or to configure a State object.
- #
- # It defaults to a state object, that creates the shortest possible JSON text
- # in one line, checks for circular data structures and doesn't allow NaN,
- # Infinity, and -Infinity.
- #
- # A _state_ hash can have the following keys:
- # * *indent*: a string used to indent levels (default: ''),
- # * *space*: a string that is put after, a : or , delimiter (default: ''),
- # * *space_before*: a string that is put before a : pair delimiter (default: ''),
- # * *object_nl*: a string that is put at the end of a JSON object (default: ''),
- # * *array_nl*: a string that is put at the end of a JSON array (default: ''),
- # * *allow_nan*: true if NaN, Infinity, and -Infinity should be
- # generated, otherwise an exception is thrown, if these values are
- # encountered. This options defaults to false.
- # * *max_nesting*: The maximum depth of nesting allowed in the data
- # structures from which JSON is to be generated. Disable depth checking
- # with :max_nesting => false, it defaults to 19.
- #
- # See also the fast_generate for the fastest creation method with the least
- # amount of sanity checks, and the pretty_generate method for some
- # defaults for a pretty output.
+ # :call-seq:
+ # JSON.load_file(path, opts={}) -> object
+ #
+ # Calls:
+ # parse(File.read(path), opts)
+ #
+ # See method #parse.
+ def load_file(filespec, opts = nil)
+ parse(File.read(filespec, encoding: Encoding::UTF_8), opts)
+ end
+
+ # :call-seq:
+ # JSON.load_file!(path, opts = {})
+ #
+ # Calls:
+ # JSON.parse!(File.read(path), opts)
+ #
+ # See method #parse!
+ def load_file!(filespec, opts = nil)
+ parse!(File.read(filespec, encoding: Encoding::UTF_8), opts)
+ end
+
+ # :call-seq:
+ # JSON.generate(obj, opts = nil) -> new_string
+ #
+ # Returns a \String containing the generated \JSON data.
+ #
+ # See also JSON.pretty_generate.
+ #
+ # Argument +obj+ is the Ruby object to be converted to \JSON.
+ #
+ # Argument +opts+, if given, contains a \Hash of options for the generation.
+ # See {Generating Options}[#module-JSON-label-Generating+Options].
+ #
+ # ---
+ #
+ # When +obj+ is an \Array, returns a \String containing a \JSON array:
+ # obj = ["foo", 1.0, true, false, nil]
+ # json = JSON.generate(obj)
+ # json # => '["foo",1.0,true,false,null]'
+ #
+ # When +obj+ is a \Hash, returns a \String containing a \JSON object:
+ # obj = {foo: 0, bar: 's', baz: :bat}
+ # json = JSON.generate(obj)
+ # json # => '{"foo":0,"bar":"s","baz":"bat"}'
+ #
+ # For examples of generating from other Ruby objects, see
+ # {Generating \JSON from Other Objects}[#module-JSON-label-Generating+JSON+from+Other+Objects].
+ #
+ # ---
+ #
+ # Raises an exception if any formatting option is not a \String.
+ #
+ # Raises an exception if +obj+ contains circular references:
+ # a = []; b = []; a.push(b); b.push(a)
+ # # Raises JSON::NestingError (nesting of 100 is too deep):
+ # JSON.generate(a)
+ #
def generate(obj, opts = nil)
- state = SAFE_STATE_PROTOTYPE.dup
+ if State === opts
+ opts.generate(obj)
+ else
+ State.generate(obj, opts, nil)
+ end
+ end
+
+ # :call-seq:
+ # JSON.fast_generate(obj, opts) -> new_string
+ #
+ # Arguments +obj+ and +opts+ here are the same as
+ # arguments +obj+ and +opts+ in JSON.generate.
+ #
+ # This method is deprecated: it emits a deprecation warning and then
+ # delegates to JSON.generate(obj, opts), so the same checks apply.
+ #
+ # Raises an exception if +obj+ contains circular references:
+ # a = []; b = []; a.push(b); b.push(a)
+ # # Raises JSON::NestingError (nesting of 100 is too deep):
+ # JSON.fast_generate(a)
+ def fast_generate(obj, opts = nil)
+ if RUBY_VERSION >= "3.0"
+ warn "JSON.fast_generate is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1, category: :deprecated
+ else
+ warn "JSON.fast_generate is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1
+ end
+ generate(obj, opts)
+ end
+
+ PRETTY_GENERATE_OPTIONS = { # defaults that JSON.pretty_generate merges caller options into
+ indent: '  ', # two spaces per nesting level, as documented for JSON.pretty_generate
+ space: ' ', # one space
+ object_nl: "\n",
+ array_nl: "\n",
+ }.freeze
+ private_constant :PRETTY_GENERATE_OPTIONS
+
+ # :call-seq:
+ # JSON.pretty_generate(obj, opts = nil) -> new_string
+ #
+ # Arguments +obj+ and +opts+ here are the same as
+ # arguments +obj+ and +opts+ in JSON.generate.
+ #
+ # Default options are:
+ # {
+ # indent: '  ', # Two spaces
+ # space: ' ', # One space
+ # array_nl: "\n", # Newline
+ # object_nl: "\n" # Newline
+ # }
+ #
+ # Example:
+ # obj = {foo: [:bar, :baz], bat: {bam: 0, bad: 1}}
+ # json = JSON.pretty_generate(obj)
+ # puts json
+ # Output:
+ # {
+ # "foo": [
+ # "bar",
+ # "baz"
+ # ],
+ # "bat": {
+ # "bam": 0,
+ # "bad": 1
+ # }
+ # }
+ #
+ def pretty_generate(obj, opts = nil)
+ return opts.generate(obj) if State === opts
+
+ options = PRETTY_GENERATE_OPTIONS
+
if opts
- if opts.respond_to? :to_hash
- opts = opts.to_hash
- elsif opts.respond_to? :to_h
- opts = opts.to_h
- else
- raise TypeError, "can't convert #{opts.class} into Hash"
+ unless opts.is_a?(Hash)
+ if opts.respond_to? :to_hash
+ opts = opts.to_hash
+ elsif opts.respond_to? :to_h
+ opts = opts.to_h
+ else
+ raise TypeError, "can't convert #{opts.class} into Hash"
+ end
end
- state = state.configure(opts)
+ options = options.merge(opts)
end
- state.generate(obj)
+
+ State.generate(obj, options, nil)
end
- # :stopdoc:
- # I want to deprecate these later, so I'll first be silent about them, and
- # later delete them.
- alias unparse generate
- module_function :unparse
- # :startdoc:
+ # Sets or returns default options for the JSON.unsafe_load method.
+ # Initially:
+ # opts = JSON.unsafe_load_default_options
+ # opts # => {:max_nesting=>false, :allow_nan=>true, :allow_blank=>true, :create_additions=>true}
+ deprecated_singleton_attr_accessor :unsafe_load_default_options
+
+ @unsafe_load_default_options = {
+ :max_nesting => false,
+ :allow_nan => true,
+ :allow_blank => true,
+ :create_additions => true,
+ }
- # Generate a JSON document from the Ruby data structure _obj_ and return it.
- # This method disables the checks for circles in Ruby objects.
+ # Sets or returns default options for the JSON.load method.
+ # Initially:
+ # opts = JSON.load_default_options
+ # opts # => {:allow_nan=>true, :allow_blank=>true, :create_additions=>nil}
+ deprecated_singleton_attr_accessor :load_default_options
+
+ @load_default_options = {
+ :allow_nan => true,
+ :allow_blank => true,
+ :create_additions => nil,
+ }
+ # :call-seq:
+ # JSON.unsafe_load(source, options = {}) -> object
+ # JSON.unsafe_load(source, proc = nil, options = {}) -> object
#
- # *WARNING*: Be careful not to pass any Ruby data structures with circles as
- # _obj_ argument, because this will cause JSON to go into an infinite loop.
- def fast_generate(obj, opts = nil)
- state = FAST_STATE_PROTOTYPE.dup
- if opts
- if opts.respond_to? :to_hash
- opts = opts.to_hash
- elsif opts.respond_to? :to_h
- opts = opts.to_h
+ # Returns the Ruby objects created by parsing the given +source+.
+ #
+ # BEWARE: This method is meant to deserialise data from trusted user input,
+ # like from your own database server or clients under your control, it could
+ # be dangerous to allow untrusted users to pass JSON sources into it.
+ #
+ # - Argument +source+ must be, or be convertible to, a \String:
+ # - If +source+ responds to instance method +to_str+,
+ # <tt>source.to_str</tt> becomes the source.
+ # - If +source+ responds to instance method +to_io+,
+ # <tt>source.to_io.read</tt> becomes the source.
+ # - If +source+ responds to instance method +read+,
+ # <tt>source.read</tt> becomes the source.
+ # - If both of the following are true, source becomes the \String <tt>'null'</tt>:
+ # - Option +allow_blank+ specifies a truthy value.
+ # - The source, as defined above, is +nil+ or the empty \String <tt>''</tt>.
+ # - Otherwise, +source+ remains the source.
+ # - Argument +proc+, if given, must be a \Proc that accepts one argument.
+ # It will be called recursively with each result (depth-first order).
+ # See details below.
+ # - Argument +opts+, if given, contains a \Hash of options for the parsing.
+ # See {Parsing Options}[#module-JSON-label-Parsing+Options].
+ # The default options can be changed via method JSON.unsafe_load_default_options=.
+ #
+ # ---
+ #
+ # When no +proc+ is given, modifies +source+ as above and returns the result of
+ # <tt>parse(source, opts)</tt>; see #parse.
+ #
+ # Source for following examples:
+ # source = <<~JSON
+ # {
+ # "name": "Dave",
+ # "age" :40,
+ # "hats": [
+ # "Cattleman's",
+ # "Panama",
+ # "Tophat"
+ # ]
+ # }
+ # JSON
+ #
+ # Load a \String:
+ # ruby = JSON.unsafe_load(source)
+ # ruby # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]}
+ #
+ # Load an \IO object:
+ # require 'stringio'
+ # object = JSON.unsafe_load(StringIO.new(source))
+ # object # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]}
+ #
+ # Load a \File object:
+ # path = 't.json'
+ # File.write(path, source)
+ # File.open(path) do |file|
+ # JSON.unsafe_load(file)
+ # end # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]}
+ #
+ # ---
+ #
+ # When +proc+ is given:
+ # - Modifies +source+ as above.
+ # - Gets the +result+ from calling <tt>parse(source, opts)</tt>.
+ # - Recursively calls <tt>proc(result)</tt>.
+ # - Returns the final result.
+ #
+ # Example:
+ # require 'json'
+ #
+ # # Some classes for the example.
+ # class Base
+ # def initialize(attributes)
+ # @attributes = attributes
+ # end
+ # end
+ # class User < Base; end
+ # class Account < Base; end
+ # class Admin < Base; end
+ # # The JSON source.
+ # json = <<-EOF
+ # {
+ # "users": [
+ # {"type": "User", "username": "jane", "email": "jane@example.com"},
+ # {"type": "User", "username": "john", "email": "john@example.com"}
+ # ],
+ # "accounts": [
+ # {"account": {"type": "Account", "paid": true, "account_id": "1234"}},
+ # {"account": {"type": "Account", "paid": false, "account_id": "1235"}}
+ # ],
+ # "admins": {"type": "Admin", "password": "0wn3d"}
+ # }
+ # EOF
+ # # Deserializer method.
+ # def deserialize_obj(obj, safe_types = %w(User Account Admin))
+ # type = obj.is_a?(Hash) && obj["type"]
+ # safe_types.include?(type) ? Object.const_get(type).new(obj) : obj
+ # end
+ # # Call to JSON.unsafe_load
+ # ruby = JSON.unsafe_load(json, proc {|obj|
+ # case obj
+ # when Hash
+ # obj.each {|k, v| obj[k] = deserialize_obj v }
+ # when Array
+ # obj.map! {|v| deserialize_obj v }
+ # end
+ # obj
+ # })
+ # pp ruby
+ # Output:
+ # {"users"=>
+ # [#<User:0x00000000064c4c98
+ # @attributes=
+ # {"type"=>"User", "username"=>"jane", "email"=>"jane@example.com"}>,
+ # #<User:0x00000000064c4bd0
+ # @attributes=
+ # {"type"=>"User", "username"=>"john", "email"=>"john@example.com"}>],
+ # "accounts"=>
+ # [{"account"=>
+ # #<Account:0x00000000064c4928
+ # @attributes={"type"=>"Account", "paid"=>true, "account_id"=>"1234"}>},
+ # {"account"=>
+ # #<Account:0x00000000064c4680
+ # @attributes={"type"=>"Account", "paid"=>false, "account_id"=>"1235"}>}],
+ # "admins"=>
+ # #<Admin:0x00000000064c41f8
+ # @attributes={"type"=>"Admin", "password"=>"0wn3d"}>}
+ #
+ def unsafe_load(source, proc = nil, options = nil)
+ opts = if options.nil?
+ if proc && proc.is_a?(Hash) # a Hash in the proc slot is treated as the options
+ options, proc = proc, nil
+ options # NOTE(review): this path does not merge _unsafe_load_default_options, unlike the explicit-options path below - confirm intended
else
- raise TypeError, "can't convert #{opts.class} into Hash"
+ _unsafe_load_default_options
end
- state.configure(opts)
+ else
+ _unsafe_load_default_options.merge(options)
end
- state.generate(obj)
- end
- # :stopdoc:
- # I want to deprecate these later, so I'll first be silent about them, and later delete them.
- alias fast_unparse fast_generate
- module_function :fast_unparse
- # :startdoc:
+ unless source.is_a?(String)
+ if source.respond_to? :to_str
+ source = source.to_str
+ elsif source.respond_to? :to_io
+ source = source.to_io.read
+ elsif source.respond_to?(:read)
+ source = source.read
+ end
+ end
- # Generate a JSON document from the Ruby data structure _obj_ and return it.
- # The returned document is a prettier form of the document returned by
- # #unparse.
+ if opts[:allow_blank] && (source.nil? || (String === source && source.empty?)) # only Strings are asked empty?, same guard as JSON.load
+ source = 'null'
+ end
+
+ if proc
+ opts = opts.dup # copy before adding :on_load so the hash obtained above is not mutated
+ opts[:on_load] = proc.to_proc
+ end
+
+ parse(source, opts)
+ end
+
+ # :call-seq:
+ # JSON.load(source, options = {}) -> object
+ # JSON.load(source, proc = nil, options = {}) -> object
#
- # The _opts_ argument can be used to configure the generator, see the
- # generate method for a more detailed explanation.
- def pretty_generate(obj, opts = nil)
- state = PRETTY_STATE_PROTOTYPE.dup
- if opts
- if opts.respond_to? :to_hash
- opts = opts.to_hash
- elsif opts.respond_to? :to_h
- opts = opts.to_h
+ # Returns the Ruby objects created by parsing the given +source+.
+ #
+ # BEWARE: This method is meant to deserialise data from trusted user input,
+ # like from your own database server or clients under your control, it could
+ # be dangerous to allow untrusted users to pass JSON sources into it.
+ # If you must use it, use JSON.unsafe_load instead to make it clear.
+ #
+ # Since JSON version 2.8.0, `load` emits a deprecation warning when a
+ # non native type is deserialized, without `create_additions` being explicitly
+ # enabled, and in JSON version 3.0, `load` will have `create_additions` disabled
+ # by default.
+ #
+ # - Argument +source+ must be, or be convertible to, a \String:
+ # - If +source+ responds to instance method +to_str+,
+ # <tt>source.to_str</tt> becomes the source.
+ # - If +source+ responds to instance method +to_io+,
+ # <tt>source.to_io.read</tt> becomes the source.
+ # - If +source+ responds to instance method +read+,
+ # <tt>source.read</tt> becomes the source.
+ # - If both of the following are true, source becomes the \String <tt>'null'</tt>:
+ # - Option +allow_blank+ specifies a truthy value.
+ # - The source, as defined above, is +nil+ or the empty \String <tt>''</tt>.
+ # - Otherwise, +source+ remains the source.
+ # - Argument +proc+, if given, must be a \Proc that accepts one argument.
+ # It will be called recursively with each result (depth-first order).
+ # See details below.
+ # - Argument +opts+, if given, contains a \Hash of options for the parsing.
+ # See {Parsing Options}[#module-JSON-label-Parsing+Options].
+ # The default options can be changed via method JSON.load_default_options=.
+ #
+ # ---
+ #
+ # When no +proc+ is given, modifies +source+ as above and returns the result of
+ # <tt>parse(source, opts)</tt>; see #parse.
+ #
+ # Source for following examples:
+ # source = <<~JSON
+ # {
+ # "name": "Dave",
+ # "age" :40,
+ # "hats": [
+ # "Cattleman's",
+ # "Panama",
+ # "Tophat"
+ # ]
+ # }
+ # JSON
+ #
+ # Load a \String:
+ # ruby = JSON.load(source)
+ # ruby # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]}
+ #
+ # Load an \IO object:
+ # require 'stringio'
+ # object = JSON.load(StringIO.new(source))
+ # object # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]}
+ #
+ # Load a \File object:
+ # path = 't.json'
+ # File.write(path, source)
+ # File.open(path) do |file|
+ # JSON.load(file)
+ # end # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]}
+ #
+ # ---
+ #
+ # When +proc+ is given:
+ # - Modifies +source+ as above.
+ # - Gets the +result+ from calling <tt>parse(source, opts)</tt>.
+ # - Recursively calls <tt>proc(result)</tt>.
+ # - Returns the final result.
+ #
+ # Example:
+ # require 'json'
+ #
+ # # Some classes for the example.
+ # class Base
+ # def initialize(attributes)
+ # @attributes = attributes
+ # end
+ # end
+ # class User < Base; end
+ # class Account < Base; end
+ # class Admin < Base; end
+ # # The JSON source.
+ # json = <<-EOF
+ # {
+ # "users": [
+ # {"type": "User", "username": "jane", "email": "jane@example.com"},
+ # {"type": "User", "username": "john", "email": "john@example.com"}
+ # ],
+ # "accounts": [
+ # {"account": {"type": "Account", "paid": true, "account_id": "1234"}},
+ # {"account": {"type": "Account", "paid": false, "account_id": "1235"}}
+ # ],
+ # "admins": {"type": "Admin", "password": "0wn3d"}
+ # }
+ # EOF
+ # # Deserializer method.
+ # def deserialize_obj(obj, safe_types = %w(User Account Admin))
+ # type = obj.is_a?(Hash) && obj["type"]
+ # safe_types.include?(type) ? Object.const_get(type).new(obj) : obj
+ # end
+ # # Call to JSON.load
+ # ruby = JSON.load(json, proc {|obj|
+ # case obj
+ # when Hash
+ # obj.each {|k, v| obj[k] = deserialize_obj v }
+ # when Array
+ # obj.map! {|v| deserialize_obj v }
+ # end
+ # obj
+ # })
+ # pp ruby
+ # Output:
+ # {"users"=>
+ # [#<User:0x00000000064c4c98
+ # @attributes=
+ # {"type"=>"User", "username"=>"jane", "email"=>"jane@example.com"}>,
+ # #<User:0x00000000064c4bd0
+ # @attributes=
+ # {"type"=>"User", "username"=>"john", "email"=>"john@example.com"}>],
+ # "accounts"=>
+ # [{"account"=>
+ # #<Account:0x00000000064c4928
+ # @attributes={"type"=>"Account", "paid"=>true, "account_id"=>"1234"}>},
+ # {"account"=>
+ # #<Account:0x00000000064c4680
+ # @attributes={"type"=>"Account", "paid"=>false, "account_id"=>"1235"}>}],
+ # "admins"=>
+ # #<Admin:0x00000000064c41f8
+ # @attributes={"type"=>"Admin", "password"=>"0wn3d"}>}
+ #
+ def load(source, proc = nil, options = nil)
+ if proc && options.nil? && proc.is_a?(Hash)
+ options = proc
+ proc = nil
+ end
+
+ opts = if options.nil?
+ if proc && proc.is_a?(Hash)
+ options, proc = proc, nil
+ options
else
- raise TypeError, "can't convert #{opts.class} into Hash"
+ _load_default_options
+ end
+ else
+ _load_default_options.merge(options)
+ end
+
+ unless source.is_a?(String)
+ if source.respond_to? :to_str
+ source = source.to_str
+ elsif source.respond_to? :to_io
+ source = source.to_io.read
+ elsif source.respond_to?(:read)
+ source = source.read
+ end
+ end
+
+ if opts[:allow_blank] && (source.nil? || (String === source && source.empty?))
+ source = 'null'
+ end
+
+ if proc
+ opts = opts.dup
+ opts[:on_load] = proc.to_proc
+ end
+
+ parse(source, opts)
+ end
+
+ # Sets or returns the default options for the JSON.dump method.
+ # Initially:
+ # opts = JSON.dump_default_options
+ # opts # => {:max_nesting=>false, :allow_nan=>true}
+ deprecated_singleton_attr_accessor :dump_default_options
+ @dump_default_options = {
+ :max_nesting => false,
+ :allow_nan => true,
+ }
+
+ # :call-seq:
+ # JSON.dump(obj, io = nil, limit = nil)
+ #
+ # Dumps +obj+ as a \JSON string, i.e. calls generate on the object and returns the result.
+ #
+ # The default options can be changed via method JSON.dump_default_options.
+ #
+ # - Argument +io+, if given, should respond to method +write+;
+ # the \JSON \String is written to +io+, and +io+ is returned.
+ # If +io+ is not given, the \JSON \String is returned.
+ # - Argument +limit+, if given, is passed to JSON.generate as option +max_nesting+.
+ #
+ # ---
+ #
+ # When argument +io+ is not given, returns the \JSON \String generated from +obj+:
+ # obj = {foo: [0, 1], bar: {baz: 2, bat: 3}, bam: :bad}
+ # json = JSON.dump(obj)
+ # json # => "{\"foo\":[0,1],\"bar\":{\"baz\":2,\"bat\":3},\"bam\":\"bad\"}"
+ #
+ # When argument +io+ is given, writes the \JSON \String to +io+ and returns +io+:
+ # path = 't.json'
+ # File.open(path, 'w') do |file|
+ # JSON.dump(obj, file)
+ # end # => #<File:t.json (closed)>
+ # puts File.read(path)
+ # Output:
+ # {"foo":[0,1],"bar":{"baz":2,"bat":3},"bam":"bad"}
+ def dump(obj, anIO = nil, limit = nil, kwargs = nil)
+ if kwargs.nil?
+ if limit.nil?
+ if anIO.is_a?(Hash)
+ kwargs = anIO
+ anIO = nil
+ end
+ elsif limit.is_a?(Hash)
+ kwargs = limit
+ limit = nil
+ end
+ end
+
+ unless anIO.nil?
+ if anIO.respond_to?(:to_io)
+ anIO = anIO.to_io
+ elsif limit.nil? && !anIO.respond_to?(:write)
+ anIO, limit = nil, anIO
end
- state.configure(opts)
end
- state.generate(obj)
+
+ opts = JSON._dump_default_options
+ opts = opts.merge(:max_nesting => limit) if limit
+ opts = opts.merge(kwargs) if kwargs
+
+ begin
+ State.generate(obj, opts, anIO)
+ rescue JSON::NestingError
+ raise ArgumentError, "exceed depth limit"
+ end
end
# :stopdoc:
- # I want to deprecate these later, so I'll first be silent about them, and later delete them.
- alias pretty_unparse pretty_generate
- module_function :pretty_unparse
- # :startdoc:
+ # All these were meant to be deprecated circa 2009, but were just set as undocumented
+ # so usage still exist in the wild.
+ def unparse(...)
+ if RUBY_VERSION >= "3.0"
+ warn "JSON.unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1, category: :deprecated
+ else
+ warn "JSON.unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1
+ end
+ generate(...)
+ end
+ module_function :unparse
- # Load a ruby data structure from a JSON _source_ and return it. A source can
- # either be a string-like object, an IO like object, or an object responding
- # to the read method. If _proc_ was given, it will be called with any nested
- # Ruby object as an argument recursively in depth first order.
- #
- # This method is part of the implementation of the load/dump interface of
- # Marshal and YAML.
- def load(source, proc = nil)
- if source.respond_to? :to_str
- source = source.to_str
- elsif source.respond_to? :to_io
- source = source.to_io.read
+ def fast_unparse(...)
+ if RUBY_VERSION >= "3.0"
+ warn "JSON.fast_unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1, category: :deprecated
else
- source = source.read
+ warn "JSON.fast_unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1
end
- result = parse(source, :max_nesting => false, :allow_nan => true)
- recurse_proc(result, &proc) if proc
- result
+ generate(...)
end
+ module_function :fast_unparse
- def recurse_proc(result, &proc)
- case result
- when Array
- result.each { |x| recurse_proc x, &proc }
- proc.call result
- when Hash
- result.each { |x, y| recurse_proc x, &proc; recurse_proc y, &proc }
- proc.call result
+ def pretty_unparse(...) # deprecated alias: warns, then forwards every argument to pretty_generate
+ if RUBY_VERSION >= "3.0"
+ warn "JSON.pretty_unparse is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1, category: :deprecated
else
- proc.call result
+ warn "JSON.pretty_unparse is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1
end
+ pretty_generate(...)
end
+ module_function :pretty_unparse # register this method itself, as done for unparse, fast_unparse and restore
- alias restore load
+ def restore(...)
+ if RUBY_VERSION >= "3.0"
+ warn "JSON.restore is deprecated and will be removed in json 3.0.0, just use JSON.load", uplevel: 1, category: :deprecated
+ else
+ warn "JSON.restore is deprecated and will be removed in json 3.0.0, just use JSON.load", uplevel: 1
+ end
+ load(...)
+ end
module_function :restore
- # Dumps _obj_ as a JSON string, i.e. calls generate on the object and returns
- # the result.
+ class << self
+ private
+
+ def const_missing(const_name)
+ case const_name
+ when :PRETTY_STATE_PROTOTYPE
+ if RUBY_VERSION >= "3.0"
+ warn "JSON::PRETTY_STATE_PROTOTYPE is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1, category: :deprecated
+ else
+ warn "JSON::PRETTY_STATE_PROTOTYPE is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1
+ end
+ state.new(PRETTY_GENERATE_OPTIONS)
+ else
+ super
+ end
+ end
+ end
+ # :startdoc:
+
+ # JSON::Coder holds a parser and generator configuration.
#
- # If anIO (an IO like object or an object that responds to the write method)
- # was given, the resulting JSON is written to it.
+ # module MyApp
+ # JSONC_CODER = JSON::Coder.new(
+ # allow_trailing_comma: true
+ # )
+ # end
#
- # If the number of nested arrays or objects exceeds _limit_ an ArgumentError
- # exception is raised. This argument is similar (but not exactly the
- # same!) to the _limit_ argument in Marshal.dump.
+ # MyApp::JSONC_CODER.load(document)
#
- # This method is part of the implementation of the load/dump interface of
- # Marshal and YAML.
- def dump(obj, anIO = nil, limit = nil)
- if anIO and limit.nil?
- anIO = anIO.to_io if anIO.respond_to?(:to_io)
- unless anIO.respond_to?(:write)
- limit = anIO
- anIO = nil
+ class Coder
+ # :call-seq:
+ # JSON::Coder.new(options = nil, &block)
+ #
+ # Argument +options+, if given, contains a \Hash of options for both parsing and generating.
+ # See {Parsing Options}[rdoc-ref:JSON@Parsing+Options],
+ # and {Generating Options}[rdoc-ref:JSON@Generating+Options].
+ #
+ # For generation, the <tt>strict: true</tt> option is always set. When a Ruby object with no native \JSON counterpart is
+ # encountered, the block provided to the initialize method is invoked, and must return a Ruby object that has a native
+ # \JSON counterpart:
+ #
+ # module MyApp
+ # API_JSON_CODER = JSON::Coder.new do |object|
+ # case object
+ # when Time
+ # object.iso8601(3)
+ # else
+ # object # Unknown type, will raise
+ # end
+ # end
+ # end
+ #
+ # puts MyApp::API_JSON_CODER.dump(Time.now.utc) # => "2025-01-21T08:41:44.286Z"
+ #
+ def initialize(options = nil, &as_json)
+ if options.nil?
+ options = { strict: true }
+ else
+ options = options.dup
+ options[:strict] = true
end
+ options[:as_json] = as_json if as_json
+
+ @state = State.new(options).freeze
+ @parser_config = Ext::Parser::Config.new(ParserOptions.prepare(options)).freeze
end
- limit ||= 0
- result = generate(obj, :allow_nan => true, :max_nesting => limit)
- if anIO
- anIO.write result
- anIO
- else
- result
+
+ # call-seq:
+ # dump(object) -> String
+ # dump(object, io) -> io
+ #
+ # Serialize the given object into a \JSON document.
+ def dump(object, io = nil)
+ @state.generate(object, io)
end
- rescue JSON::NestingError
- raise ArgumentError, "exceed depth limit"
- end
+ alias_method :generate, :dump
- # Shortuct for iconv.
- if String.method_defined?(:encode)
- def self.iconv(to, from, string)
- string.encode(to, from)
+ # call-seq:
+ # load(string) -> Object
+ #
+ # Parse the given \JSON document and return an equivalent Ruby object.
+ def load(source)
+ @parser_config.parse(source)
+ end
+ alias_method :parse, :load
+
+ # call-seq:
+ # load_file(path) -> Object
+ #
+ # Read the file at the given path as UTF-8, parse it as a \JSON document and return an equivalent Ruby object.
+ def load_file(path)
+ load(File.read(path, encoding: Encoding::UTF_8))
end
- else
- require 'iconv'
- def self.iconv(to, from, string)
- Iconv.conv(to, from, string)
+ end
+
+ module GeneratorMethods
+ # call-seq: to_json(*)
+ #
+ # Converts this object into a JSON string.
+ # If this object doesn't directly map to a JSON native type,
+ # first convert it to a string (calling #to_s), then converts
+ # it to a JSON string, and returns the result.
+ # This is a fallback, if no special method #to_json was defined for some object.
+ def to_json(state = nil, *)
+ obj = case self
+ when nil, false, true, Integer, Float, Array, Hash
+ self
+ else
+ "#{self}"
+ end
+
+ if state.nil?
+ JSON::State._generate_no_fallback(obj, nil, nil)
+ else
+ JSON::State.from_state(state)._generate_no_fallback(obj)
+ end
end
end
end
@@ -359,42 +1130,44 @@ module ::Kernel
# Outputs _objs_ to STDOUT as JSON strings in the shortest form, that is in
# one line.
def j(*objs)
+ if RUBY_VERSION >= "3.0"
+ warn "Kernel#j is deprecated and will be removed in json 3.0.0", uplevel: 1, category: :deprecated
+ else
+ warn "Kernel#j is deprecated and will be removed in json 3.0.0", uplevel: 1
+ end
+
objs.each do |obj|
- puts JSON::generate(obj, :allow_nan => true, :max_nesting => false)
+ puts JSON.generate(obj, :allow_nan => true, :max_nesting => false)
end
nil
end
- # Ouputs _objs_ to STDOUT as JSON strings in a pretty format, with
+ # Outputs _objs_ to STDOUT as JSON strings in a pretty format, with
# indentation and over many lines.
def jj(*objs)
+ if RUBY_VERSION >= "3.0"
+ warn "Kernel#jj is deprecated and will be removed in json 3.0.0", uplevel: 1, category: :deprecated
+ else
+ warn "Kernel#jj is deprecated and will be removed in json 3.0.0", uplevel: 1
+ end
+
objs.each do |obj|
- puts JSON::pretty_generate(obj, :allow_nan => true, :max_nesting => false)
+ puts JSON.pretty_generate(obj, :allow_nan => true, :max_nesting => false)
end
nil
end
- # If _object_ is string-like parse the string and return the parsed result as
- # a Ruby data structure. Otherwise generate a JSON text from the Ruby data
+ # If _object_ is string-like, parse the string and return the parsed result as
+ # a Ruby data structure. Otherwise, generate a JSON text from the Ruby data
# structure object and return it.
#
- # The _opts_ argument is passed through to generate/parse respectively, see
+ # The _opts_ argument is passed through to generate/parse respectively. See
# generate and parse for their documentation.
- def JSON(object, *args)
- if object.respond_to? :to_str
- JSON.parse(object.to_str, args.first)
- else
- JSON.generate(object, args.first)
- end
+ def JSON(object, opts = nil)
+ JSON[object, opts]
end
end
-class ::Class
- # Returns true, if this class can be used to create an instance
- # from a serialised JSON string. The class has to implement a class
- # method _json_create_ that expects a hash as first parameter, which includes
- # the required data.
- def json_creatable?
- respond_to?(:json_create)
- end
+class Object
+ include JSON::GeneratorMethods
end
diff --git a/ext/json/lib/json/ext.rb b/ext/json/lib/json/ext.rb
index a5e3148c57..5bacc5e371 100644
--- a/ext/json/lib/json/ext.rb
+++ b/ext/json/lib/json/ext.rb
@@ -1,15 +1,45 @@
+# frozen_string_literal: true
+
require 'json/common'
module JSON
# This module holds all the modules/classes that implement JSON's
# functionality as C extensions.
module Ext
+ class Parser
+ class << self
+ def parse(...)
+ new(...).parse
+ end
+ alias_method :parse, :parse # Allow redefinition by extensions
+ end
+
+ def initialize(source, opts = nil)
+ @source = source
+ @config = Config.new(opts)
+ end
+
+ def source
+ @source.dup
+ end
+
+ def parse
+ @config.parse(@source)
+ end
+ end
+
require 'json/ext/parser'
- require 'json/ext/generator'
- $DEBUG and warn "Using Ext extension for JSON."
- JSON.parser = Parser
- JSON.generator = Generator
+ Ext::Parser::Config = Ext::ParserConfig
+ JSON.parser = Ext::Parser
+
+ if RUBY_ENGINE == 'truffleruby'
+ require 'json/truffle_ruby/generator'
+ JSON.generator = JSON::TruffleRuby::Generator
+ else
+ require 'json/ext/generator'
+ JSON.generator = Generator
+ end
end
- JSON_LOADED = true unless const_defined?(:JSON_LOADED)
+ JSON_LOADED = true unless defined?(JSON::JSON_LOADED)
end
diff --git a/ext/json/lib/json/ext/generator/state.rb b/ext/json/lib/json/ext/generator/state.rb
new file mode 100644
index 0000000000..e4f425af6a
--- /dev/null
+++ b/ext/json/lib/json/ext/generator/state.rb
@@ -0,0 +1,103 @@
+# frozen_string_literal: true
+
+module JSON
+ module Ext
+ module Generator
+ class State
+ # call-seq: new(opts = {})
+ #
+ # Instantiates a new State object, configured by _opts_.
+ #
+ # Argument +opts+, if given, contains a \Hash of options for the generation.
+ # See {Generating Options}[rdoc-ref:JSON@Generating+Options].
+ def initialize(opts = nil)
+ if opts && !opts.empty?
+ configure(opts)
+ end
+ end
+
+ # call-seq: configure(opts)
+ #
+ # Configure this State instance with the Hash _opts_, and return
+ # itself.
+ def configure(opts)
+ unless opts.is_a?(Hash)
+ if opts.respond_to?(:to_hash)
+ opts = opts.to_hash
+ elsif opts.respond_to?(:to_h)
+ opts = opts.to_h
+ else
+ raise TypeError, "can't convert #{opts.class} into Hash"
+ end
+ end
+ _configure(opts)
+ end
+
+ alias_method :merge, :configure
+
+ # call-seq: to_h
+ #
+ # Returns the configuration instance variables as a hash, that can be
+ # passed to the configure method.
+ def to_h
+ result = {
+ indent: indent,
+ space: space,
+ space_before: space_before,
+ object_nl: object_nl,
+ array_nl: array_nl,
+ as_json: as_json,
+ allow_nan: allow_nan?,
+ ascii_only: ascii_only?,
+ max_nesting: max_nesting,
+ script_safe: script_safe?,
+ strict: strict?,
+ depth: depth,
+ buffer_initial_length: buffer_initial_length,
+ }
+
+ allow_duplicate_key = allow_duplicate_key?
+ unless allow_duplicate_key.nil?
+ result[:allow_duplicate_key] = allow_duplicate_key
+ end
+
+ instance_variables.each do |iv|
+ iv = iv.to_s[1..-1]
+ result[iv.to_sym] = self[iv]
+ end
+
+ result
+ end
+
+ alias_method :to_hash, :to_h
+
+ # call-seq: [](name)
+ #
+ # Returns the value returned by method +name+.
+ def [](name)
+ ::JSON.deprecation_warning("JSON::State#[] is deprecated and will be removed in json 3.0.0")
+
+ if respond_to?(name)
+ __send__(name)
+ else
+ instance_variable_get("@#{name}") if
+ instance_variables.include?("@#{name}".to_sym) # avoid warning
+ end
+ end
+
+ # call-seq: []=(name, value)
+ #
+ # Sets the attribute name to value.
+ def []=(name, value)
+ ::JSON.deprecation_warning("JSON::State#[]= is deprecated and will be removed in json 3.0.0")
+
+ if respond_to?(name_writer = "#{name}=")
+ __send__ name_writer, value
+ else
+ instance_variable_set "@#{name}", value
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/ext/json/lib/json/generic_object.rb b/ext/json/lib/json/generic_object.rb
new file mode 100644
index 0000000000..5c8ace354b
--- /dev/null
+++ b/ext/json/lib/json/generic_object.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+begin
+ require 'ostruct'
+rescue LoadError
+ warn "JSON::GenericObject requires 'ostruct'. Please install it with `gem install ostruct`."
+end
+
+module JSON
+ class GenericObject < OpenStruct
+ class << self
+ alias [] new
+
+ def json_creatable?
+ @json_creatable
+ end
+
+ attr_writer :json_creatable
+
+ def json_create(data)
+ data = data.dup
+ data.delete JSON.create_id
+ self[data]
+ end
+
+ def from_hash(object)
+ case
+ when object.respond_to?(:to_hash)
+ result = new
+ object.to_hash.each do |key, value|
+ result[key] = from_hash(value)
+ end
+ result
+ when object.respond_to?(:to_ary)
+ object.to_ary.map { |a| from_hash(a) }
+ else
+ object
+ end
+ end
+
+ def load(source, proc = nil, opts = {})
+ result = ::JSON.load(source, proc, opts.merge(:object_class => self))
+ result.nil? ? new : result
+ end
+
+ def dump(obj, *args)
+ ::JSON.dump(obj, *args)
+ end
+ end
+ self.json_creatable = false
+
+ def to_hash
+ table
+ end
+
+ def |(other)
+ self.class[other.to_hash.merge(to_hash)]
+ end
+
+ def as_json(*)
+ { JSON.create_id => self.class.name }.merge to_hash
+ end
+
+ def to_json(*a)
+ as_json.to_json(*a)
+ end
+ end if defined?(::OpenStruct)
+end
diff --git a/ext/json/lib/json/version.rb b/ext/json/lib/json/version.rb
index beff08b1c7..30c0a71d2f 100644
--- a/ext/json/lib/json/version.rb
+++ b/ext/json/lib/json/version.rb
@@ -1,8 +1,5 @@
+# frozen_string_literal: true
+
module JSON
- # JSON version
- VERSION = '1.5.0'
- VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc:
- VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
- VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
- VERSION_BUILD = VERSION_ARRAY[2] # :nodoc:
+ VERSION = '2.19.8'
end
diff --git a/ext/json/parser/depend b/ext/json/parser/depend
index 5eaf6dd040..d4737b1dfb 100644
--- a/ext/json/parser/depend
+++ b/ext/json/parser/depend
@@ -1 +1,182 @@
-parser.o: parser.c parser.h
+$(OBJS): $(ruby_headers)
+parser.o: parser.c $(srcdir)/../fbuffer/fbuffer.h
+
+# AUTOGENERATED DEPENDENCIES START
+parser.o: $(RUBY_EXTCONF_H)
+parser.o: $(arch_hdrdir)/ruby/config.h
+parser.o: $(hdrdir)/ruby.h
+parser.o: $(hdrdir)/ruby/assert.h
+parser.o: $(hdrdir)/ruby/backward.h
+parser.o: $(hdrdir)/ruby/backward/2/assume.h
+parser.o: $(hdrdir)/ruby/backward/2/attributes.h
+parser.o: $(hdrdir)/ruby/backward/2/bool.h
+parser.o: $(hdrdir)/ruby/backward/2/inttypes.h
+parser.o: $(hdrdir)/ruby/backward/2/limits.h
+parser.o: $(hdrdir)/ruby/backward/2/long_long.h
+parser.o: $(hdrdir)/ruby/backward/2/stdalign.h
+parser.o: $(hdrdir)/ruby/backward/2/stdarg.h
+parser.o: $(hdrdir)/ruby/defines.h
+parser.o: $(hdrdir)/ruby/encoding.h
+parser.o: $(hdrdir)/ruby/intern.h
+parser.o: $(hdrdir)/ruby/internal/abi.h
+parser.o: $(hdrdir)/ruby/internal/anyargs.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/char.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/double.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/int.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/long.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/short.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h
+parser.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h
+parser.o: $(hdrdir)/ruby/internal/assume.h
+parser.o: $(hdrdir)/ruby/internal/attr/alloc_size.h
+parser.o: $(hdrdir)/ruby/internal/attr/artificial.h
+parser.o: $(hdrdir)/ruby/internal/attr/cold.h
+parser.o: $(hdrdir)/ruby/internal/attr/const.h
+parser.o: $(hdrdir)/ruby/internal/attr/constexpr.h
+parser.o: $(hdrdir)/ruby/internal/attr/deprecated.h
+parser.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h
+parser.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h
+parser.o: $(hdrdir)/ruby/internal/attr/error.h
+parser.o: $(hdrdir)/ruby/internal/attr/flag_enum.h
+parser.o: $(hdrdir)/ruby/internal/attr/forceinline.h
+parser.o: $(hdrdir)/ruby/internal/attr/format.h
+parser.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h
+parser.o: $(hdrdir)/ruby/internal/attr/noalias.h
+parser.o: $(hdrdir)/ruby/internal/attr/nodiscard.h
+parser.o: $(hdrdir)/ruby/internal/attr/noexcept.h
+parser.o: $(hdrdir)/ruby/internal/attr/noinline.h
+parser.o: $(hdrdir)/ruby/internal/attr/nonnull.h
+parser.o: $(hdrdir)/ruby/internal/attr/noreturn.h
+parser.o: $(hdrdir)/ruby/internal/attr/packed_struct.h
+parser.o: $(hdrdir)/ruby/internal/attr/pure.h
+parser.o: $(hdrdir)/ruby/internal/attr/restrict.h
+parser.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h
+parser.o: $(hdrdir)/ruby/internal/attr/warning.h
+parser.o: $(hdrdir)/ruby/internal/attr/weakref.h
+parser.o: $(hdrdir)/ruby/internal/cast.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is/apple.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is/clang.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is/intel.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h
+parser.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h
+parser.o: $(hdrdir)/ruby/internal/compiler_since.h
+parser.o: $(hdrdir)/ruby/internal/config.h
+parser.o: $(hdrdir)/ruby/internal/constant_p.h
+parser.o: $(hdrdir)/ruby/internal/core.h
+parser.o: $(hdrdir)/ruby/internal/core/rarray.h
+parser.o: $(hdrdir)/ruby/internal/core/rbasic.h
+parser.o: $(hdrdir)/ruby/internal/core/rbignum.h
+parser.o: $(hdrdir)/ruby/internal/core/rclass.h
+parser.o: $(hdrdir)/ruby/internal/core/rdata.h
+parser.o: $(hdrdir)/ruby/internal/core/rfile.h
+parser.o: $(hdrdir)/ruby/internal/core/rhash.h
+parser.o: $(hdrdir)/ruby/internal/core/robject.h
+parser.o: $(hdrdir)/ruby/internal/core/rregexp.h
+parser.o: $(hdrdir)/ruby/internal/core/rstring.h
+parser.o: $(hdrdir)/ruby/internal/core/rstruct.h
+parser.o: $(hdrdir)/ruby/internal/core/rtypeddata.h
+parser.o: $(hdrdir)/ruby/internal/ctype.h
+parser.o: $(hdrdir)/ruby/internal/dllexport.h
+parser.o: $(hdrdir)/ruby/internal/dosish.h
+parser.o: $(hdrdir)/ruby/internal/encoding/coderange.h
+parser.o: $(hdrdir)/ruby/internal/encoding/ctype.h
+parser.o: $(hdrdir)/ruby/internal/encoding/encoding.h
+parser.o: $(hdrdir)/ruby/internal/encoding/pathname.h
+parser.o: $(hdrdir)/ruby/internal/encoding/re.h
+parser.o: $(hdrdir)/ruby/internal/encoding/sprintf.h
+parser.o: $(hdrdir)/ruby/internal/encoding/string.h
+parser.o: $(hdrdir)/ruby/internal/encoding/symbol.h
+parser.o: $(hdrdir)/ruby/internal/encoding/transcode.h
+parser.o: $(hdrdir)/ruby/internal/error.h
+parser.o: $(hdrdir)/ruby/internal/eval.h
+parser.o: $(hdrdir)/ruby/internal/event.h
+parser.o: $(hdrdir)/ruby/internal/fl_type.h
+parser.o: $(hdrdir)/ruby/internal/gc.h
+parser.o: $(hdrdir)/ruby/internal/glob.h
+parser.o: $(hdrdir)/ruby/internal/globals.h
+parser.o: $(hdrdir)/ruby/internal/has/attribute.h
+parser.o: $(hdrdir)/ruby/internal/has/builtin.h
+parser.o: $(hdrdir)/ruby/internal/has/c_attribute.h
+parser.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h
+parser.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h
+parser.o: $(hdrdir)/ruby/internal/has/extension.h
+parser.o: $(hdrdir)/ruby/internal/has/feature.h
+parser.o: $(hdrdir)/ruby/internal/has/warning.h
+parser.o: $(hdrdir)/ruby/internal/intern/array.h
+parser.o: $(hdrdir)/ruby/internal/intern/bignum.h
+parser.o: $(hdrdir)/ruby/internal/intern/class.h
+parser.o: $(hdrdir)/ruby/internal/intern/compar.h
+parser.o: $(hdrdir)/ruby/internal/intern/complex.h
+parser.o: $(hdrdir)/ruby/internal/intern/cont.h
+parser.o: $(hdrdir)/ruby/internal/intern/dir.h
+parser.o: $(hdrdir)/ruby/internal/intern/enum.h
+parser.o: $(hdrdir)/ruby/internal/intern/enumerator.h
+parser.o: $(hdrdir)/ruby/internal/intern/error.h
+parser.o: $(hdrdir)/ruby/internal/intern/eval.h
+parser.o: $(hdrdir)/ruby/internal/intern/file.h
+parser.o: $(hdrdir)/ruby/internal/intern/hash.h
+parser.o: $(hdrdir)/ruby/internal/intern/io.h
+parser.o: $(hdrdir)/ruby/internal/intern/load.h
+parser.o: $(hdrdir)/ruby/internal/intern/marshal.h
+parser.o: $(hdrdir)/ruby/internal/intern/numeric.h
+parser.o: $(hdrdir)/ruby/internal/intern/object.h
+parser.o: $(hdrdir)/ruby/internal/intern/parse.h
+parser.o: $(hdrdir)/ruby/internal/intern/proc.h
+parser.o: $(hdrdir)/ruby/internal/intern/process.h
+parser.o: $(hdrdir)/ruby/internal/intern/random.h
+parser.o: $(hdrdir)/ruby/internal/intern/range.h
+parser.o: $(hdrdir)/ruby/internal/intern/rational.h
+parser.o: $(hdrdir)/ruby/internal/intern/re.h
+parser.o: $(hdrdir)/ruby/internal/intern/ruby.h
+parser.o: $(hdrdir)/ruby/internal/intern/select.h
+parser.o: $(hdrdir)/ruby/internal/intern/select/largesize.h
+parser.o: $(hdrdir)/ruby/internal/intern/set.h
+parser.o: $(hdrdir)/ruby/internal/intern/signal.h
+parser.o: $(hdrdir)/ruby/internal/intern/sprintf.h
+parser.o: $(hdrdir)/ruby/internal/intern/string.h
+parser.o: $(hdrdir)/ruby/internal/intern/struct.h
+parser.o: $(hdrdir)/ruby/internal/intern/thread.h
+parser.o: $(hdrdir)/ruby/internal/intern/time.h
+parser.o: $(hdrdir)/ruby/internal/intern/variable.h
+parser.o: $(hdrdir)/ruby/internal/intern/vm.h
+parser.o: $(hdrdir)/ruby/internal/interpreter.h
+parser.o: $(hdrdir)/ruby/internal/iterator.h
+parser.o: $(hdrdir)/ruby/internal/memory.h
+parser.o: $(hdrdir)/ruby/internal/method.h
+parser.o: $(hdrdir)/ruby/internal/module.h
+parser.o: $(hdrdir)/ruby/internal/newobj.h
+parser.o: $(hdrdir)/ruby/internal/scan_args.h
+parser.o: $(hdrdir)/ruby/internal/special_consts.h
+parser.o: $(hdrdir)/ruby/internal/static_assert.h
+parser.o: $(hdrdir)/ruby/internal/stdalign.h
+parser.o: $(hdrdir)/ruby/internal/stdbool.h
+parser.o: $(hdrdir)/ruby/internal/stdckdint.h
+parser.o: $(hdrdir)/ruby/internal/symbol.h
+parser.o: $(hdrdir)/ruby/internal/value.h
+parser.o: $(hdrdir)/ruby/internal/value_type.h
+parser.o: $(hdrdir)/ruby/internal/variable.h
+parser.o: $(hdrdir)/ruby/internal/warning_push.h
+parser.o: $(hdrdir)/ruby/internal/xmalloc.h
+parser.o: $(hdrdir)/ruby/missing.h
+parser.o: $(hdrdir)/ruby/onigmo.h
+parser.o: $(hdrdir)/ruby/oniguruma.h
+parser.o: $(hdrdir)/ruby/ruby.h
+parser.o: $(hdrdir)/ruby/st.h
+parser.o: $(hdrdir)/ruby/subst.h
+parser.o: $(srcdir)/../fbuffer/fbuffer.h
+parser.o: $(srcdir)/../json.h
+parser.o: $(srcdir)/../simd/simd.h
+parser.o: $(srcdir)/../vendor/ryu.h
+parser.o: parser.c
+# AUTOGENERATED DEPENDENCIES END
diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb
index 64d1716a7a..a9d740c755 100644
--- a/ext/json/parser/extconf.rb
+++ b/ext/json/parser/extconf.rb
@@ -1,5 +1,21 @@
+# frozen_string_literal: true
require 'mkmf'
-require 'rbconfig'
-have_header("re.h")
+$defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0"
+have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0
+have_func("rb_str_to_interned_str", "ruby.h") # RUBY_VERSION >= 3.0
+have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2
+have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby
+have_func("ruby_xfree_sized", "ruby.h") # RUBY_VERSION >= 4.1
+
+if RUBY_ENGINE == "ruby"
+ have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3
+end
+
+append_cflags("-std=c99")
+
+if enable_config('parser-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
+ load __dir__ + "/../simd/conf.rb"
+end
+
create_makefile 'json/ext/parser'
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 418c1c32f0..c0631728c3 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -1,46 +1,525 @@
+#include "../json.h"
+#include "../vendor/ryu.h"
+#include "../simd/simd.h"
-#line 1 "parser.rl"
-#include "parser.h"
+static VALUE mJSON, eNestingError, Encoding_UTF_8;
+static VALUE CNaN, CInfinity, CMinusInfinity;
-/* unicode */
+static ID i_new, i_try_convert, i_uminus, i_encode;
-static const char digit_values[256] = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
- -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1
+static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
+ sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
+ sym_allow_duplicate_key;
+
+static int binary_encindex;
+static int utf8_encindex;
+
+#ifndef HAVE_RB_HASH_BULK_INSERT
+// For TruffleRuby
+static void
+rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
+{
+ long index = 0;
+ while (index < count) {
+ VALUE name = pairs[index++];
+ VALUE value = pairs[index++];
+ rb_hash_aset(hash, name, value);
+ }
+ RB_GC_GUARD(hash);
+}
+#endif
+
+#ifndef HAVE_RB_HASH_NEW_CAPA
+#define rb_hash_new_capa(n) rb_hash_new()
+#endif
+
+#ifndef HAVE_RB_STR_TO_INTERNED_STR
+static VALUE rb_str_to_interned_str(VALUE str)
+{
+ return rb_funcall(rb_str_freeze(str), i_uminus, 0);
+}
+#endif
+
+/* name cache */
+
+#include <string.h>
+#include <ctype.h>
+
+// Object names are likely to be repeated, and are frozen.
+// As such we can re-use them if we keep a cache of the ones we've seen so far,
+// and save much more expensive lookups into the global fstring table.
+// This cache implementation is deliberately simple, as we're optimizing for compactness,
+// to be able to fit safely on the stack.
+// As such, binary search into a sorted array gives a good tradeoff between compactness and
+// performance.
+#define JSON_RVALUE_CACHE_CAPA 63
+typedef struct rvalue_cache_struct {
+ int length;
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
+} rvalue_cache;
+
+static rb_encoding *enc_utf8;
+
+#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
+
+static inline VALUE build_interned_string(const char *str, const long length)
+{
+# ifdef HAVE_RB_ENC_INTERNED_STR
+ return rb_enc_interned_str(str, length, enc_utf8);
+# else
+ VALUE rstring = rb_utf8_str_new(str, length);
+ return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
+# endif
+}
+
+static inline VALUE build_symbol(const char *str, const long length)
+{
+ return rb_str_intern(build_interned_string(str, length));
+}
+
+static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
+{
+ MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
+ cache->length++;
+ cache->entries[index] = rstring;
+}
+
+#define rstring_cache_memcmp memcmp
+
+#if JSON_CPU_LITTLE_ENDIAN_64BITS
+#if __has_builtin(__builtin_bswap64)
+#undef rstring_cache_memcmp
+ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
+{
+ // The libc memcmp has numerous complex optimizations, but in this particular case,
+ // we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
+ // inline a simpler memcmp outperforms calling the libc version.
+ long i = 0;
+
+ for (; i + 8 <= length; i += 8) {
+ uint64_t a, b;
+ memcpy(&a, str + i, 8);
+ memcpy(&b, rptr + i, 8);
+ if (a != b) {
+ a = __builtin_bswap64(a);
+ b = __builtin_bswap64(b);
+ return (a < b) ? -1 : 1;
+ }
+ }
+
+ for (; i < length; i++) {
+ if (str[i] != rptr[i]) {
+ return (str[i] < rptr[i]) ? -1 : 1;
+ }
+ }
+
+ return 0;
+}
+#endif
+#endif
+
+ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
+{
+ const char *rstring_ptr;
+ long rstring_length;
+
+ RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
+
+ if (length == rstring_length) {
+ return rstring_cache_memcmp(str, rstring_ptr, length);
+ } else {
+ return (int)(length - rstring_length);
+ }
+}
+
+ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
+{
+ int low = 0;
+ int high = cache->length - 1;
+
+ while (low <= high) {
+ int mid = (high + low) >> 1;
+ VALUE entry = cache->entries[mid];
+ int cmp = rstring_cache_cmp(str, length, entry);
+
+ if (cmp == 0) {
+ return entry;
+ } else if (cmp > 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ }
+ }
+
+ VALUE rstring = build_interned_string(str, length);
+
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
+ rvalue_cache_insert_at(cache, low, rstring);
+ }
+ return rstring;
+}
+
+static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
+{
+ int low = 0;
+ int high = cache->length - 1;
+
+ while (low <= high) {
+ int mid = (high + low) >> 1;
+ VALUE entry = cache->entries[mid];
+ int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
+
+ if (cmp == 0) {
+ return entry;
+ } else if (cmp > 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ }
+ }
+
+ VALUE rsymbol = build_symbol(str, length);
+
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
+ rvalue_cache_insert_at(cache, low, rsymbol);
+ }
+ return rsymbol;
+}
+
+/* rvalue stack */
+
+#define RVALUE_STACK_INITIAL_CAPA 128
+
+enum rvalue_stack_type {
+ RVALUE_STACK_HEAP_ALLOCATED = 0,
+ RVALUE_STACK_STACK_ALLOCATED = 1,
};
-static UTF32 unescape_unicode(const unsigned char *p)
-{
- char b;
- UTF32 result = 0;
- b = digit_values[p[0]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- result = (result << 4) | b;
- b = digit_values[p[1]];
- result = (result << 4) | b;
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- b = digit_values[p[2]];
- result = (result << 4) | b;
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- b = digit_values[p[3]];
- result = (result << 4) | b;
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- return result;
+typedef struct rvalue_stack_struct {
+ enum rvalue_stack_type type;
+ long capa;
+ long head;
+ VALUE *ptr;
+} rvalue_stack;
+
+static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
+
+static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
+{
+ long required = stack->capa * 2;
+
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
+ stack = rvalue_stack_spill(stack, handle, stack_ref);
+ } else {
+ JSON_SIZED_REALLOC_N(stack->ptr, VALUE, required, stack->capa);
+ stack->capa = required;
+ }
+ return stack;
+}
+
+static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
+{
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
+ stack = rvalue_stack_grow(stack, handle, stack_ref);
+ }
+ stack->ptr[stack->head] = value;
+ stack->head++;
+ return value;
+}
+
+static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
+{
+ return stack->ptr + (stack->head - count);
+}
+
+static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
+{
+ stack->head -= count;
+}
+
+static void rvalue_stack_mark(void *ptr)
+{
+ rvalue_stack *stack = (rvalue_stack *)ptr;
+ long index;
+ if (stack && stack->ptr) {
+ for (index = 0; index < stack->head; index++) {
+ rb_gc_mark_movable(stack->ptr[index]);
+ }
+ }
+}
+
+static void rvalue_stack_free_buffer(rvalue_stack *stack)
+{
+ JSON_SIZED_FREE_N(stack->ptr, stack->capa);
+ stack->ptr = NULL;
+}
+
+static void rvalue_stack_free(void *ptr)
+{
+ rvalue_stack *stack = (rvalue_stack *)ptr;
+ if (stack) {
+ rvalue_stack_free_buffer(stack);
+#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
+ JSON_SIZED_FREE(stack);
+#endif
+ }
+}
+
+static size_t rvalue_stack_memsize(const void *ptr)
+{
+ const rvalue_stack *stack = (const rvalue_stack *)ptr;
+ size_t memsize = sizeof(VALUE) * stack->capa;
+#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
+ memsize += sizeof(rvalue_stack);
+#endif
+ return memsize;
+}
+
+static void rvalue_stack_compact(void *ptr)
+{
+ rvalue_stack *stack = (rvalue_stack *)ptr;
+ long index;
+ if (stack && stack->ptr) {
+ for (index = 0; index < stack->head; index++) {
+ stack->ptr[index] = rb_gc_location(stack->ptr[index]);
+ }
+ }
+}
+
+static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
+ .wrap_struct_name = "JSON::Ext::Parser/rvalue_stack",
+ .function = {
+ .dmark = rvalue_stack_mark,
+ .dfree = rvalue_stack_free,
+ .dsize = rvalue_stack_memsize,
+ .dcompact = rvalue_stack_compact,
+ },
+ // We deliberately don't declare rvalue_stack as RUBY_TYPED_WB_PROTECTED
+ // because it churns a lot of values so triggering write barriers every time is very costly.
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
+};
+
+static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
+{
+ rvalue_stack *stack;
+ *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
+ *stack_ref = stack;
+ MEMCPY(stack, old_stack, rvalue_stack, 1);
+
+ stack->capa = old_stack->capa << 1;
+ stack->ptr = ALLOC_N(VALUE, stack->capa);
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
+ MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
+ return stack;
}
-static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
+static void rvalue_stack_eagerly_release(VALUE handle)
+{
+ if (handle) {
+ rvalue_stack *stack;
+ TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
+#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
+ rvalue_stack_free_buffer(stack);
+#else
+ rvalue_stack_free(stack);
+ RTYPEDDATA_DATA(handle) = NULL;
+#endif
+ }
+}
+
+/* frame stack */
+
+// Iterative (non-recursive) parsing keeps an explicit stack of the containers
+// currently being built, instead of relying on the C call stack. Each frame
+// only needs enough bookkeeping to close its container: which kind it is, the
+// rvalue_stack position where its children start (so we know how many to pop),
+// and the cursor at its opening brace (used to rewind for duplicate key
+// errors). Frames hold no VALUEs, so this stack needs no GC marking; it reuses
+// the same stack-allocated-with-heap-spill strategy as the rvalue_stack so that
+// it's freed even if parsing raises.
+//
+// The lifecycle helpers below (grow/push/peek/pop/spill/free/eagerly_release
+// and the rb_data_type_t) deliberately mirror their rvalue_stack counterparts
+// -- the element type and the absence of a mark function are the only real
+// differences. Keep the two in sync: a fix to the spill/release or
+// HAVE_RUBY_TYPED_EMBEDDABLE handling in one almost certainly belongs in the
+// other.
+#define JSON_FRAME_STACK_INITIAL_CAPA 32
+
+enum json_frame_type {
+ JSON_FRAME_ROOT, // == JSON_PHASE_DONE
+ JSON_FRAME_ARRAY, // == JSON_PHASE_ARRAY_COMMA
+ JSON_FRAME_OBJECT, // == JSON_PHASE_OBJECT_COMMA
+};
+
+// Where a frame is within its container's grammar. This is the entirety of the
+// parser's "what to do next" state: json_parse_any dispatches on the top
+// frame's phase and holds no resume state in C locals, so a parse can stop at
+// any value boundary and be resumed purely from the (persistable) frame stack.
+//
+// The first three phases are deliberately equal to the corresponding json_frame_type
+// to simplify the transition of phase in json_value_completed.
+enum json_frame_phase {
+ JSON_PHASE_DONE = JSON_FRAME_ROOT, // root only: the document value has been parsed
+ JSON_PHASE_ARRAY_COMMA = JSON_FRAME_ARRAY, // after a value: expecting ',' or the closing ']'
+ JSON_PHASE_OBJECT_COMMA = JSON_FRAME_OBJECT, // after a value: expecting ',' or the closing '}'
+ JSON_PHASE_VALUE, // expecting a value (document root, array element, or object value after ':')
+ JSON_PHASE_OBJECT_KEY, // expecting a '"' key (after '{' or ',')
+ JSON_PHASE_OBJECT_COLON, // object only: after a key, expecting ':'
+};
+
+typedef struct json_frame_struct {
+ enum json_frame_type type;
+ enum json_frame_phase phase;
+ long value_stack_head; // rvalue_stack->head when this container opened
+ const char *start_cursor; // object frames only (the '{'); NULL otherwise
+} json_frame;
+
+typedef struct json_frame_stack_struct {
+ enum rvalue_stack_type type; // shared with rvalue_stack: is ptr stack- or heap-allocated
+ long capa;
+ long head;
+ json_frame *ptr;
+} json_frame_stack;
+
+enum duplicate_key_action {
+ JSON_DEPRECATED = 0,
+ JSON_IGNORE,
+ JSON_RAISE,
+};
+
+typedef struct JSON_ParserStruct {
+ VALUE on_load_proc;
+ VALUE decimal_class;
+ ID decimal_method_id;
+ enum duplicate_key_action on_duplicate_key;
+ int max_nesting;
+ bool allow_nan;
+ bool allow_trailing_comma;
+ bool allow_control_characters;
+ bool allow_invalid_escape;
+ bool symbolize_names;
+ bool freeze;
+} JSON_ParserConfig;
+
+typedef struct JSON_ParserStateStruct {
+ VALUE *value_stack_handle;
+ VALUE *frame_stack_handle;
+ const char *start;
+ const char *cursor;
+ const char *end;
+ rvalue_stack *value_stack;
+ json_frame_stack *frames;
+ rvalue_cache name_cache;
+ int in_array;
+ int current_nesting;
+ unsigned int emitted_deprecations;
+} JSON_ParserState;
+
+static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref);
+
+static json_frame_stack *json_frame_stack_grow(json_frame_stack *stack, VALUE *handle, json_frame_stack **stack_ref)
+{
+ long required = stack->capa * 2;
+
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
+ stack = json_frame_stack_spill(stack, handle, stack_ref);
+ } else {
+ JSON_SIZED_REALLOC_N(stack->ptr, json_frame, required, stack->capa);
+ stack->capa = required;
+ }
+ return stack;
+}
+
+static json_frame *json_frame_stack_push(JSON_ParserState *state, json_frame frame)
+{
+ json_frame_stack *stack = state->frames;
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
+ stack = json_frame_stack_grow(stack, state->frame_stack_handle, &state->frames);
+ }
+
+ json_frame *frame_ptr = &stack->ptr[stack->head++];
+ *frame_ptr = frame;
+ return frame_ptr;
+}
+
+static inline json_frame *json_frame_stack_peek(json_frame_stack *stack)
+{
+ return &stack->ptr[stack->head - 1];
+}
+
+static inline void json_frame_stack_pop(json_frame_stack *stack)
+{
+ stack->head--;
+}
+
+static void json_frame_stack_free_buffer(json_frame_stack *stack)
+{
+ JSON_SIZED_FREE_N(stack->ptr, stack->capa);
+ stack->ptr = NULL;
+}
+
+static void json_frame_stack_free(void *ptr)
+{
+ json_frame_stack *stack = (json_frame_stack *)ptr;
+ if (stack) {
+ json_frame_stack_free_buffer(stack);
+#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
+ JSON_SIZED_FREE(stack);
+#endif
+ }
+}
+
+static size_t json_frame_stack_memsize(const void *ptr)
+{
+ const json_frame_stack *stack = (const json_frame_stack *)ptr;
+
+ size_t memsize = sizeof(json_frame) * stack->capa;
+#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
+ memsize += sizeof(json_frame_stack);
+#endif
+ return memsize;
+}
+
+static const rb_data_type_t JSON_Parser_frame_stack_type = {
+ .wrap_struct_name = "JSON::Ext::Parser/frame_stack",
+ .function = {
+ .dmark = NULL,
+ .dfree = json_frame_stack_free,
+ .dsize = json_frame_stack_memsize,
+ },
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
+};
+
+static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref)
+{
+ json_frame_stack *stack;
+ *handle = TypedData_Make_Struct(0, json_frame_stack, &JSON_Parser_frame_stack_type, stack);
+ *stack_ref = stack;
+ MEMCPY(stack, old_stack, json_frame_stack, 1);
+
+ stack->capa = old_stack->capa << 1;
+ stack->ptr = ALLOC_N(json_frame, stack->capa);
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
+ MEMCPY(stack->ptr, old_stack->ptr, json_frame, old_stack->head);
+ return stack;
+}
+
+static void json_frame_stack_eagerly_release(VALUE handle)
+{
+ if (handle) {
+ json_frame_stack *stack;
+ TypedData_Get_Struct(handle, json_frame_stack, &JSON_Parser_frame_stack_type, stack);
+#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
+ json_frame_stack_free_buffer(stack);
+#else
+ json_frame_stack_free(stack);
+ RTYPEDDATA_DATA(handle) = NULL;
+#endif
+ }
+}
+
+static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
{
int len = 1;
if (ch <= 0x7F) {
@@ -66,1458 +545,1234 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
return len;
}
-#ifdef HAVE_RUBY_ENCODING_H
-static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE,
- CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE;
-static ID i_encoding, i_encode, i_encode_bang, i_force_encoding;
-#else
-static ID i_iconv;
+static inline size_t rest(JSON_ParserState *state) {
+ return state->end - state->cursor;
+}
+
+static inline bool eos(JSON_ParserState *state) {
+ return state->cursor >= state->end;
+}
+
+static inline char peek(JSON_ParserState *state)
+{
+ if (RB_UNLIKELY(eos(state))) {
+ return 0;
+ }
+ return *state->cursor;
+}
+
+static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
+{
+ JSON_ASSERT(state->cursor <= state->end);
+
+ // Redundant but helpful for hardening
+ if (RB_UNLIKELY(state->cursor > state->end)) {
+ state->cursor = state->end;
+ }
+
+ const char *cursor = state->cursor;
+ long column = 0;
+ long line = 1;
+
+ while (cursor >= state->start) {
+ if (*cursor-- == '\n') {
+ break;
+ }
+ column++;
+ }
+
+ while (cursor >= state->start) {
+ if (*cursor-- == '\n') {
+ line++;
+ }
+ }
+ *line_out = line;
+ *column_out = column;
+}
+
+static void emit_parse_warning(const char *message, JSON_ParserState *state)
+{
+ long line, column;
+ cursor_position(state, &line, &column);
+
+ VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column);
+ rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning);
+}
+
+#define PARSE_ERROR_FRAGMENT_LEN 32
+
+static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
+{
+ unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
+
+ const char *ptr = "EOF";
+ if (state->cursor && state->cursor < state->end) {
+ ptr = state->cursor;
+ size_t len = 0;
+ while (len < PARSE_ERROR_FRAGMENT_LEN) {
+ char ch = ptr[len];
+ if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') {
+ break;
+ }
+ len++;
+ }
+
+ if (len) {
+ buffer[0] = '\'';
+ MEMCPY(buffer + 1, ptr, char, len);
+
+ while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte
+ len--;
+ }
+
+ if (buffer[len] >= 0xC0) { // multibyte character start
+ len--;
+ }
+
+ buffer[len + 1] = '\'';
+ buffer[len + 2] = '\0';
+ ptr = (const char *)buffer;
+ }
+ }
+
+ VALUE message = rb_enc_sprintf(enc_utf8, format, ptr);
+ rb_str_catf(message, " at line %ld column %ld", line, column);
+ return message;
+}
+
+static VALUE parse_error_new(VALUE message, long line, long column)
+{
+ VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
+ rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
+ rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
+ return exc;
+}
+
+NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
+{
+ long line, column;
+ cursor_position(state, &line, &column);
+ VALUE message = build_parse_error_message(format, state, line, column);
+ rb_exc_raise(parse_error_new(message, line, column));
+}
+
+NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
+{
+ state->cursor = at;
+ raise_parse_error(format, state);
+}
+
+/* unicode */
+
+static const signed char digit_values[256] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
+ -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1
+};
+
+static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
+{
+ if (RB_UNLIKELY(sp > spe - 4)) {
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
+ }
+
+ const unsigned char *p = (const unsigned char *)sp;
+
+ const signed char b0 = digit_values[p[0]];
+ const signed char b1 = digit_values[p[1]];
+ const signed char b2 = digit_values[p[2]];
+ const signed char b3 = digit_values[p[3]];
+
+ if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
+ }
+
+ return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
+}
+
+#define GET_PARSER_CONFIG \
+ JSON_ParserConfig *config; \
+ TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
+
+static const rb_data_type_t JSON_ParserConfig_type;
+
+NOINLINE(static) void
+json_eat_comments(JSON_ParserState *state)
+{
+ const char *start = state->cursor;
+ state->cursor++;
+
+ switch (peek(state)) {
+ case '/': {
+ state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
+ if (!state->cursor) {
+ state->cursor = state->end;
+ } else {
+ state->cursor++;
+ }
+ break;
+ }
+ case '*': {
+ state->cursor++;
+
+ while (true) {
+ const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
+ if (!next_match) {
+ raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
+ }
+
+ state->cursor = next_match + 1;
+ if (peek(state) == '/') {
+ state->cursor++;
+ break;
+ }
+ }
+ break;
+ }
+ default:
+ raise_parse_error_at("unexpected token %s", state, start);
+ break;
+ }
+}
+
+ALWAYS_INLINE(static) void
+json_eat_whitespace(JSON_ParserState *state)
+{
+ while (true) {
+ switch (peek(state)) {
+ case ' ':
+ state->cursor++;
+ break;
+ case '\n':
+ state->cursor++;
+
+ // Heuristic: if we see a newline, there are likely consecutive spaces after it.
+#if JSON_CPU_LITTLE_ENDIAN_64BITS
+ while (rest(state) > 8) {
+ uint64_t chunk;
+ memcpy(&chunk, state->cursor, sizeof(uint64_t));
+ if (chunk == 0x2020202020202020) {
+ state->cursor += 8;
+ continue;
+ }
+
+ uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
+ state->cursor += consecutive_spaces;
+ break;
+ }
#endif
+ break;
+ case '\t':
+ case '\r':
+ state->cursor++;
+ break;
+ case '/':
+ json_eat_comments(state);
+ break;
+
+ default:
+ return;
+ }
+ }
+}
-static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
-static VALUE CNaN, CInfinity, CMinusInfinity;
+static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
+{
+ if (symbolize) {
+ intern = true;
+ }
+ VALUE result;
+# ifdef HAVE_RB_ENC_INTERNED_STR
+ if (intern) {
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
+ } else {
+ result = rb_utf8_str_new(start, (long)(end - start));
+ }
+# else
+ result = rb_utf8_str_new(start, (long)(end - start));
+ if (intern) {
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
+ }
+# endif
+
+ if (symbolize) {
+ result = rb_str_intern(result);
+ }
+
+ return result;
+}
-static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_object_class,
- i_array_class, i_key_p, i_deep_const_get;
-
-
-#line 108 "parser.rl"
-
-
-
-#line 90 "parser.c"
-static const int JSON_object_start = 1;
-static const int JSON_object_first_final = 27;
-static const int JSON_object_error = 0;
-
-static const int JSON_object_en_main = 1;
-
-
-#line 144 "parser.rl"
-
-
-static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
- VALUE last_name = Qnil;
- VALUE object_class = json->object_class;
-
- if (json->max_nesting && json->current_nesting > json->max_nesting) {
- rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
- }
-
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
-
-
-#line 114 "parser.c"
- {
- cs = JSON_object_start;
- }
-
-#line 159 "parser.rl"
-
-#line 121 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- if ( (*p) == 123 )
- goto st2;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 13: goto st2;
- case 32: goto st2;
- case 34: goto tr2;
- case 47: goto st23;
- case 125: goto tr4;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st2;
- goto st0;
-tr2:
-#line 127 "parser.rl"
- {
- char *np;
- json->parsing_name = 1;
- np = JSON_parse_string(json, p, pe, &last_name);
- json->parsing_name = 0;
- if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;}
- }
- goto st3;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
-#line 162 "parser.c"
- switch( (*p) ) {
- case 13: goto st3;
- case 32: goto st3;
- case 47: goto st4;
- case 58: goto st8;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st3;
- goto st0;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- switch( (*p) ) {
- case 42: goto st5;
- case 47: goto st7;
- }
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) == 42 )
- goto st6;
- goto st5;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- switch( (*p) ) {
- case 42: goto st6;
- case 47: goto st3;
- }
- goto st5;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) == 10 )
- goto st3;
- goto st7;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- switch( (*p) ) {
- case 13: goto st8;
- case 32: goto st8;
- case 34: goto tr11;
- case 45: goto tr11;
- case 47: goto st19;
- case 73: goto tr11;
- case 78: goto tr11;
- case 91: goto tr11;
- case 102: goto tr11;
- case 110: goto tr11;
- case 116: goto tr11;
- case 123: goto tr11;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr11;
- } else if ( (*p) >= 9 )
- goto st8;
- goto st0;
-tr11:
-#line 116 "parser.rl"
- {
- VALUE v = Qnil;
- char *np = JSON_parse_value(json, p, pe, &v);
- if (np == NULL) {
- p--; {p++; cs = 9; goto _out;}
+static inline bool json_string_cacheable_p(const char *string, size_t length)
+{
+ // We mostly want to cache strings that are likely to be repeated.
+ // Simple heuristics:
+ // - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
+ // - If the first character isn't a letter, we're much less likely to see this string again.
+ return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
+}
+
+static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
+{
+ bool intern = is_name || config->freeze;
+ bool symbolize = is_name && config->symbolize_names;
+ size_t bufferSize = stringEnd - string;
+
+ if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
+ VALUE cached_key;
+ if (RB_UNLIKELY(symbolize)) {
+ cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
} else {
- rb_hash_aset(*result, last_name, v);
- {p = (( np))-1;}
+ cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
}
+
+ if (RB_LIKELY(cached_key)) {
+ return cached_key;
+ }
+ }
+
+ return build_string(string, stringEnd, intern, symbolize);
+}
+
+#define JSON_MAX_UNESCAPE_POSITIONS 16
+typedef struct _json_unescape_positions {
+ long size;
+ const char **positions;
+ unsigned long additional_backslashes;
+} JSON_UnescapePositions;
+
+static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
+{
+ while (positions->size) {
+ positions->size--;
+ const char *next_position = positions->positions[0];
+ positions->positions++;
+ if (next_position >= pe) {
+ return next_position;
+ }
+ }
+
+ if (positions->additional_backslashes) {
+ positions->additional_backslashes--;
+ return memchr(pe, '\\', stringEnd - pe);
}
- goto st9;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
-#line 245 "parser.c"
- switch( (*p) ) {
- case 13: goto st9;
- case 32: goto st9;
- case 44: goto st10;
- case 47: goto st15;
- case 125: goto tr4;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st9;
- goto st0;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
- switch( (*p) ) {
- case 13: goto st10;
- case 32: goto st10;
- case 34: goto tr2;
- case 47: goto st11;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st10;
- goto st0;
-st11:
- if ( ++p == pe )
- goto _test_eof11;
-case 11:
- switch( (*p) ) {
- case 42: goto st12;
- case 47: goto st14;
- }
- goto st0;
-st12:
- if ( ++p == pe )
- goto _test_eof12;
-case 12:
- if ( (*p) == 42 )
- goto st13;
- goto st12;
-st13:
- if ( ++p == pe )
- goto _test_eof13;
-case 13:
- switch( (*p) ) {
- case 42: goto st13;
- case 47: goto st10;
- }
- goto st12;
-st14:
- if ( ++p == pe )
- goto _test_eof14;
-case 14:
- if ( (*p) == 10 )
- goto st10;
- goto st14;
-st15:
- if ( ++p == pe )
- goto _test_eof15;
-case 15:
- switch( (*p) ) {
- case 42: goto st16;
- case 47: goto st18;
- }
- goto st0;
-st16:
- if ( ++p == pe )
- goto _test_eof16;
-case 16:
- if ( (*p) == 42 )
- goto st17;
- goto st16;
-st17:
- if ( ++p == pe )
- goto _test_eof17;
-case 17:
- switch( (*p) ) {
- case 42: goto st17;
- case 47: goto st9;
- }
- goto st16;
-st18:
- if ( ++p == pe )
- goto _test_eof18;
-case 18:
- if ( (*p) == 10 )
- goto st9;
- goto st18;
-tr4:
-#line 135 "parser.rl"
- { p--; {p++; cs = 27; goto _out;} }
- goto st27;
-st27:
- if ( ++p == pe )
- goto _test_eof27;
-case 27:
-#line 341 "parser.c"
- goto st0;
-st19:
- if ( ++p == pe )
- goto _test_eof19;
-case 19:
- switch( (*p) ) {
- case 42: goto st20;
- case 47: goto st22;
- }
- goto st0;
-st20:
- if ( ++p == pe )
- goto _test_eof20;
-case 20:
- if ( (*p) == 42 )
- goto st21;
- goto st20;
-st21:
- if ( ++p == pe )
- goto _test_eof21;
-case 21:
- switch( (*p) ) {
- case 42: goto st21;
- case 47: goto st8;
- }
- goto st20;
-st22:
- if ( ++p == pe )
- goto _test_eof22;
-case 22:
- if ( (*p) == 10 )
- goto st8;
- goto st22;
-st23:
- if ( ++p == pe )
- goto _test_eof23;
-case 23:
- switch( (*p) ) {
- case 42: goto st24;
- case 47: goto st26;
- }
- goto st0;
-st24:
- if ( ++p == pe )
- goto _test_eof24;
-case 24:
- if ( (*p) == 42 )
- goto st25;
- goto st24;
-st25:
- if ( ++p == pe )
- goto _test_eof25;
-case 25:
- switch( (*p) ) {
- case 42: goto st25;
- case 47: goto st2;
- }
- goto st24;
-st26:
- if ( ++p == pe )
- goto _test_eof26;
-case 26:
- if ( (*p) == 10 )
- goto st2;
- goto st26;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof11: cs = 11; goto _test_eof;
- _test_eof12: cs = 12; goto _test_eof;
- _test_eof13: cs = 13; goto _test_eof;
- _test_eof14: cs = 14; goto _test_eof;
- _test_eof15: cs = 15; goto _test_eof;
- _test_eof16: cs = 16; goto _test_eof;
- _test_eof17: cs = 17; goto _test_eof;
- _test_eof18: cs = 18; goto _test_eof;
- _test_eof27: cs = 27; goto _test_eof;
- _test_eof19: cs = 19; goto _test_eof;
- _test_eof20: cs = 20; goto _test_eof;
- _test_eof21: cs = 21; goto _test_eof;
- _test_eof22: cs = 22; goto _test_eof;
- _test_eof23: cs = 23; goto _test_eof;
- _test_eof24: cs = 24; goto _test_eof;
- _test_eof25: cs = 25; goto _test_eof;
- _test_eof26: cs = 26; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 160 "parser.rl"
-
- if (cs >= JSON_object_first_final) {
- if (RTEST(json->create_id)) {
- VALUE klassname = rb_hash_aref(*result, json->create_id);
- if (!NIL_P(klassname)) {
- VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
- if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) {
- *result = rb_funcall(klass, i_json_create, 1, *result);
+
+ return NULL;
+}
+
+NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
+{
+ bool intern = is_name || config->freeze;
+ bool symbolize = is_name && config->symbolize_names;
+ size_t bufferSize = stringEnd - string;
+ const char *p = string, *pe = string, *bufferStart;
+ char *buffer;
+
+ VALUE result = rb_str_buf_new(bufferSize);
+ rb_enc_associate_index(result, utf8_encindex);
+ buffer = RSTRING_PTR(result);
+ bufferStart = buffer;
+
+#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
+
+ while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
+ if (pe > p) {
+ MEMCPY(buffer, p, char, pe - p);
+ buffer += pe - p;
+ }
+ switch (*++pe) {
+ case '"':
+ case '/':
+ p = pe; // nothing to unescape; we just need to skip the backslash
+ break;
+ case '\\':
+ APPEND_CHAR('\\');
+ break;
+ case 'n':
+ APPEND_CHAR('\n');
+ break;
+ case 'r':
+ APPEND_CHAR('\r');
+ break;
+ case 't':
+ APPEND_CHAR('\t');
+ break;
+ case 'b':
+ APPEND_CHAR('\b');
+ break;
+ case 'f':
+ APPEND_CHAR('\f');
+ break;
+ case 'u': {
+ uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
+ pe += 3;
+ /* To handle values above U+FFFF, we take a sequence of
+ * \uXXXX escapes in the U+D800..U+DBFF then
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
+ * to make a 20-bit number, then add 0x10000 to get the
+ * final codepoint.
+ *
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
+ * Area".
+ */
+ if ((ch & 0xFC00) == 0xD800) {
+ pe++;
+ if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
+ uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
+
+ if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
+ raise_parse_error_at("invalid surrogate pair at %s", state, p);
+ }
+
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
+ pe += 5;
+ } else {
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
+ break;
+ }
}
+
+ int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
+ buffer += unescape_len;
+ p = ++pe;
+ break;
}
+ default:
+ if ((unsigned char)*pe < 0x20) {
+ if (!config->allow_control_characters) {
+ if (*pe == '\n') {
+ raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
+ }
+ raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
+ }
+ }
+
+ if (config->allow_invalid_escape) {
+ APPEND_CHAR(*pe);
+ } else {
+ raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
+ }
+ break;
}
- return p + 1;
+ }
+#undef APPEND_CHAR
+
+ if (stringEnd > p) {
+ MEMCPY(buffer, p, char, stringEnd - p);
+ buffer += stringEnd - p;
+ }
+ rb_str_set_len(result, buffer - bufferStart);
+
+ if (symbolize) {
+ result = rb_str_intern(result);
+ } else if (intern) {
+ result = rb_str_to_interned_str(result);
+ }
+
+ return result;
+}
+
+#define MAX_FAST_INTEGER_SIZE 18
+#define MAX_NUMBER_STACK_BUFFER 128
+
+typedef VALUE (*json_number_decode_func_t)(const char *ptr);
+
+static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
+{
+ if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
+ char buffer[MAX_NUMBER_STACK_BUFFER];
+ MEMCPY(buffer, start, char, len);
+ buffer[len] = '\0';
+ return func(buffer);
} else {
- return NULL;
- }
-}
-
-
-#line 458 "parser.c"
-static const int JSON_value_start = 1;
-static const int JSON_value_first_final = 21;
-static const int JSON_value_error = 0;
-
-static const int JSON_value_en_main = 1;
-
-
-#line 258 "parser.rl"
-
-
-static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
-
-
-#line 474 "parser.c"
- {
- cs = JSON_value_start;
- }
-
-#line 265 "parser.rl"
-
-#line 481 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- switch( (*p) ) {
- case 34: goto tr0;
- case 45: goto tr2;
- case 73: goto st2;
- case 78: goto st9;
- case 91: goto tr5;
- case 102: goto st11;
- case 110: goto st15;
- case 116: goto st18;
- case 123: goto tr9;
- }
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr2;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-tr0:
-#line 206 "parser.rl"
- {
- char *np = JSON_parse_string(json, p, pe, result);
- if (np == NULL) { p--; {p++; cs = 21; goto _out;} } else {p = (( np))-1;}
- }
- goto st21;
-tr2:
-#line 211 "parser.rl"
- {
- char *np;
- if(pe > p + 9 && !strncmp(MinusInfinity, p, 9)) {
- if (json->allow_nan) {
- *result = CMinusInfinity;
- {p = (( p + 10))-1;}
- p--; {p++; cs = 21; goto _out;}
- } else {
- rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
+ VALUE buffer_v = rb_str_tmp_new(len);
+ char *buffer = RSTRING_PTR(buffer_v);
+ MEMCPY(buffer, start, char, len);
+ buffer[len] = '\0';
+ VALUE number = func(buffer);
+ RB_GC_GUARD(buffer_v);
+ return number;
+ }
+}
+
+static VALUE json_decode_inum(const char *buffer)
+{
+ return rb_cstr2inum(buffer, 10);
+}
+
+NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
+{
+ return json_decode_large_number(start, len, json_decode_inum);
+}
+
+static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
+{
+ if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
+ if (negative) {
+ return INT64T2NUM(-((int64_t)mantissa));
+ }
+ return UINT64T2NUM(mantissa);
+ }
+
+ return json_decode_large_integer(start, end - start);
+}
+
+static VALUE json_decode_dnum(const char *buffer)
+{
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
+}
+
+NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
+{
+ return json_decode_large_number(start, len, json_decode_dnum);
+}
+
+/* Ruby JSON optimized float decoder using the vendored Ryu algorithm.
+ * Accepts the pre-extracted mantissa and exponent from first-pass validation.
+ */
+static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int64_t exponent, bool negative,
+ const char *start, const char *end)
+{
+ if (RB_UNLIKELY(config->decimal_class)) {
+ VALUE text = rb_str_new(start, end - start);
+ return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
+ }
+
+ if (RB_UNLIKELY(exponent > INT32_MAX)) {
+ return negative ? CMinusInfinity : CInfinity;
+ }
+
+ if (RB_UNLIKELY(exponent < INT32_MIN)) {
+ return rb_float_new(negative ? -0.0 : 0.0);
+ }
+
+ // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
+ // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
+ if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
+ return json_decode_large_float(start, end - start);
+ }
+
+ return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, (int32_t)exponent, negative));
+}
+
+static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
+{
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->value_stack, count));
+ rvalue_stack_pop(state->value_stack, count);
+
+ if (config->freeze) {
+ RB_OBJ_FREEZE(array);
+ }
+
+ return array;
+}
+
+static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
+{
+ VALUE set = rb_hash_new_capa(count / 2);
+ for (size_t index = 0; index < count; index += 2) {
+ size_t before = RHASH_SIZE(set);
+ VALUE key = pairs[index];
+ rb_hash_aset(set, key, Qtrue);
+ if (RHASH_SIZE(set) == before) {
+ if (RB_SYMBOL_P(key)) {
+ return rb_sym2str(key);
}
+ return key;
}
- np = JSON_parse_float(json, p, pe, result);
- if (np != NULL) {p = (( np))-1;}
- np = JSON_parse_integer(json, p, pe, result);
- if (np != NULL) {p = (( np))-1;}
- p--; {p++; cs = 21; goto _out;}
- }
- goto st21;
-tr5:
-#line 229 "parser.rl"
- {
- char *np;
- json->current_nesting++;
- np = JSON_parse_array(json, p, pe, result);
- json->current_nesting--;
- if (np == NULL) { p--; {p++; cs = 21; goto _out;} } else {p = (( np))-1;}
- }
- goto st21;
-tr9:
-#line 237 "parser.rl"
- {
- char *np;
- json->current_nesting++;
- np = JSON_parse_object(json, p, pe, result);
- json->current_nesting--;
- if (np == NULL) { p--; {p++; cs = 21; goto _out;} } else {p = (( np))-1;}
- }
- goto st21;
-tr16:
-#line 199 "parser.rl"
- {
- if (json->allow_nan) {
- *result = CInfinity;
- } else {
- rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8);
+ }
+ return Qfalse;
+}
+
+NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
+{
+ VALUE message = rb_sprintf(
+ "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
+ rb_inspect(duplicate_key)
+ );
+
+ emit_parse_warning(RSTRING_PTR(message), state);
+ RB_GC_GUARD(message);
+}
+
+NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
+{
+ VALUE message = rb_sprintf(
+ "duplicate key %"PRIsVALUE,
+ rb_inspect(duplicate_key)
+ );
+
+ long line, column;
+ cursor_position(state, &line, &column);
+ rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
+ rb_exc_raise(parse_error_new(message, line, column));
+}
+
+NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_ParserConfig *config, size_t count, const VALUE *pairs)
+{
+ switch (config->on_duplicate_key) {
+ case JSON_IGNORE:
+ return;
+
+ case JSON_DEPRECATED:
+ // Only emit the first few deprecations to avoid spamming.
+ if (state->emitted_deprecations < 5) {
+ emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
+ state->emitted_deprecations++;
+ }
+ return;
+
+ case JSON_RAISE:
+ raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
+ return;
+ }
+ UNREACHABLE;
+}
+
+static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
+{
+ size_t entries_count = count / 2;
+ VALUE object = rb_hash_new_capa(entries_count);
+ const VALUE *pairs = rvalue_stack_peek(state->value_stack, count);
+ rb_hash_bulk_insert(count, pairs, object);
+
+ if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
+ json_on_duplicate_key(state, config, count, pairs);
+ }
+
+ rvalue_stack_pop(state->value_stack, count);
+
+ if (config->freeze) {
+ RB_OBJ_FREEZE(object);
+ }
+
+ return object;
+}
+
+static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
+{
+ if (RB_UNLIKELY(config->on_load_proc)) {
+ value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
+ }
+ rvalue_stack_push(state->value_stack, value, state->value_stack_handle, &state->value_stack);
+ return value;
+}
+
+static const bool string_scan_table[256] = {
+ // ASCII Control Characters
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ // ASCII Characters
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '\\'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+#ifdef HAVE_SIMD
+static SIMD_Implementation simd_impl = SIMD_NONE;
+#endif /* HAVE_SIMD */
+
+ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
+{
+#ifdef HAVE_SIMD
+#if defined(HAVE_SIMD_NEON)
+
+ uint64_t mask = 0;
+ if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
+ state->cursor += trailing_zeros64(mask) >> 2;
+ return true;
+ }
+
+#elif defined(HAVE_SIMD_SSE2)
+ if (simd_impl == SIMD_SSE2) {
+ int mask = 0;
+ if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
+ state->cursor += trailing_zeros(mask);
+ return true;
}
}
- goto st21;
-tr18:
-#line 192 "parser.rl"
- {
- if (json->allow_nan) {
- *result = CNaN;
- } else {
- rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2);
+#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
+#endif /* HAVE_SIMD */
+
+ while (!eos(state)) {
+ if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
+ return true;
}
+ state->cursor++;
}
- goto st21;
-tr22:
-#line 186 "parser.rl"
- {
- *result = Qfalse;
- }
- goto st21;
-tr25:
-#line 183 "parser.rl"
- {
- *result = Qnil;
- }
- goto st21;
-tr28:
-#line 189 "parser.rl"
- {
- *result = Qtrue;
- }
- goto st21;
-st21:
- if ( ++p == pe )
- goto _test_eof21;
-case 21:
-#line 245 "parser.rl"
- { p--; {p++; cs = 21; goto _out;} }
-#line 596 "parser.c"
- goto st0;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- if ( (*p) == 110 )
- goto st3;
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- if ( (*p) == 102 )
- goto st4;
- goto st0;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- if ( (*p) == 105 )
- goto st5;
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) == 110 )
- goto st6;
- goto st0;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- if ( (*p) == 105 )
- goto st7;
- goto st0;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) == 116 )
- goto st8;
- goto st0;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- if ( (*p) == 121 )
- goto tr16;
- goto st0;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
- if ( (*p) == 97 )
- goto st10;
- goto st0;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
- if ( (*p) == 78 )
- goto tr18;
- goto st0;
-st11:
- if ( ++p == pe )
- goto _test_eof11;
-case 11:
- if ( (*p) == 97 )
- goto st12;
- goto st0;
-st12:
- if ( ++p == pe )
- goto _test_eof12;
-case 12:
- if ( (*p) == 108 )
- goto st13;
- goto st0;
-st13:
- if ( ++p == pe )
- goto _test_eof13;
-case 13:
- if ( (*p) == 115 )
- goto st14;
- goto st0;
-st14:
- if ( ++p == pe )
- goto _test_eof14;
-case 14:
- if ( (*p) == 101 )
- goto tr22;
- goto st0;
-st15:
- if ( ++p == pe )
- goto _test_eof15;
-case 15:
- if ( (*p) == 117 )
- goto st16;
- goto st0;
-st16:
- if ( ++p == pe )
- goto _test_eof16;
-case 16:
- if ( (*p) == 108 )
- goto st17;
- goto st0;
-st17:
- if ( ++p == pe )
- goto _test_eof17;
-case 17:
- if ( (*p) == 108 )
- goto tr25;
- goto st0;
-st18:
- if ( ++p == pe )
- goto _test_eof18;
-case 18:
- if ( (*p) == 114 )
- goto st19;
- goto st0;
-st19:
- if ( ++p == pe )
- goto _test_eof19;
-case 19:
- if ( (*p) == 117 )
- goto st20;
- goto st0;
-st20:
- if ( ++p == pe )
- goto _test_eof20;
-case 20:
- if ( (*p) == 101 )
- goto tr28;
- goto st0;
- }
- _test_eof21: cs = 21; goto _test_eof;
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof11: cs = 11; goto _test_eof;
- _test_eof12: cs = 12; goto _test_eof;
- _test_eof13: cs = 13; goto _test_eof;
- _test_eof14: cs = 14; goto _test_eof;
- _test_eof15: cs = 15; goto _test_eof;
- _test_eof16: cs = 16; goto _test_eof;
- _test_eof17: cs = 17; goto _test_eof;
- _test_eof18: cs = 18; goto _test_eof;
- _test_eof19: cs = 19; goto _test_eof;
- _test_eof20: cs = 20; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 266 "parser.rl"
-
- if (cs >= JSON_value_first_final) {
- return p;
- } else {
- return NULL;
- }
-}
-
-
-#line 767 "parser.c"
-static const int JSON_integer_start = 1;
-static const int JSON_integer_first_final = 5;
-static const int JSON_integer_error = 0;
-
-static const int JSON_integer_en_main = 1;
-
-
-#line 282 "parser.rl"
-
-
-static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
-
-
-#line 783 "parser.c"
- {
- cs = JSON_integer_start;
- }
-
-#line 289 "parser.rl"
- json->memo = p;
-
-#line 791 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- switch( (*p) ) {
- case 45: goto st2;
- case 48: goto st3;
- }
- if ( 49 <= (*p) && (*p) <= 57 )
- goto st4;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- if ( (*p) == 48 )
- goto st3;
- if ( 49 <= (*p) && (*p) <= 57 )
- goto st4;
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st0;
- goto tr4;
-tr4:
-#line 279 "parser.rl"
- { p--; {p++; cs = 5; goto _out;} }
- goto st5;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
-#line 832 "parser.c"
- goto st0;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st4;
- goto tr4;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 291 "parser.rl"
-
- if (cs >= JSON_integer_first_final) {
- long len = p - json->memo;
- *result = rb_Integer(rb_str_new(json->memo, len));
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-
-#line 863 "parser.c"
-static const int JSON_float_start = 1;
-static const int JSON_float_first_final = 10;
-static const int JSON_float_error = 0;
-
-static const int JSON_float_en_main = 1;
-
-
-#line 313 "parser.rl"
-
-
-static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
-
-
-#line 879 "parser.c"
- {
- cs = JSON_float_start;
- }
-
-#line 320 "parser.rl"
- json->memo = p;
-
-#line 887 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- switch( (*p) ) {
- case 45: goto st2;
- case 48: goto st3;
- }
- if ( 49 <= (*p) && (*p) <= 57 )
- goto st9;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- if ( (*p) == 48 )
- goto st3;
- if ( 49 <= (*p) && (*p) <= 57 )
- goto st9;
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- switch( (*p) ) {
- case 46: goto st4;
- case 69: goto st6;
- case 101: goto st6;
- }
- goto st0;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st5;
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- switch( (*p) ) {
- case 69: goto st6;
- case 101: goto st6;
- }
- if ( (*p) > 46 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st5;
- } else if ( (*p) >= 45 )
- goto st0;
- goto tr7;
-tr7:
-#line 307 "parser.rl"
- { p--; {p++; cs = 10; goto _out;} }
- goto st10;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
-#line 952 "parser.c"
- goto st0;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- switch( (*p) ) {
- case 43: goto st7;
- case 45: goto st7;
- }
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st8;
- goto st0;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st8;
- goto st0;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- switch( (*p) ) {
- case 69: goto st0;
- case 101: goto st0;
- }
- if ( (*p) > 46 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st8;
- } else if ( (*p) >= 45 )
- goto st0;
- goto tr7;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
- switch( (*p) ) {
- case 46: goto st4;
- case 69: goto st6;
- case 101: goto st6;
- }
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st9;
- goto st0;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 322 "parser.rl"
-
- if (cs >= JSON_float_first_final) {
- long len = p - json->memo;
- *result = rb_Float(rb_str_new(json->memo, len));
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-
-
-#line 1026 "parser.c"
-static const int JSON_array_start = 1;
-static const int JSON_array_first_final = 17;
-static const int JSON_array_error = 0;
-
-static const int JSON_array_en_main = 1;
-
-
-#line 358 "parser.rl"
-
-
-static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
- VALUE array_class = json->array_class;
-
- if (json->max_nesting && json->current_nesting > json->max_nesting) {
- rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
- }
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
-
-
-#line 1048 "parser.c"
- {
- cs = JSON_array_start;
- }
-
-#line 371 "parser.rl"
-
-#line 1055 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- if ( (*p) == 91 )
- goto st2;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 13: goto st2;
- case 32: goto st2;
- case 34: goto tr2;
- case 45: goto tr2;
- case 47: goto st13;
- case 73: goto tr2;
- case 78: goto tr2;
- case 91: goto tr2;
- case 93: goto tr4;
- case 102: goto tr2;
- case 110: goto tr2;
- case 116: goto tr2;
- case 123: goto tr2;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr2;
- } else if ( (*p) >= 9 )
- goto st2;
- goto st0;
-tr2:
-#line 339 "parser.rl"
- {
- VALUE v = Qnil;
- char *np = JSON_parse_value(json, p, pe, &v);
- if (np == NULL) {
- p--; {p++; cs = 3; goto _out;}
+
+ // If the string ended with an unterminated escape sequence, we might
+ // have gone past the end.
+ if (RB_UNLIKELY(state->cursor > state->end)) {
+ state->cursor = state->end;
+ }
+
+ return false;
+}
+
+// Slow path of json_parse_string, taken when the first scan stopped on
+// something other than the closing quote. Walks the rest of the string,
+// recording where each backslash sits, and once the closing quote is reached
+// returns whatever json_string_unescape builds from [start, cursor).
+// `start` is the first byte after the opening quote; `is_name` is forwarded
+// untouched. Raises a parse error if the scan runs out before a closing quote.
+static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
+{
+ const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
+ JSON_UnescapePositions positions = {
+ .size = 0,
+ .positions = backslashes,
+ .additional_backslashes = 0,
+ };
+
+ do {
+ switch (*state->cursor) {
+ case '"': {
+ VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
+ state->cursor++;
+ return string;
+ }
+ case '\\': {
+ // Remember this backslash while the fixed-size buffer has room;
+ // beyond that only the number of extra backslashes is counted.
+ if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
+ backslashes[positions.size] = state->cursor;
+ positions.size++;
+ } else {
+ positions.additional_backslashes++;
+ }
+ // Together with the increment after the switch this steps over
+ // both the backslash and the byte that follows it.
+ state->cursor++;
+ break;
+ }
+ default:
+ // Neither a quote nor a backslash: going by the error message this
+ // is a control character, rejected unless the config allows it.
+ if (!config->allow_control_characters) {
+ raise_parse_error("invalid ASCII control character in string: %s", state);
+ }
+ break;
+ }
+
+ state->cursor++;
+ } while (string_scan(state));
+
+ raise_parse_error("unexpected end of input, expected closing \"", state);
+ // NOTE(review): presumably unreachable (raise_parse_error is not expected
+ // to return) and only here to satisfy the non-void return type.
+ return Qfalse;
+}
+
+// Parse a JSON string. On entry state->cursor is on the opening quote (it is
+// skipped unconditionally); on return the cursor is just past the closing
+// quote. `is_name` is passed through to the helpers that build the result.
+// Raises a parse error when string_scan finds nothing to stop on.
+ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
+{
+ state->cursor++;
+ const char *start = state->cursor;
+
+ if (RB_UNLIKELY(!string_scan(state))) {
+ raise_parse_error("unexpected end of input, expected closing \"", state);
+ }
+
+ VALUE string;
+ // The scan stopped directly on the closing quote: nothing between `start`
+ // and the cursor needed special handling, so take the fast path.
+ if (RB_LIKELY(*state->cursor == '"')) {
+ string = json_string_fastpath(state, config, start, state->cursor, is_name);
+ state->cursor++;
+ }
+ else {
+ // Stopped on a backslash or another special byte: fall back to the
+ // escape-aware path, which continues from the current cursor.
+ string = json_parse_escaped_string(state, config, is_name, start);
+ }
+
+ return string;
+}
+
+#if JSON_CPU_LITTLE_ENDIAN_64BITS
+// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
+// Additional References:
+// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
+// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
+//
+// Converts eight ASCII decimal digits packed into one 64-bit word (most
+// significant digit in the lowest byte, i.e. as loaded from a little-endian
+// buffer) into their numeric value. The input is not validated here; the
+// caller in json_parse_digits checks that all eight bytes are digits first.
+static inline uint64_t decode_8digits_unrolled(uint64_t val) {
+ const uint64_t mask = 0x000000FF000000FF;
+ const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
+ const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
+ // Turn each ASCII byte into its digit value 0..9.
+ val -= 0x3030303030303030;
+ // Combine neighbouring digits into two-digit values.
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
+ // Weight the four two-digit groups and sum them into the upper 32 bits.
+ val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
+ return val;
+}
+
+// Four-digit counterpart of decode_8digits_unrolled: converts four ASCII
+// decimal digits packed into a 32-bit word (most significant digit in the
+// lowest byte) into their numeric value. The input is not validated here.
+static inline uint64_t decode_4digits_unrolled(uint32_t val) {
+ const uint32_t mask = 0x000000FF;
+ const uint32_t mul1 = 100;
+ // Turn each ASCII byte into its digit value 0..9.
+ val -= 0x30303030;
+ // Combine neighbouring digits into two-digit values.
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
+ // High pair * 100 + low pair.
+ val = ((val & mask) * mul1) + (((val >> 16) & mask));
+ return val;
+}
+#endif
+
+// Consume the run of ASCII digits at state->cursor, folding each one into
+// *accumulator in base 10 (the accumulator is extended, not reset, so callers
+// can chain integer and fractional digits). Returns how many digits were
+// consumed, which may be 0. No overflow check is done on *accumulator here.
+static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
+{
+ const char *start = state->cursor;
+
+#if JSON_CPU_LITTLE_ENDIAN_64BITS
+ // Fast path: look at 8 bytes at a time while at least 8 bytes remain.
+ while (rest(state) >= sizeof(uint64_t)) {
+ uint64_t next_8bytes;
+ memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
+
+ // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
+ // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
+ uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
+
+ if (match == 0x3333333333333333) { // 8 consecutive digits
+ *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
+ state->cursor += 8;
+ continue;
+ }
+
+ // Fewer than 8 digits: the lowest byte that differs from 0x33 marks
+ // the first non-digit, so this is the length of the leading digit run.
+ uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
+
+ if (consecutive_digits >= 4) {
+ *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
+ state->cursor += 4;
+ consecutive_digits -= 4;
+ }
+
+ // At most three digits are left; handle them one by one.
+ while (consecutive_digits) {
+ *accumulator = *accumulator * 10 + (*state->cursor - '0');
+ consecutive_digits--;
+ state->cursor++;
+ }
+
+ // A non-digit was seen inside this word, so the run is over.
+ return (int)(state->cursor - start);
+ }
+#endif
+
+ // Byte-at-a-time path: used for the last few bytes of the input and on
+ // builds where the block above is compiled out.
+ char next_char;
+ while (rb_isdigit(next_char = peek(state))) {
+ *accumulator = *accumulator * 10 + (next_char - '0');
+ state->cursor++;
+ }
+ return (int)(state->cursor - start);
+}
+
+// Parse a JSON number whose first digit is expected at state->cursor; a
+// leading '-' has already been consumed by the caller and is reported through
+// `negative`. `start` points at the beginning of the whole literal (including
+// the sign) and is used for error reporting and handed to the decoders.
+// All integer and fractional digits are accumulated into a single mantissa
+// together with a base-10 exponent. A literal with neither '.' nor an exponent
+// goes to json_decode_integer, anything else to json_decode_float.
+// Raises "invalid number" for a leading zero followed by more digits, a '-'
+// with no digits, a '.' with no digits after it, or an exponent with no digits.
+static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
+{
+ bool integer = true;
+ const char first_digit = *state->cursor;
+
+ // Variables for Ryu optimization - extract digits during parsing
+ int64_t exponent = 0;
+ int decimal_point_pos = -1;
+ uint64_t mantissa = 0;
+
+ // Parse integer part and extract mantissa digits
+ int mantissa_digits = json_parse_digits(state, &mantissa);
+
+ if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
+ raise_parse_error_at("invalid number: %s", state, start);
+ }
+
+ // Parse fractional part
+ if (peek(state) == '.') {
+ integer = false;
+ decimal_point_pos = mantissa_digits; // Remember position of decimal point
+ state->cursor++;
+
+ int fractional_digits = json_parse_digits(state, &mantissa);
+ mantissa_digits += fractional_digits;
+
+ if (RB_UNLIKELY(!fractional_digits)) {
+ raise_parse_error_at("invalid number: %s", state, start);
+ }
+ }
+
+ // Parse exponent
+ if (rb_tolower(peek(state)) == 'e') {
+ integer = false;
+ state->cursor++;
+
+ bool negative_exponent = false;
+ const char next_char = peek(state);
+ if (next_char == '-' || next_char == '+') {
+ negative_exponent = next_char == '-';
+ state->cursor++;
+ }
+
+ uint64_t abs_exponent = 0;
+ int exponent_digits = json_parse_digits(state, &abs_exponent);
+
+ if (RB_UNLIKELY(!exponent_digits)) {
+ raise_parse_error_at("invalid number: %s", state, start);
+ }
+
+ // With 20 or more digits the uint64_t accumulator may have wrapped, and
+ // anything above INT64_MAX cannot be stored in `exponent`: saturate.
+ if (RB_UNLIKELY(exponent_digits >= 20 || abs_exponent > (uint64_t)INT64_MAX)) {
+ exponent = negative_exponent ? INT64_MIN : INT64_MAX;
} else {
- rb_ary_push(*result, v);
- {p = (( np))-1;}
+ exponent = negative_exponent ? -(int64_t)abs_exponent : (int64_t)abs_exponent;
}
}
- goto st3;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
-#line 1110 "parser.c"
- switch( (*p) ) {
- case 13: goto st3;
- case 32: goto st3;
- case 44: goto st4;
- case 47: goto st9;
- case 93: goto tr4;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st3;
- goto st0;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- switch( (*p) ) {
- case 13: goto st4;
- case 32: goto st4;
- case 34: goto tr2;
- case 45: goto tr2;
- case 47: goto st5;
- case 73: goto tr2;
- case 78: goto tr2;
- case 91: goto tr2;
- case 102: goto tr2;
- case 110: goto tr2;
- case 116: goto tr2;
- case 123: goto tr2;
- }
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr2;
- } else if ( (*p) >= 9 )
- goto st4;
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- switch( (*p) ) {
- case 42: goto st6;
- case 47: goto st8;
- }
- goto st0;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- if ( (*p) == 42 )
- goto st7;
- goto st6;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- switch( (*p) ) {
- case 42: goto st7;
- case 47: goto st4;
- }
- goto st6;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- if ( (*p) == 10 )
- goto st4;
- goto st8;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
- switch( (*p) ) {
- case 42: goto st10;
- case 47: goto st12;
- }
- goto st0;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
- if ( (*p) == 42 )
- goto st11;
- goto st10;
-st11:
- if ( ++p == pe )
- goto _test_eof11;
-case 11:
- switch( (*p) ) {
- case 42: goto st11;
- case 47: goto st3;
- }
- goto st10;
-st12:
- if ( ++p == pe )
- goto _test_eof12;
-case 12:
- if ( (*p) == 10 )
- goto st3;
- goto st12;
-tr4:
-#line 350 "parser.rl"
- { p--; {p++; cs = 17; goto _out;} }
- goto st17;
-st17:
- if ( ++p == pe )
- goto _test_eof17;
-case 17:
-#line 1217 "parser.c"
- goto st0;
-st13:
- if ( ++p == pe )
- goto _test_eof13;
-case 13:
- switch( (*p) ) {
- case 42: goto st14;
- case 47: goto st16;
- }
- goto st0;
-st14:
- if ( ++p == pe )
- goto _test_eof14;
-case 14:
- if ( (*p) == 42 )
- goto st15;
- goto st14;
-st15:
- if ( ++p == pe )
- goto _test_eof15;
-case 15:
- switch( (*p) ) {
- case 42: goto st15;
- case 47: goto st2;
- }
- goto st14;
-st16:
- if ( ++p == pe )
- goto _test_eof16;
-case 16:
- if ( (*p) == 10 )
- goto st2;
- goto st16;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof11: cs = 11; goto _test_eof;
- _test_eof12: cs = 12; goto _test_eof;
- _test_eof17: cs = 17; goto _test_eof;
- _test_eof13: cs = 13; goto _test_eof;
- _test_eof14: cs = 14; goto _test_eof;
- _test_eof15: cs = 15; goto _test_eof;
- _test_eof16: cs = 16; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 372 "parser.rl"
-
- if(cs >= JSON_array_first_final) {
- return p + 1;
- } else {
- rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
- return NULL;
+
+ if (integer) {
+ return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
}
+
+ // Adjust exponent based on decimal point position
+ // (the mantissa holds the fractional digits as if they were integer
+ // digits, so the exponent is lowered by the number of fractional digits)
+ if (decimal_point_pos >= 0) {
+ exponent -= (mantissa_digits - decimal_point_pos);
+ }
+
+ return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
}
-static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd)
+// Parse an unsigned number: the literal starts at the current cursor, so the
+// cursor itself is passed as the start of the literal.
+static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
- char *p = string, *pe = string, *unescape;
- int unescape_len;
+ return json_parse_number(state, config, false, state->cursor);
+}
- while (pe < stringEnd) {
- if (*pe == '\\') {
- unescape = (char *) "?";
- unescape_len = 1;
- if (pe > p) rb_str_buf_cat(result, p, pe - p);
- switch (*++pe) {
- case 'n':
- unescape = (char *) "\n";
- break;
- case 'r':
- unescape = (char *) "\r";
+// Parse a number whose leading '-' has already been consumed: the cursor is on
+// the first digit, so the literal starts one byte earlier (cursor - 1).
+static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
+{
+ return json_parse_number(state, config, true, state->cursor - 1);
+}
+
+// How many values (array elements, or interleaved object keys+values) have been
+// pushed onto the rvalue stack since this container opened. Used to size the
+// bulk decode on close, and to tell the first key/colon from later ones.
+// Computed as the stack's current head minus the head recorded in the frame
+// when it was pushed; for an object every pair contributes two entries.
+static inline long json_frame_entry_count(const json_frame *frame, const rvalue_stack *value_stack)
+{
+ return value_stack->head - frame->value_stack_head;
+}
+
+// A complete value now sits on top of the rvalue stack. Advance the frame that
+// was waiting for it: the root document is done, or the enclosing container
+// moves on to expecting a ',' or its closing bracket. The caller passes the
+// frame it already has in hand -- the one that was expecting the value -- which
+// after a container close is the freshly re-exposed parent.
+static inline void json_value_completed(json_frame *frame)
+{
+ // Each frame type must share its numeric value with the phase the frame
+ // enters once its value is complete; that is what makes the plain cast
+ // below a valid "type -> next phase" mapping.
+ JSON_ASSERT((int)JSON_PHASE_DONE == (int)JSON_FRAME_ROOT);
+ JSON_ASSERT((int)JSON_PHASE_ARRAY_COMMA == (int)JSON_FRAME_ARRAY);
+ JSON_ASSERT((int)JSON_PHASE_OBJECT_COMMA == (int)JSON_FRAME_OBJECT);
+
+ frame->phase = (enum json_frame_phase) frame->type;
+}
+
+// If the input at the cursor matches `keyword`, advance the cursor past it and
+// return true; otherwise leave the cursor alone and return false.
+// Only the bytes from `offset` onwards are compared, so a caller passing a
+// non-zero offset must already have checked the first `offset` bytes itself.
+// The full keyword length must still be available in the input.
+ALWAYS_INLINE(static) bool json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset)
+{
+ // It is assumed that since `keyword` is always a literal, the compiler is able to constantize this
+ // `strlen` and several other computations in that routine, such as eliminating the `if (resumable)` branch.
+ // NOTE(review): no `if (resumable)` branch is visible in this function; that part of the remark may be stale.
+
+ size_t len = strlen(keyword);
+
+ // Note: a memcmp of a small power-of-two length against a literal string compiles to an integer comparison.
+ // That's why we sometimes compare starting from the first byte and sometimes from the second.
+ if (rest(state) >= len && (memcmp(state->cursor + offset, keyword + offset, len - offset) == 0)) {
+ state->cursor += len;
+ return true;
+ }
+ return false;
+}
+
+// Parse an arbitrary JSON value iteratively. This is a state machine driven
+// entirely by the top frame's phase so it can stop at any value boundary and
+// resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the
+// bottom of the stack, so the stack is never empty mid-parse and the document
+// itself is just another frame whose value, once parsed, leaves its phase DONE.
+//
+// Each phase is a labelled block reached with `goto`; the label names mirror
+// the enum constants. Parsed values are pushed on state->value_stack and
+// containers are built in bulk from that stack when their closing bracket is
+// seen. Returns the root value; errors are reported by raising.
+static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
+{
+ json_frame *frame = json_frame_stack_peek(state->frames);
+
+ // Resume at whatever the top frame is currently waiting for.
+ switch (frame->phase) {
+ case JSON_PHASE_DONE: goto JSON_PHASE_DONE;
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
+ case JSON_PHASE_OBJECT_KEY: goto JSON_PHASE_OBJECT_KEY;
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
+ }
+ UNREACHABLE_RETURN(Qundef);
+
+ JSON_PHASE_DONE: {
+ // The root document value is parsed; it is the lone survivor on
+ // the rvalue stack.
+ return *rvalue_stack_peek(state->value_stack, 1);
+ }
+
+ JSON_PHASE_VALUE: {
+ json_eat_whitespace(state);
+
+ VALUE value;
+ // Dispatch on the first byte. Scalars `break` out with `value` set;
+ // non-empty containers push a frame and jump straight to their first
+ // element or key instead.
+ switch (peek(state)) {
+ case 'n':
+ if (json_match_keyword(state, "null", 0)) {
+ value = Qnil;
break;
- case 't':
- unescape = (char *) "\t";
+ }
+ raise_parse_error("unexpected token %s", state);
+
+ case 't':
+ if (json_match_keyword(state, "true", 0)) {
+ value = Qtrue;
break;
- case '"':
- unescape = (char *) "\"";
+ }
+ raise_parse_error("unexpected token %s", state);
+
+ case 'f':
+ // Offset 1: the 'f' was already matched by this switch.
+ if (json_match_keyword(state, "false", 1)) {
+ value = Qfalse;
break;
- case '\\':
- unescape = (char *) "\\";
+ }
+ raise_parse_error("unexpected token %s", state);
+
+ case 'N':
+ // Note: a memcmp of a small power-of-two length compiles to an integer comparison
+ if (config->allow_nan && json_match_keyword(state, "NaN", 1)) {
+ value = CNaN;
break;
- case 'b':
- unescape = (char *) "\b";
+ }
+ raise_parse_error("unexpected token %s", state);
+
+ case 'I':
+ if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) {
+ value = CInfinity;
break;
- case 'f':
- unescape = (char *) "\f";
+ }
+ raise_parse_error("unexpected token %s", state);
+
+ case '-': {
+ // Step over the sign; what follows is either "Infinity" (only when
+ // allow_nan is set) or the digits of a negative number.
+ state->cursor++;
+ if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) {
+ value = CMinusInfinity;
+ } else {
+ value = json_parse_negative_number(state, config);
+ }
+ break;
+ }
+
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+ value = json_parse_positive_number(state, config);
+ break;
+
+ case '"':
+ // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
+ value = json_parse_string(state, config, false);
+ break;
+
+ case '[': {
+ state->cursor++;
+ json_eat_whitespace(state);
+
+ // Empty array: built immediately, no frame is pushed.
+ if (peek(state) == ']') {
+ state->cursor++;
+ value = json_decode_array(state, config, 0);
break;
- case 'u':
- if (pe > stringEnd - 4) {
- return Qnil;
- } else {
- char buf[4];
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
- pe += 3;
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
- pe++;
- if (pe > stringEnd - 6) return Qnil;
- if (pe[0] == '\\' && pe[1] == 'u') {
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
- | (sur & 0x3FF));
- pe += 5;
- } else {
- unescape = (char *) "?";
- break;
- }
- }
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
- unescape = buf;
- }
+ }
+
+ // A max_nesting of 0 disables the depth check.
+ state->current_nesting++;
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
+ }
+ state->in_array++;
+
+ // Phase stays VALUE: the next iteration reads the first element.
+ frame = json_frame_stack_push(state, (json_frame){
+ .type = JSON_FRAME_ARRAY,
+ .phase = JSON_PHASE_VALUE,
+ .value_stack_head = state->value_stack->head,
+ });
+ goto JSON_PHASE_VALUE;
+ }
+ case '{': {
+ // Kept in the frame so the cursor can be pointed back at the '{'
+ // while the object is decoded on close.
+ const char *object_start_cursor = state->cursor;
+
+ state->cursor++;
+ json_eat_whitespace(state);
+
+ // Empty object: built immediately, no frame is pushed.
+ if (peek(state) == '}') {
+ state->cursor++;
+ value = json_decode_object(state, config, 0);
break;
- default:
- p = pe;
- continue;
+ }
+
+ state->current_nesting++;
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
+ }
+
+ // Phase KEY: the next iteration reads the first key.
+ frame = json_frame_stack_push(state, (json_frame){
+ .type = JSON_FRAME_OBJECT,
+ .phase = JSON_PHASE_OBJECT_KEY,
+ .value_stack_head = state->value_stack->head,
+ .start_cursor = object_start_cursor,
+ });
+ goto JSON_PHASE_OBJECT_KEY;
+ }
+
+ case 0:
+ raise_parse_error("unexpected end of input", state);
+
+ default:
+ raise_parse_error("unexpected character: %s", state);
+ }
+
+ // Only scalars and empty containers reach this point.
+ json_push_value(state, config, value);
+ json_value_completed(frame);
+
+ switch (frame->phase) {
+ case JSON_PHASE_DONE: goto JSON_PHASE_DONE;
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
+ case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef);
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
+ }
+ UNREACHABLE_RETURN(Qundef);
+ }
+
+ JSON_PHASE_OBJECT_KEY: {
+ JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
+
+ json_eat_whitespace(state);
+
+ if (RB_LIKELY(peek(state) == '"')) {
+ // Keys go on the same value stack as values (is_name = true).
+ json_push_value(state, config, json_parse_string(state, config, true));
+ frame->phase = JSON_PHASE_OBJECT_COLON;
+ goto JSON_PHASE_OBJECT_COLON;
+ } else {
+ // The message differs for the first key vs. a key after a
+ // ',': the first is the only one reached with nothing pushed
+ // for this object yet.
+ if (json_frame_entry_count(frame, state->value_stack) == 0) {
+ raise_parse_error("expected object key, got %s", state);
+ } else {
+ raise_parse_error("expected object key, got: %s", state);
}
- rb_str_buf_cat(result, unescape, unescape_len);
- p = ++pe;
+ }
+ UNREACHABLE_RETURN(Qundef);
+ }
+
+ JSON_PHASE_OBJECT_COLON: {
+ JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
+
+ json_eat_whitespace(state);
+
+ if (RB_LIKELY(peek(state) == ':')) {
+ state->cursor++;
+ frame->phase = JSON_PHASE_VALUE;
+ goto JSON_PHASE_VALUE;
} else {
- pe++;
+ // First colon (only the first pair's key is pushed, nothing
+ // else) vs. a later one.
+ if (json_frame_entry_count(frame, state->value_stack) == 1) {
+ raise_parse_error("expected ':' after object key", state);
+ } else {
+ raise_parse_error("expected ':' after object key, got: %s", state);
+ }
}
+ UNREACHABLE_RETURN(Qundef);
}
- rb_str_buf_cat(result, p, pe - p);
- return result;
-}
+ JSON_PHASE_ARRAY_COMMA: {
+ JSON_ASSERT(frame->type == JSON_FRAME_ARRAY);
-#line 1354 "parser.c"
-static const int JSON_string_start = 1;
-static const int JSON_string_first_final = 8;
-static const int JSON_string_error = 0;
-
-static const int JSON_string_en_main = 1;
-
-
-#line 471 "parser.rl"
-
-
-static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
-
- *result = rb_str_buf_new(0);
-
-#line 1371 "parser.c"
- {
- cs = JSON_string_start;
- }
-
-#line 479 "parser.rl"
- json->memo = p;
-
-#line 1379 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-case 1:
- if ( (*p) == 34 )
- goto st2;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 34: goto tr2;
- case 92: goto st3;
- }
- if ( 0 <= (*p) && (*p) <= 31 )
- goto st0;
- goto st2;
-tr2:
-#line 457 "parser.rl"
- {
- *result = json_string_unescape(*result, json->memo + 1, p);
- if (NIL_P(*result)) {
- p--;
- {p++; cs = 8; goto _out;}
- } else {
- FORCE_UTF8(*result);
- {p = (( p + 1))-1;}
- }
- }
-#line 468 "parser.rl"
- { p--; {p++; cs = 8; goto _out;} }
- goto st8;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
-#line 1422 "parser.c"
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- if ( (*p) == 117 )
- goto st4;
- if ( 0 <= (*p) && (*p) <= 31 )
- goto st0;
- goto st2;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- if ( (*p) < 65 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st5;
- } else if ( (*p) > 70 ) {
- if ( 97 <= (*p) && (*p) <= 102 )
- goto st5;
- } else
- goto st5;
- goto st0;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) < 65 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st6;
- } else if ( (*p) > 70 ) {
- if ( 97 <= (*p) && (*p) <= 102 )
- goto st6;
- } else
- goto st6;
- goto st0;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- if ( (*p) < 65 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st7;
- } else if ( (*p) > 70 ) {
- if ( 97 <= (*p) && (*p) <= 102 )
- goto st7;
- } else
- goto st7;
- goto st0;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) < 65 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto st2;
- } else if ( (*p) > 70 ) {
- if ( 97 <= (*p) && (*p) <= 102 )
- goto st2;
- } else
- goto st2;
- goto st0;
- }
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 481 "parser.rl"
-
- if (json->symbolize_names && json->parsing_name) {
- *result = rb_str_intern(*result);
- }
- if (cs >= JSON_string_first_final) {
- return p + 1;
- } else {
- return NULL;
+ json_eat_whitespace(state);
+
+ const char next_char = peek(state);
+
+ if (RB_LIKELY(next_char == ',')) {
+ state->cursor++;
+ if (config->allow_trailing_comma) {
+ json_eat_whitespace(state);
+ if (peek(state) == ']') {
+ // Trailing comma: stay in COMMA to close on the next iteration.
+ goto JSON_PHASE_ARRAY_COMMA;
+ }
+ }
+ frame->phase = JSON_PHASE_VALUE;
+ goto JSON_PHASE_VALUE;
+ } else if (next_char == ']') {
+ state->cursor++;
+ // Take the element count before the frame is popped, then build the
+ // array from that many stack entries and push it for the parent.
+ long count = json_frame_entry_count(frame, state->value_stack);
+ state->current_nesting--;
+ state->in_array--;
+ json_frame_stack_pop(state->frames);
+ json_push_value(state, config, json_decode_array(state, config, count));
+ frame = json_frame_stack_peek(state->frames);
+ json_value_completed(frame);
+
+ switch (frame->phase) {
+ case JSON_PHASE_DONE: goto JSON_PHASE_DONE;
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
+ case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef);
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
+ }
+ } else {
+ raise_parse_error("expected ',' or ']' after array value", state);
+ }
+ UNREACHABLE_RETURN(Qundef);
}
-}
+ JSON_PHASE_OBJECT_COMMA: {
+ JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
+ json_eat_whitespace(state);
+ const char next_char = peek(state);
-#line 1512 "parser.c"
-static const int JSON_start = 1;
-static const int JSON_first_final = 10;
-static const int JSON_error = 0;
+ if (RB_LIKELY(next_char == ',')) {
+ state->cursor++;
-static const int JSON_en_main = 1;
+ if (config->allow_trailing_comma) {
+ json_eat_whitespace(state);
+ if (peek(state) == '}') {
+ // Trailing comma: stay in COMMA to close on the next iteration.
+ goto JSON_PHASE_OBJECT_COMMA;
+ }
+ }
+ frame->phase = JSON_PHASE_OBJECT_KEY;
+ goto JSON_PHASE_OBJECT_KEY;
+ } else if (next_char == '}') {
+ state->cursor++;
+ state->current_nesting--;
+ // Counts keys and values together (two stack entries per pair).
+ // NOTE(review): json_frame_entry_count returns long; the array branch
+ // stores it in a long while this one uses size_t.
+ size_t count = json_frame_entry_count(frame, state->value_stack);
+
+ // Temporarily rewind the cursor to the opening '{' in case an error is raised
+ const char *final_cursor = state->cursor;
+ state->cursor = frame->start_cursor;
+ VALUE object = json_decode_object(state, config, count);
+ state->cursor = final_cursor;
+
+ json_push_value(state, config, object);
+ json_frame_stack_pop(state->frames);
+ frame = json_frame_stack_peek(state->frames);
+ json_value_completed(frame);
+
+ switch (frame->phase) {
+ case JSON_PHASE_DONE: goto JSON_PHASE_DONE;
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
+ case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef);
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
+ }
+ } else {
+ raise_parse_error("expected ',' or '}' after object value, got: %s", state);
+ }
+ UNREACHABLE_RETURN(Qundef);
+ }
-#line 518 "parser.rl"
+ UNREACHABLE_RETURN(Qundef);
+}
+// Skip any trailing whitespace and raise a parse error if input remains
+// after it.
+static void json_ensure_eof(JSON_ParserState *state)
+{
+ json_eat_whitespace(state);
+ if (!eos(state)) {
+ raise_parse_error("unexpected token at end of stream %s", state);
+ }
+}
/*
* Document-class: JSON::Ext::Parser
@@ -1533,58 +1788,106 @@ static const int JSON_en_main = 1;
+// Return a String the parser can treat as UTF-8. A source already tagged
+// UTF-8 is returned as is; a binary source is duplicated and the copy is
+// re-tagged as UTF-8 without touching its bytes; any other encoding goes
+// through the method named by i_encode (presumably String#encode) with
+// Encoding_UTF_8 as argument. Non-String input is coerced with StringValue.
static VALUE convert_encoding(VALUE source)
{
- char *ptr = RSTRING_PTR(source);
- long len = RSTRING_LEN(source);
- if (len < 2) {
- rb_raise(eParserError, "A JSON text must at least contain two octets!");
- }
-#ifdef HAVE_RUBY_ENCODING_H
- {
- VALUE encoding = rb_funcall(source, i_encoding, 0);
- if (encoding == CEncoding_ASCII_8BIT) {
- if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
- source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32BE);
- source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
- } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
- source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16BE);
- source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
- } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
- source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32LE);
- source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
- } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
- source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16LE);
- source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
- } else {
- FORCE_UTF8(source);
+ StringValue(source);
+ int encindex = RB_ENCODING_GET(source);
+
+ if (RB_LIKELY(encindex == utf8_encindex)) {
+ return source;
+ }
+
+ if (encindex == binary_encindex) {
+ // For historical reasons, we silently reinterpret binary strings as UTF-8
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
+ }
+
+ source = rb_funcall(source, i_encode, 1, Encoding_UTF_8);
+ // Re-check the type: the value now comes from a method call.
+ StringValue(source);
+ return source;
+}
+
+// Bundle handed to parser_config_init_i through the single VALUE argument of
+// rb_hash_foreach: the config being filled in and the object passed along as
+// `self` (forwarded to parser_config_wb_write when VALUE fields are stored).
+struct parser_config_init_args {
+ JSON_ParserConfig *config;
+ VALUE self;
+};
+
+// Store `val` into the VALUE slot `*dest`. When `self` is non-zero, also
+// report the store with RB_OBJ_WRITTEN (Ruby's GC write-barrier macro) so the
+// GC learns that `self` now references `val`; with a zero `self` only the
+// plain store happens.
+static void parser_config_wb_write(VALUE self, VALUE *dest, VALUE val)
+{
+ *dest = val;
+ if (self) RB_OBJ_WRITTEN(self, Qundef, val);
+}
+
+// rb_hash_foreach callback: apply one option (key/val pair) to the config
+// carried in `data` (a struct parser_config_init_args *). Keys that match none
+// of the known symbols are ignored. Always returns ST_CONTINUE.
+static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
+{
+ struct parser_config_init_args *args = (struct parser_config_init_args *)data;
+ JSON_ParserConfig *config = args->config;
+ VALUE self = args->self;
+
+ // A falsy max_nesting is stored as 0.
+ if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
+ else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
+ else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
+ else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
+ else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
+ else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
+ else if (key == sym_freeze) { config->freeze = RTEST(val); }
+ else if (key == sym_on_load) { parser_config_wb_write(self, &config->on_load_proc, RTEST(val) ? val : Qfalse); }
+ else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
+ else if (key == sym_decimal_class) {
+ // Pick the receiver and method used to build decimals, in order of
+ // preference: val.try_convert, val.new, or a method derived from the
+ // class name.
+ if (RTEST(val)) {
+ if (rb_respond_to(val, i_try_convert)) {
+ parser_config_wb_write(self, &config->decimal_class, val);
+ config->decimal_method_id = i_try_convert;
+ } else if (rb_respond_to(val, i_new)) {
+ parser_config_wb_write(self, &config->decimal_class, val);
+ config->decimal_method_id = i_new;
+ } else if (RB_TYPE_P(val, T_CLASS)) {
+ // Split the class name at its last ':'. What precedes the
+ // separator (assumed to be "::") is resolved to the receiver, what
+ // follows becomes the method name. Without any ':' the receiver is
+ // Kernel and the whole class name is the method name.
+ VALUE name = rb_class_name(val);
+ const char *name_cstr = RSTRING_PTR(name);
+ const char *last_colon = strrchr(name_cstr, ':');
+ if (last_colon) {
+ const char *mod_path_end = last_colon - 1;
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
+ parser_config_wb_write(self, &config->decimal_class, rb_path_to_class(mod_path));
+
+ const char *method_name_beg = last_colon + 1;
+ long before_len = method_name_beg - name_cstr;
+ long len = RSTRING_LEN(name) - before_len;
+ VALUE method_name = rb_str_substr(name, before_len, len);
+ config->decimal_method_id = SYM2ID(rb_str_intern(method_name));
+ } else {
+ parser_config_wb_write(self, &config->decimal_class, rb_mKernel);
+ config->decimal_method_id = SYM2ID(rb_str_intern(name));
+ }
}
- } else {
- source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8);
}
}
-#else
- if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
- source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source);
- } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
- source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source);
- } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
- source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source);
- } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
- source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source);
+
+ return ST_CONTINUE;
+}
+
+// Fill `config` from the options hash `opts`. max_nesting is set to 100 first
+// and then every key present in the hash is applied through
+// parser_config_init_i. A nil `opts` keeps only that default; a non-nil value
+// that is not a Hash makes Check_Type raise. `self` is forwarded to the
+// callback for use when VALUE fields are stored.
+static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self)
+{
+ config->max_nesting = 100;
+
+ struct parser_config_init_args args = {
+ .config = config,
+ .self = self,
+ };
+
+ if (!NIL_P(opts)) {
+ Check_Type(opts, T_HASH);
+ if (RHASH_SIZE(opts) > 0) {
+ // We assume in most cases few keys are set so it's faster to go over
+ // the provided keys than to check all possible keys.
+ rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args);
+ }
+
}
-#endif
- return source;
}
/*
- * call-seq: new(source, opts => {})
+ * call-seq: new(opts => {})
*
- * Creates a new JSON::Ext::Parser instance for the string _source_.
- *
- * Creates a new JSON::Ext::Parser instance for the string _source_.
+ * Creates a new JSON::Ext::ParserConfig instance.
*
* It will be configured by the _opts_ hash. _opts_ can have the following
* keys:
@@ -1592,356 +1895,208 @@ static VALUE convert_encoding(VALUE source)
* _opts_ can have the following keys:
* * *max_nesting*: The maximum depth of nesting allowed in the parsed data
* structures. Disable depth checking with :max_nesting => false|nil|0, it
- * defaults to 19.
+ * defaults to 100.
* * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
* defiance of RFC 4627 to be parsed by the Parser. This option defaults to
* false.
* * *symbolize_names*: If set to true, returns symbols for the names
- * (keys) in a JSON object. Otherwise strings are returned, which is also
- * the default.
- * * *create_additions*: If set to false, the Parser doesn't create
- * additions even if a matchin class and create_id was found. This option
- * defaults to true.
- * * *object_class*: Defaults to Hash
- * * *array_class*: Defaults to Array
+ * (keys) in a JSON object. Otherwise strings are returned, which is
+ * also the default. It's not possible to use this option in
+ * conjunction with the *create_additions* option.
+ * * *decimal_class*: Specifies which class to use instead of the default
+ * (Float) when parsing decimal numbers. This class must accept a single
+ * string argument in its constructor.
*/
-static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
+static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
{
- char *ptr;
- long len;
- VALUE source, opts;
- GET_PARSER_INIT;
+ rb_check_frozen(self);
+ GET_PARSER_CONFIG;
+
+ parser_config_init(config, opts, self);
- if (json->Vsource) {
- rb_raise(rb_eTypeError, "already initialized instance");
- }
- rb_scan_args(argc, argv, "11", &source, &opts);
- source = convert_encoding(StringValue(source));
- ptr = RSTRING_PTR(source);
- len = RSTRING_LEN(source);
- if (!NIL_P(opts)) {
- opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
- if (NIL_P(opts)) {
- rb_raise(rb_eArgError, "opts needs to be like a hash");
- } else {
- VALUE tmp = ID2SYM(i_max_nesting);
- if (option_given_p(opts, tmp)) {
- VALUE max_nesting = rb_hash_aref(opts, tmp);
- if (RTEST(max_nesting)) {
- Check_Type(max_nesting, T_FIXNUM);
- json->max_nesting = FIX2INT(max_nesting);
- } else {
- json->max_nesting = 0;
- }
- } else {
- json->max_nesting = 19;
- }
- tmp = ID2SYM(i_allow_nan);
- if (option_given_p(opts, tmp)) {
- VALUE allow_nan = rb_hash_aref(opts, tmp);
- json->allow_nan = RTEST(allow_nan) ? 1 : 0;
- } else {
- json->allow_nan = 0;
- }
- tmp = ID2SYM(i_symbolize_names);
- if (option_given_p(opts, tmp)) {
- VALUE symbolize_names = rb_hash_aref(opts, tmp);
- json->symbolize_names = RTEST(symbolize_names) ? 1 : 0;
- } else {
- json->symbolize_names = 0;
- }
- tmp = ID2SYM(i_create_additions);
- if (option_given_p(opts, tmp)) {
- VALUE create_additions = rb_hash_aref(opts, tmp);
- if (RTEST(create_additions)) {
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- } else {
- json->create_id = Qnil;
- }
- } else {
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- }
- tmp = ID2SYM(i_object_class);
- if (option_given_p(opts, tmp)) {
- json->object_class = rb_hash_aref(opts, tmp);
- } else {
- json->object_class = Qnil;
- }
- tmp = ID2SYM(i_array_class);
- if (option_given_p(opts, tmp)) {
- json->array_class = rb_hash_aref(opts, tmp);
- } else {
- json->array_class = Qnil;
- }
- }
- } else {
- json->max_nesting = 19;
- json->allow_nan = 0;
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- json->object_class = Qnil;
- json->array_class = Qnil;
- }
- json->current_nesting = 0;
- json->len = len;
- json->source = ptr;
- json->Vsource = source;
return self;
}
+static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src)
+{
+ VALUE Vsource = convert_encoding(src);
+
+ // Ensure the string isn't mutated under us.
+ // The classic API to use is `rb_str_locktmp`, but then we'd
+ // need to use `rb_protect` to make sure we always unlock.
+ if (Vsource == src) {
+ Vsource = rb_str_new_frozen(Vsource);
+ }
+
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
+ rvalue_stack value_stack = {
+ .type = RVALUE_STACK_STACK_ALLOCATED,
+ .ptr = rvalue_stack_buffer,
+ .capa = RVALUE_STACK_INITIAL_CAPA,
+ };
+
+ // Seed the frame stack with the root frame, establishing the invariant that
+ // json_parse_any always has a top frame to dispatch on (so the stack is never
+ // empty mid-parse).
+ json_frame frame_stack_buffer[JSON_FRAME_STACK_INITIAL_CAPA];
+ frame_stack_buffer[0] = (json_frame){
+ .type = JSON_FRAME_ROOT,
+ .phase = JSON_PHASE_VALUE,
+ };
+ json_frame_stack frames = {
+ .type = RVALUE_STACK_STACK_ALLOCATED,
+ .ptr = frame_stack_buffer,
+ .capa = JSON_FRAME_STACK_INITIAL_CAPA,
+ .head = 1,
+ };
+
+ long len;
+ const char *start;
+
+ RSTRING_GETMEM(Vsource, start, len);
+
+ VALUE value_stack_handle = 0;
+ VALUE frame_stack_handle = 0;
+ JSON_ParserState _state = {
+ .start = start,
+ .cursor = start,
+ .end = start + len,
+ .value_stack = &value_stack,
+ .value_stack_handle = &value_stack_handle,
+ .frames = &frames,
+ .frame_stack_handle = &frame_stack_handle,
+ };
+ JSON_ParserState *state = &_state;
+
+ VALUE result = json_parse_any(state, config);
+
+ // This may be skipped in case of exception, but
+ // it won't cause a leak.
+ rvalue_stack_eagerly_release(value_stack_handle);
+ json_frame_stack_eagerly_release(frame_stack_handle);
+ RB_GC_GUARD(value_stack_handle);
+ RB_GC_GUARD(frame_stack_handle);
+ RB_GC_GUARD(Vsource);
+ json_ensure_eof(state);
+
+ return result;
+}
+
/*
- * call-seq: parse()
+ * call-seq: parse(source)
*
* Parses the current JSON text _source_ and returns the complete data
* structure as a result.
+ * It raises JSON::ParserError if parsing fails.
*/
-static VALUE cParser_parse(VALUE self)
-{
- char *p, *pe;
- int cs = EVIL;
- VALUE result = Qnil;
- GET_PARSER;
-
-
-#line 1706 "parser.c"
- {
- cs = JSON_start;
- }
-
-#line 703 "parser.rl"
- p = json->source;
- pe = p + json->len;
-
-#line 1715 "parser.c"
- {
- if ( p == pe )
- goto _test_eof;
- switch ( cs )
- {
-st1:
- if ( ++p == pe )
- goto _test_eof1;
-case 1:
- switch( (*p) ) {
- case 13: goto st1;
- case 32: goto st1;
- case 47: goto st2;
- case 91: goto tr3;
- case 123: goto tr4;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st1;
- goto st0;
-st0:
-cs = 0;
- goto _out;
-st2:
- if ( ++p == pe )
- goto _test_eof2;
-case 2:
- switch( (*p) ) {
- case 42: goto st3;
- case 47: goto st5;
- }
- goto st0;
-st3:
- if ( ++p == pe )
- goto _test_eof3;
-case 3:
- if ( (*p) == 42 )
- goto st4;
- goto st3;
-st4:
- if ( ++p == pe )
- goto _test_eof4;
-case 4:
- switch( (*p) ) {
- case 42: goto st4;
- case 47: goto st1;
- }
- goto st3;
-st5:
- if ( ++p == pe )
- goto _test_eof5;
-case 5:
- if ( (*p) == 10 )
- goto st1;
- goto st5;
-tr3:
-#line 507 "parser.rl"
- {
- char *np;
- json->current_nesting = 1;
- np = JSON_parse_array(json, p, pe, &result);
- if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
- }
- goto st10;
-tr4:
-#line 500 "parser.rl"
- {
- char *np;
- json->current_nesting = 1;
- np = JSON_parse_object(json, p, pe, &result);
- if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
- }
- goto st10;
-st10:
- if ( ++p == pe )
- goto _test_eof10;
-case 10:
-#line 1792 "parser.c"
- switch( (*p) ) {
- case 13: goto st10;
- case 32: goto st10;
- case 47: goto st6;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st10;
- goto st0;
-st6:
- if ( ++p == pe )
- goto _test_eof6;
-case 6:
- switch( (*p) ) {
- case 42: goto st7;
- case 47: goto st9;
- }
- goto st0;
-st7:
- if ( ++p == pe )
- goto _test_eof7;
-case 7:
- if ( (*p) == 42 )
- goto st8;
- goto st7;
-st8:
- if ( ++p == pe )
- goto _test_eof8;
-case 8:
- switch( (*p) ) {
- case 42: goto st8;
- case 47: goto st10;
- }
- goto st7;
-st9:
- if ( ++p == pe )
- goto _test_eof9;
-case 9:
- if ( (*p) == 10 )
- goto st10;
- goto st9;
- }
- _test_eof1: cs = 1; goto _test_eof;
- _test_eof2: cs = 2; goto _test_eof;
- _test_eof3: cs = 3; goto _test_eof;
- _test_eof4: cs = 4; goto _test_eof;
- _test_eof5: cs = 5; goto _test_eof;
- _test_eof10: cs = 10; goto _test_eof;
- _test_eof6: cs = 6; goto _test_eof;
- _test_eof7: cs = 7; goto _test_eof;
- _test_eof8: cs = 8; goto _test_eof;
- _test_eof9: cs = 9; goto _test_eof;
-
- _test_eof: {}
- _out: {}
- }
-
-#line 706 "parser.rl"
-
- if (cs >= JSON_first_final && p == pe) {
- return result;
- } else {
- rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
- return Qnil;
- }
+static VALUE cParserConfig_parse(VALUE self, VALUE Vsource)
+{
+ GET_PARSER_CONFIG;
+ return cParser_parse(config, Vsource);
}
-static JSON_Parser *JSON_allocate()
+static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
{
- JSON_Parser *json = ALLOC(JSON_Parser);
- MEMZERO(json, JSON_Parser, 1);
- return json;
+ JSON_ParserConfig _config = {0};
+ JSON_ParserConfig *config = &_config;
+ parser_config_init(config, opts, false);
+
+ return cParser_parse(config, Vsource);
}
-static void JSON_mark(JSON_Parser *json)
+static void JSON_ParserConfig_mark(void *ptr)
{
- rb_gc_mark_maybe(json->Vsource);
- rb_gc_mark_maybe(json->create_id);
- rb_gc_mark_maybe(json->object_class);
- rb_gc_mark_maybe(json->array_class);
+ JSON_ParserConfig *config = ptr;
+ rb_gc_mark_movable(config->on_load_proc);
+ rb_gc_mark_movable(config->decimal_class);
}
-static void JSON_free(JSON_Parser *json)
+static size_t JSON_ParserConfig_memsize(const void *ptr)
{
- ruby_xfree(json);
+#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
+ return 0;
+#else
+ return sizeof(JSON_ParserConfig);
+#endif
}
-static VALUE cJSON_parser_s_allocate(VALUE klass)
+static void JSON_ParserConfig_compact(void *ptr)
{
- JSON_Parser *json = JSON_allocate();
- return Data_Wrap_Struct(klass, JSON_mark, JSON_free, json);
+ JSON_ParserConfig *config = ptr;
+ config->on_load_proc = rb_gc_location(config->on_load_proc);
+ config->decimal_class = rb_gc_location(config->decimal_class);
}
-/*
- * call-seq: source()
- *
- * Returns a copy of the current _source_ string, that was used to construct
- * this Parser.
- */
-static VALUE cParser_source(VALUE self)
+static const rb_data_type_t JSON_ParserConfig_type = {
+ .wrap_struct_name = "JSON::Ext::Parser/ParserConfig",
+ .function = {
+ .dmark = JSON_ParserConfig_mark,
+ .dfree = RUBY_DEFAULT_FREE,
+ .dsize = JSON_ParserConfig_memsize,
+ .dcompact = JSON_ParserConfig_compact,
+ },
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
+};
+
+static VALUE cJSON_parser_s_allocate(VALUE klass)
{
- GET_PARSER;
- return rb_str_dup(json->Vsource);
+ JSON_ParserConfig *config;
+ return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config);
}
-void Init_parser()
+void Init_parser(void)
{
+#ifdef HAVE_RB_EXT_RACTOR_SAFE
+ rb_ext_ractor_safe(true);
+#endif
+
+#undef rb_intern
rb_require("json/common");
mJSON = rb_define_module("JSON");
- mExt = rb_define_module_under(mJSON, "Ext");
- cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
- eParserError = rb_path2class("JSON::ParserError");
+ VALUE mExt = rb_define_module_under(mJSON, "Ext");
+ VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);
eNestingError = rb_path2class("JSON::NestingError");
- rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
- rb_define_method(cParser, "initialize", cParser_initialize, -1);
- rb_define_method(cParser, "parse", cParser_parse, 0);
- rb_define_method(cParser, "source", cParser_source, 0);
+ rb_gc_register_mark_object(eNestingError);
+ rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
+ rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
+ rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);
+
+ VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
CNaN = rb_const_get(mJSON, rb_intern("NaN"));
+ rb_gc_register_mark_object(CNaN);
+
CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
- CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
+ rb_gc_register_mark_object(CInfinity);
- i_json_creatable_p = rb_intern("json_creatable?");
- i_json_create = rb_intern("json_create");
- i_create_id = rb_intern("create_id");
- i_create_additions = rb_intern("create_additions");
- i_chr = rb_intern("chr");
- i_max_nesting = rb_intern("max_nesting");
- i_allow_nan = rb_intern("allow_nan");
- i_symbolize_names = rb_intern("symbolize_names");
- i_object_class = rb_intern("object_class");
- i_array_class = rb_intern("array_class");
- i_key_p = rb_intern("key?");
- i_deep_const_get = rb_intern("deep_const_get");
-#ifdef HAVE_RUBY_ENCODING_H
- CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
- CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be"));
- CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le"));
- CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
- CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
- CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
- i_encoding = rb_intern("encoding");
+ CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
+ rb_gc_register_mark_object(CMinusInfinity);
+
+ rb_global_variable(&Encoding_UTF_8);
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
+
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
+ sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
+ sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
+ sym_freeze = ID2SYM(rb_intern("freeze"));
+ sym_on_load = ID2SYM(rb_intern("on_load"));
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
+
+ i_new = rb_intern("new");
+ i_try_convert = rb_intern("try_convert");
+ i_uminus = rb_intern("-@");
i_encode = rb_intern("encode");
- i_encode_bang = rb_intern("encode!");
- i_force_encoding = rb_intern("force_encoding");
-#else
- i_iconv = rb_intern("iconv");
+
+ binary_encindex = rb_ascii8bit_encindex();
+ utf8_encindex = rb_utf8_encindex();
+ enc_utf8 = rb_utf8_encoding();
+
+#ifdef HAVE_SIMD
+ simd_impl = find_simd_implementation();
#endif
}
-
-/*
- * Local variables:
- * mode: c
- * c-file-style: ruby
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/ext/json/parser/parser.h b/ext/json/parser/parser.h
deleted file mode 100644
index a344da058c..0000000000
--- a/ext/json/parser/parser.h
+++ /dev/null
@@ -1,74 +0,0 @@
-#ifndef _PARSER_H_
-#define _PARSER_H_
-
-#include "ruby.h"
-
-#if HAVE_RE_H
-#include "re.h"
-#endif
-
-#ifdef HAVE_RUBY_ENCODING_H
-#include "ruby/encoding.h"
-#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
-#else
-#define FORCE_UTF8(obj)
-#endif
-
-#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key))
-
-/* unicode */
-
-typedef unsigned long UTF32; /* at least 32 bits */
-typedef unsigned short UTF16; /* at least 16 bits */
-typedef unsigned char UTF8; /* typically 8 bits */
-
-#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
-#define UNI_SUR_HIGH_START (UTF32)0xD800
-#define UNI_SUR_HIGH_END (UTF32)0xDBFF
-#define UNI_SUR_LOW_START (UTF32)0xDC00
-#define UNI_SUR_LOW_END (UTF32)0xDFFF
-
-typedef struct JSON_ParserStruct {
- VALUE Vsource;
- char *source;
- long len;
- char *memo;
- VALUE create_id;
- int max_nesting;
- int current_nesting;
- int allow_nan;
- int parsing_name;
- int symbolize_names;
- VALUE object_class;
- VALUE array_class;
-} JSON_Parser;
-
-#define GET_PARSER \
- GET_PARSER_INIT; \
- if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance")
-#define GET_PARSER_INIT \
- JSON_Parser *json; \
- Data_Get_Struct(self, JSON_Parser, json)
-
-#define MinusInfinity "-Infinity"
-#define EVIL 0x666
-
-static UTF32 unescape_unicode(const unsigned char *p);
-static int convert_UTF32_to_UTF8(char *buf, UTF32 ch);
-static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result);
-static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result);
-static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
-static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
-static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result);
-static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd);
-static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
-static VALUE convert_encoding(VALUE source);
-static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self);
-static VALUE cParser_parse(VALUE self);
-static JSON_Parser *JSON_allocate();
-static void JSON_mark(JSON_Parser *json);
-static void JSON_free(JSON_Parser *json);
-static VALUE cJSON_parser_s_allocate(VALUE klass);
-static VALUE cParser_source(VALUE self);
-
-#endif
diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl
deleted file mode 100644
index 278386432b..0000000000
--- a/ext/json/parser/parser.rl
+++ /dev/null
@@ -1,804 +0,0 @@
-#include "parser.h"
-
-/* unicode */
-
-static const char digit_values[256] = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
- -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1
-};
-
-static UTF32 unescape_unicode(const unsigned char *p)
-{
- char b;
- UTF32 result = 0;
- b = digit_values[p[0]];
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- result = (result << 4) | b;
- b = digit_values[p[1]];
- result = (result << 4) | b;
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- b = digit_values[p[2]];
- result = (result << 4) | b;
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- b = digit_values[p[3]];
- result = (result << 4) | b;
- if (b < 0) return UNI_REPLACEMENT_CHAR;
- return result;
-}
-
-static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
-{
- int len = 1;
- if (ch <= 0x7F) {
- buf[0] = (char) ch;
- } else if (ch <= 0x07FF) {
- buf[0] = (char) ((ch >> 6) | 0xC0);
- buf[1] = (char) ((ch & 0x3F) | 0x80);
- len++;
- } else if (ch <= 0xFFFF) {
- buf[0] = (char) ((ch >> 12) | 0xE0);
- buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
- buf[2] = (char) ((ch & 0x3F) | 0x80);
- len += 2;
- } else if (ch <= 0x1fffff) {
- buf[0] =(char) ((ch >> 18) | 0xF0);
- buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
- buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
- buf[3] =(char) ((ch & 0x3F) | 0x80);
- len += 3;
- } else {
- buf[0] = '?';
- }
- return len;
-}
-
-#ifdef HAVE_RUBY_ENCODING_H
-static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE,
- CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE;
-static ID i_encoding, i_encode, i_encode_bang, i_force_encoding;
-#else
-static ID i_iconv;
-#endif
-
-static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
-static VALUE CNaN, CInfinity, CMinusInfinity;
-
-static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_object_class,
- i_array_class, i_key_p, i_deep_const_get;
-
-%%{
- machine JSON_common;
-
- cr = '\n';
- cr_neg = [^\n];
- ws = [ \t\r\n];
- c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/';
- cpp_comment = '//' cr_neg* cr;
- comment = c_comment | cpp_comment;
- ignore = ws | comment;
- name_separator = ':';
- value_separator = ',';
- Vnull = 'null';
- Vfalse = 'false';
- Vtrue = 'true';
- VNaN = 'NaN';
- VInfinity = 'Infinity';
- VMinusInfinity = '-Infinity';
- begin_value = [nft\"\-\[\{NI] | digit;
- begin_object = '{';
- end_object = '}';
- begin_array = '[';
- end_array = ']';
- begin_string = '"';
- begin_name = begin_string;
- begin_number = digit | '-';
-}%%
-
-%%{
- machine JSON_object;
- include JSON_common;
-
- write data;
-
- action parse_value {
- VALUE v = Qnil;
- char *np = JSON_parse_value(json, fpc, pe, &v);
- if (np == NULL) {
- fhold; fbreak;
- } else {
- rb_hash_aset(*result, last_name, v);
- fexec np;
- }
- }
-
- action parse_name {
- char *np;
- json->parsing_name = 1;
- np = JSON_parse_string(json, fpc, pe, &last_name);
- json->parsing_name = 0;
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- action exit { fhold; fbreak; }
-
- a_pair = ignore* begin_name >parse_name
- ignore* name_separator ignore*
- begin_value >parse_value;
-
- main := begin_object
- (a_pair (ignore* value_separator a_pair)*)?
- ignore* end_object @exit;
-}%%
-
-static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
- VALUE last_name = Qnil;
- VALUE object_class = json->object_class;
-
- if (json->max_nesting && json->current_nesting > json->max_nesting) {
- rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
- }
-
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
-
- %% write init;
- %% write exec;
-
- if (cs >= JSON_object_first_final) {
- if (RTEST(json->create_id)) {
- VALUE klassname = rb_hash_aref(*result, json->create_id);
- if (!NIL_P(klassname)) {
- VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
- if RTEST(rb_funcall(klass, i_json_creatable_p, 0)) {
- *result = rb_funcall(klass, i_json_create, 1, *result);
- }
- }
- }
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-%%{
- machine JSON_value;
- include JSON_common;
-
- write data;
-
- action parse_null {
- *result = Qnil;
- }
- action parse_false {
- *result = Qfalse;
- }
- action parse_true {
- *result = Qtrue;
- }
- action parse_nan {
- if (json->allow_nan) {
- *result = CNaN;
- } else {
- rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2);
- }
- }
- action parse_infinity {
- if (json->allow_nan) {
- *result = CInfinity;
- } else {
- rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8);
- }
- }
- action parse_string {
- char *np = JSON_parse_string(json, fpc, pe, result);
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- action parse_number {
- char *np;
- if(pe > fpc + 9 && !strncmp(MinusInfinity, fpc, 9)) {
- if (json->allow_nan) {
- *result = CMinusInfinity;
- fexec p + 10;
- fhold; fbreak;
- } else {
- rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
- }
- }
- np = JSON_parse_float(json, fpc, pe, result);
- if (np != NULL) fexec np;
- np = JSON_parse_integer(json, fpc, pe, result);
- if (np != NULL) fexec np;
- fhold; fbreak;
- }
-
- action parse_array {
- char *np;
- json->current_nesting++;
- np = JSON_parse_array(json, fpc, pe, result);
- json->current_nesting--;
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- action parse_object {
- char *np;
- json->current_nesting++;
- np = JSON_parse_object(json, fpc, pe, result);
- json->current_nesting--;
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- action exit { fhold; fbreak; }
-
-main := (
- Vnull @parse_null |
- Vfalse @parse_false |
- Vtrue @parse_true |
- VNaN @parse_nan |
- VInfinity @parse_infinity |
- begin_number >parse_number |
- begin_string >parse_string |
- begin_array >parse_array |
- begin_object >parse_object
- ) %*exit;
-}%%
-
-static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
-
- %% write init;
- %% write exec;
-
- if (cs >= JSON_value_first_final) {
- return p;
- } else {
- return NULL;
- }
-}
-
-%%{
- machine JSON_integer;
-
- write data;
-
- action exit { fhold; fbreak; }
-
- main := '-'? ('0' | [1-9][0-9]*) (^[0-9] @exit);
-}%%
-
-static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
-
- %% write init;
- json->memo = p;
- %% write exec;
-
- if (cs >= JSON_integer_first_final) {
- long len = p - json->memo;
- *result = rb_Integer(rb_str_new(json->memo, len));
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-%%{
- machine JSON_float;
- include JSON_common;
-
- write data;
-
- action exit { fhold; fbreak; }
-
- main := '-'? (
- (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
- | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
- ) (^[0-9Ee.\-] @exit );
-}%%
-
-static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
-
- %% write init;
- json->memo = p;
- %% write exec;
-
- if (cs >= JSON_float_first_final) {
- long len = p - json->memo;
- *result = rb_Float(rb_str_new(json->memo, len));
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-
-%%{
- machine JSON_array;
- include JSON_common;
-
- write data;
-
- action parse_value {
- VALUE v = Qnil;
- char *np = JSON_parse_value(json, fpc, pe, &v);
- if (np == NULL) {
- fhold; fbreak;
- } else {
- rb_ary_push(*result, v);
- fexec np;
- }
- }
-
- action exit { fhold; fbreak; }
-
- next_element = value_separator ignore* begin_value >parse_value;
-
- main := begin_array ignore*
- ((begin_value >parse_value ignore*)
- (ignore* next_element ignore*)*)?
- end_array @exit;
-}%%
-
-static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
- VALUE array_class = json->array_class;
-
- if (json->max_nesting && json->current_nesting > json->max_nesting) {
- rb_raise(eNestingError, "nesting of %d is too deep", json->current_nesting);
- }
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
-
- %% write init;
- %% write exec;
-
- if(cs >= JSON_array_first_final) {
- return p + 1;
- } else {
- rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
- return NULL;
- }
-}
-
-static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd)
-{
- char *p = string, *pe = string, *unescape;
- int unescape_len;
-
- while (pe < stringEnd) {
- if (*pe == '\\') {
- unescape = (char *) "?";
- unescape_len = 1;
- if (pe > p) rb_str_buf_cat(result, p, pe - p);
- switch (*++pe) {
- case 'n':
- unescape = (char *) "\n";
- break;
- case 'r':
- unescape = (char *) "\r";
- break;
- case 't':
- unescape = (char *) "\t";
- break;
- case '"':
- unescape = (char *) "\"";
- break;
- case '\\':
- unescape = (char *) "\\";
- break;
- case 'b':
- unescape = (char *) "\b";
- break;
- case 'f':
- unescape = (char *) "\f";
- break;
- case 'u':
- if (pe > stringEnd - 4) {
- return Qnil;
- } else {
- char buf[4];
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
- pe += 3;
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
- pe++;
- if (pe > stringEnd - 6) return Qnil;
- if (pe[0] == '\\' && pe[1] == 'u') {
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
- | (sur & 0x3FF));
- pe += 5;
- } else {
- unescape = (char *) "?";
- break;
- }
- }
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
- unescape = buf;
- }
- break;
- default:
- p = pe;
- continue;
- }
- rb_str_buf_cat(result, unescape, unescape_len);
- p = ++pe;
- } else {
- pe++;
- }
- }
- rb_str_buf_cat(result, p, pe - p);
- return result;
-}
-
-%%{
- machine JSON_string;
- include JSON_common;
-
- write data;
-
- action parse_string {
- *result = json_string_unescape(*result, json->memo + 1, p);
- if (NIL_P(*result)) {
- fhold;
- fbreak;
- } else {
- FORCE_UTF8(*result);
- fexec p + 1;
- }
- }
-
- action exit { fhold; fbreak; }
-
- main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
-}%%
-
-static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
-{
- int cs = EVIL;
-
- *result = rb_str_buf_new(0);
- %% write init;
- json->memo = p;
- %% write exec;
-
- if (json->symbolize_names && json->parsing_name) {
- *result = rb_str_intern(*result);
- }
- if (cs >= JSON_string_first_final) {
- return p + 1;
- } else {
- return NULL;
- }
-}
-
-
-%%{
- machine JSON;
-
- write data;
-
- include JSON_common;
-
- action parse_object {
- char *np;
- json->current_nesting = 1;
- np = JSON_parse_object(json, fpc, pe, &result);
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- action parse_array {
- char *np;
- json->current_nesting = 1;
- np = JSON_parse_array(json, fpc, pe, &result);
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- main := ignore* (
- begin_object >parse_object |
- begin_array >parse_array
- ) ignore*;
-}%%
-
-/*
- * Document-class: JSON::Ext::Parser
- *
- * This is the JSON parser implemented as a C extension. It can be configured
- * to be used by setting
- *
- * JSON.parser = JSON::Ext::Parser
- *
- * with the method parser= in JSON.
- *
- */
-
-static VALUE convert_encoding(VALUE source)
-{
- char *ptr = RSTRING_PTR(source);
- long len = RSTRING_LEN(source);
- if (len < 2) {
- rb_raise(eParserError, "A JSON text must at least contain two octets!");
- }
-#ifdef HAVE_RUBY_ENCODING_H
- {
- VALUE encoding = rb_funcall(source, i_encoding, 0);
- if (encoding == CEncoding_ASCII_8BIT) {
- if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
- source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32BE);
- source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
- } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
- source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16BE);
- source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
- } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
- source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32LE);
- source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
- } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
- source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16LE);
- source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
- } else {
- FORCE_UTF8(source);
- }
- } else {
- source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8);
- }
- }
-#else
- if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
- source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source);
- } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
- source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source);
- } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
- source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source);
- } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
- source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source);
- }
-#endif
- return source;
-}
-
-/*
- * call-seq: new(source, opts => {})
- *
- * Creates a new JSON::Ext::Parser instance for the string _source_.
- *
- * Creates a new JSON::Ext::Parser instance for the string _source_.
- *
- * It will be configured by the _opts_ hash. _opts_ can have the following
- * keys:
- *
- * _opts_ can have the following keys:
- * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
- * structures. Disable depth checking with :max_nesting => false|nil|0, it
- * defaults to 19.
- * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
- * defiance of RFC 4627 to be parsed by the Parser. This option defaults to
- * false.
- * * *symbolize_names*: If set to true, returns symbols for the names
- * (keys) in a JSON object. Otherwise strings are returned, which is also
- * the default.
- * * *create_additions*: If set to false, the Parser doesn't create
- * additions even if a matchin class and create_id was found. This option
- * defaults to true.
- * * *object_class*: Defaults to Hash
- * * *array_class*: Defaults to Array
- */
-static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
-{
- char *ptr;
- long len;
- VALUE source, opts;
- GET_PARSER_INIT;
-
- if (json->Vsource) {
- rb_raise(rb_eTypeError, "already initialized instance");
- }
- rb_scan_args(argc, argv, "11", &source, &opts);
- source = convert_encoding(StringValue(source));
- ptr = RSTRING_PTR(source);
- len = RSTRING_LEN(source);
- if (!NIL_P(opts)) {
- opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
- if (NIL_P(opts)) {
- rb_raise(rb_eArgError, "opts needs to be like a hash");
- } else {
- VALUE tmp = ID2SYM(i_max_nesting);
- if (option_given_p(opts, tmp)) {
- VALUE max_nesting = rb_hash_aref(opts, tmp);
- if (RTEST(max_nesting)) {
- Check_Type(max_nesting, T_FIXNUM);
- json->max_nesting = FIX2INT(max_nesting);
- } else {
- json->max_nesting = 0;
- }
- } else {
- json->max_nesting = 19;
- }
- tmp = ID2SYM(i_allow_nan);
- if (option_given_p(opts, tmp)) {
- VALUE allow_nan = rb_hash_aref(opts, tmp);
- json->allow_nan = RTEST(allow_nan) ? 1 : 0;
- } else {
- json->allow_nan = 0;
- }
- tmp = ID2SYM(i_symbolize_names);
- if (option_given_p(opts, tmp)) {
- VALUE symbolize_names = rb_hash_aref(opts, tmp);
- json->symbolize_names = RTEST(symbolize_names) ? 1 : 0;
- } else {
- json->symbolize_names = 0;
- }
- tmp = ID2SYM(i_create_additions);
- if (option_given_p(opts, tmp)) {
- VALUE create_additions = rb_hash_aref(opts, tmp);
- if (RTEST(create_additions)) {
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- } else {
- json->create_id = Qnil;
- }
- } else {
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- }
- tmp = ID2SYM(i_object_class);
- if (option_given_p(opts, tmp)) {
- json->object_class = rb_hash_aref(opts, tmp);
- } else {
- json->object_class = Qnil;
- }
- tmp = ID2SYM(i_array_class);
- if (option_given_p(opts, tmp)) {
- json->array_class = rb_hash_aref(opts, tmp);
- } else {
- json->array_class = Qnil;
- }
- }
- } else {
- json->max_nesting = 19;
- json->allow_nan = 0;
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- json->object_class = Qnil;
- json->array_class = Qnil;
- }
- json->current_nesting = 0;
- json->len = len;
- json->source = ptr;
- json->Vsource = source;
- return self;
-}
-
-/*
- * call-seq: parse()
- *
- * Parses the current JSON text _source_ and returns the complete data
- * structure as a result.
- */
-static VALUE cParser_parse(VALUE self)
-{
- char *p, *pe;
- int cs = EVIL;
- VALUE result = Qnil;
- GET_PARSER;
-
- %% write init;
- p = json->source;
- pe = p + json->len;
- %% write exec;
-
- if (cs >= JSON_first_final && p == pe) {
- return result;
- } else {
- rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
- return Qnil;
- }
-}
-
-static JSON_Parser *JSON_allocate()
-{
- JSON_Parser *json = ALLOC(JSON_Parser);
- MEMZERO(json, JSON_Parser, 1);
- return json;
-}
-
-static void JSON_mark(JSON_Parser *json)
-{
- rb_gc_mark_maybe(json->Vsource);
- rb_gc_mark_maybe(json->create_id);
- rb_gc_mark_maybe(json->object_class);
- rb_gc_mark_maybe(json->array_class);
-}
-
-static void JSON_free(JSON_Parser *json)
-{
- ruby_xfree(json);
-}
-
-static VALUE cJSON_parser_s_allocate(VALUE klass)
-{
- JSON_Parser *json = JSON_allocate();
- return Data_Wrap_Struct(klass, JSON_mark, JSON_free, json);
-}
-
-/*
- * call-seq: source()
- *
- * Returns a copy of the current _source_ string, that was used to construct
- * this Parser.
- */
-static VALUE cParser_source(VALUE self)
-{
- GET_PARSER;
- return rb_str_dup(json->Vsource);
-}
-
-void Init_parser()
-{
- rb_require("json/common");
- mJSON = rb_define_module("JSON");
- mExt = rb_define_module_under(mJSON, "Ext");
- cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
- eParserError = rb_path2class("JSON::ParserError");
- eNestingError = rb_path2class("JSON::NestingError");
- rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
- rb_define_method(cParser, "initialize", cParser_initialize, -1);
- rb_define_method(cParser, "parse", cParser_parse, 0);
- rb_define_method(cParser, "source", cParser_source, 0);
-
- CNaN = rb_const_get(mJSON, rb_intern("NaN"));
- CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
- CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
-
- i_json_creatable_p = rb_intern("json_creatable?");
- i_json_create = rb_intern("json_create");
- i_create_id = rb_intern("create_id");
- i_create_additions = rb_intern("create_additions");
- i_chr = rb_intern("chr");
- i_max_nesting = rb_intern("max_nesting");
- i_allow_nan = rb_intern("allow_nan");
- i_symbolize_names = rb_intern("symbolize_names");
- i_object_class = rb_intern("object_class");
- i_array_class = rb_intern("array_class");
- i_key_p = rb_intern("key?");
- i_deep_const_get = rb_intern("deep_const_get");
-#ifdef HAVE_RUBY_ENCODING_H
- CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
- CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be"));
- CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le"));
- CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
- CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
- CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
- i_encoding = rb_intern("encoding");
- i_encode = rb_intern("encode");
- i_encode_bang = rb_intern("encode!");
- i_force_encoding = rb_intern("force_encoding");
-#else
- i_iconv = rb_intern("iconv");
-#endif
-}
-
-/*
- * Local variables:
- * mode: c
- * c-file-style: ruby
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/ext/json/parser/prereq.mk b/ext/json/parser/prereq.mk
deleted file mode 100644