diff options
Diffstat (limited to 'ext/json')
40 files changed, 7725 insertions, 4516 deletions
diff --git a/ext/json/extconf.rb b/ext/json/extconf.rb index 7595d58a98..8a99b6a5c8 100644 --- a/ext/json/extconf.rb +++ b/ext/json/extconf.rb @@ -1,2 +1,3 @@ require 'mkmf' + create_makefile('json') diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h index dc8f406b5b..752d153b31 100644 --- a/ext/json/fbuffer/fbuffer.h +++ b/ext/json/fbuffer/fbuffer.h @@ -1,89 +1,71 @@ - #ifndef _FBUFFER_H_ #define _FBUFFER_H_ -#include "ruby.h" - -#ifndef RHASH_SIZE -#define RHASH_SIZE(hsh) (RHASH(hsh)->tbl->num_entries) -#endif - -#ifndef RFLOAT_VALUE -#define RFLOAT_VALUE(val) (RFLOAT(val)->value) -#endif - -#ifndef RARRAY_LEN -#define RARRAY_LEN(ARRAY) RARRAY(ARRAY)->len -#endif -#ifndef RSTRING_PTR -#define RSTRING_PTR(string) RSTRING(string)->ptr -#endif -#ifndef RSTRING_LEN -#define RSTRING_LEN(string) RSTRING(string)->len -#endif +#include "../json.h" +#include "../vendor/jeaiii-ltoa.h" -#ifdef PRIsVALUE -# define RB_OBJ_CLASSNAME(obj) rb_obj_class(obj) -# define RB_OBJ_STRING(obj) (obj) -#else -# define PRIsVALUE "s" -# define RB_OBJ_CLASSNAME(obj) rb_obj_classname(obj) -# define RB_OBJ_STRING(obj) StringValueCStr(obj) -#endif - -#ifdef HAVE_RUBY_ENCODING_H -#include "ruby/encoding.h" -#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding()) -#else -#define FORCE_UTF8(obj) -#endif - -/* We don't need to guard objects for rbx, so let's do nothing at all. 
*/ -#ifndef RB_GC_GUARD -#define RB_GC_GUARD(object) -#endif +enum fbuffer_type { + FBUFFER_HEAP_ALLOCATED = 0, + FBUFFER_STACK_ALLOCATED = 1, +}; typedef struct FBufferStruct { + enum fbuffer_type type; unsigned long initial_length; - char *ptr; unsigned long len; unsigned long capa; +#if JSON_DEBUG + unsigned long requested; +#endif + char *ptr; + VALUE io; } FBuffer; +#define FBUFFER_STACK_SIZE 512 +#define FBUFFER_IO_BUFFER_SIZE (16384 - 1) #define FBUFFER_INITIAL_LENGTH_DEFAULT 1024 -#define FBUFFER_PTR(fb) (fb->ptr) -#define FBUFFER_LEN(fb) (fb->len) -#define FBUFFER_CAPA(fb) (fb->capa) +#define FBUFFER_PTR(fb) ((fb)->ptr) +#define FBUFFER_LEN(fb) ((fb)->len) +#define FBUFFER_CAPA(fb) ((fb)->capa) #define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb) -static FBuffer *fbuffer_alloc(unsigned long initial_length); static void fbuffer_free(FBuffer *fb); static void fbuffer_clear(FBuffer *fb); static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len); -#ifdef JSON_GENERATOR static void fbuffer_append_long(FBuffer *fb, long number); +static inline void fbuffer_append_char(FBuffer *fb, char newchr); +static VALUE fbuffer_finalize(FBuffer *fb); + +static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size) +{ + fb->initial_length = (initial_length > 0) ? 
initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT; + if (stack_buffer) { + fb->type = FBUFFER_STACK_ALLOCATED; + fb->ptr = stack_buffer; + fb->capa = stack_buffer_size; + } +#if JSON_DEBUG + fb->requested = 0; #endif -static void fbuffer_append_char(FBuffer *fb, char newchr); -#ifdef JSON_GENERATOR -static FBuffer *fbuffer_dup(FBuffer *fb); -static VALUE fbuffer_to_s(FBuffer *fb); -#endif +} -static FBuffer *fbuffer_alloc(unsigned long initial_length) +static inline void fbuffer_consumed(FBuffer *fb, unsigned long consumed) { - FBuffer *fb; - if (initial_length <= 0) initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; - fb = ALLOC(FBuffer); - memset((void *) fb, 0, sizeof(FBuffer)); - fb->initial_length = initial_length; - return fb; +#if JSON_DEBUG + if (consumed > fb->requested) { + rb_bug("fbuffer: Out of bound write"); + } + fb->requested = 0; +#endif + fb->len += consumed; } static void fbuffer_free(FBuffer *fb) { - if (fb->ptr) ruby_xfree(fb->ptr); - ruby_xfree(fb); + if (fb->ptr && fb->type == FBUFFER_HEAP_ALLOCATED) { + ruby_xfree(fb->ptr); + } } static void fbuffer_clear(FBuffer *fb) @@ -91,97 +73,175 @@ static void fbuffer_clear(FBuffer *fb) fb->len = 0; } -static void fbuffer_inc_capa(FBuffer *fb, unsigned long requested) +static void fbuffer_flush(FBuffer *fb) +{ + rb_io_write(fb->io, rb_utf8_str_new(fb->ptr, fb->len)); + fbuffer_clear(fb); +} + +static void fbuffer_realloc(FBuffer *fb, unsigned long required) { + if (required > fb->capa) { + if (fb->type == FBUFFER_STACK_ALLOCATED) { + const char *old_buffer = fb->ptr; + fb->ptr = ALLOC_N(char, required); + fb->type = FBUFFER_HEAP_ALLOCATED; + MEMCPY(fb->ptr, old_buffer, char, fb->len); + } else { + REALLOC_N(fb->ptr, char, required); + } + fb->capa = required; + } +} + +static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) +{ + if (RB_UNLIKELY(fb->io)) { + if (fb->capa < FBUFFER_IO_BUFFER_SIZE) { + fbuffer_realloc(fb, FBUFFER_IO_BUFFER_SIZE); + } else { + fbuffer_flush(fb); + } + + if 
(RB_LIKELY(requested < fb->capa)) { + return; + } + } + unsigned long required; - if (!fb->ptr) { + if (RB_UNLIKELY(!fb->ptr)) { fb->ptr = ALLOC_N(char, fb->initial_length); fb->capa = fb->initial_length; } for (required = fb->capa; requested > required - fb->len; required <<= 1); - if (required > fb->capa) { - REALLOC_N(fb->ptr, char, required); - fb->capa = required; + fbuffer_realloc(fb, required); +} + +static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested) +{ +#if JSON_DEBUG + fb->requested = requested; +#endif + + if (RB_UNLIKELY(requested > fb->capa - fb->len)) { + fbuffer_do_inc_capa(fb, requested); } } -static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len) +static inline void fbuffer_append_reserved(FBuffer *fb, const char *newstr, unsigned long len) +{ + MEMCPY(fb->ptr + fb->len, newstr, char, len); + fbuffer_consumed(fb, len); +} + +static inline void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len) { if (len > 0) { fbuffer_inc_capa(fb, len); - MEMCPY(fb->ptr + fb->len, newstr, char, len); - fb->len += len; + fbuffer_append_reserved(fb, newstr, len); } } -#ifdef JSON_GENERATOR +/* Appends a character into a buffer. The buffer needs to have sufficient capacity, via fbuffer_inc_capa(...). 
*/ +static inline void fbuffer_append_reserved_char(FBuffer *fb, char chr) +{ +#if JSON_DEBUG + if (fb->requested < 1) { + rb_bug("fbuffer: unreserved write"); + } + fb->requested--; +#endif + + fb->ptr[fb->len] = chr; + fb->len++; +} + static void fbuffer_append_str(FBuffer *fb, VALUE str) { const char *newstr = StringValuePtr(str); unsigned long len = RSTRING_LEN(str); - RB_GC_GUARD(str); - fbuffer_append(fb, newstr, len); } + +static void fbuffer_append_str_repeat(FBuffer *fb, VALUE str, size_t repeat) +{ + const char *newstr = StringValuePtr(str); + unsigned long len = RSTRING_LEN(str); + + fbuffer_inc_capa(fb, repeat * len); + while (repeat) { +#if JSON_DEBUG + fb->requested = len; #endif + fbuffer_append_reserved(fb, newstr, len); + repeat--; + } +} -static void fbuffer_append_char(FBuffer *fb, char newchr) +static inline void fbuffer_append_char(FBuffer *fb, char newchr) { fbuffer_inc_capa(fb, 1); *(fb->ptr + fb->len) = newchr; - fb->len++; + fbuffer_consumed(fb, 1); } -#ifdef JSON_GENERATOR -static void freverse(char *start, char *end) +static inline char *fbuffer_cursor(FBuffer *fb) { - char c; - - while (end > start) { - c = *end, *end-- = *start, *start++ = c; - } + return fb->ptr + fb->len; } -static long fltoa(long number, char *buf) +static inline void fbuffer_advance_to(FBuffer *fb, char *end) { - static char digits[] = "0123456789"; - long sign = number; - char* tmp = buf; - - if (sign < 0) number = -number; - do *tmp++ = digits[number % 10]; while (number /= 10); - if (sign < 0) *tmp++ = '-'; - freverse(buf, tmp - 1); - return tmp - buf; + fbuffer_consumed(fb, (end - fb->ptr) - fb->len); } +/* + * Appends the decimal string representation of \a number into the buffer. 
+ */ static void fbuffer_append_long(FBuffer *fb, long number) { - char buf[20]; - unsigned long len = fltoa(number, buf); - fbuffer_append(fb, buf, len); -} - -static FBuffer *fbuffer_dup(FBuffer *fb) -{ - unsigned long len = fb->len; - FBuffer *result; + /* + * The jeaiii_ultoa() function produces digits left-to-right, + * allowing us to write directly into the buffer, but we don't know + * the number of resulting characters. + * + * We do know, however, that the `number` argument is always in the + * range 0xc000000000000000 to 0x3fffffffffffffff, or, in decimal, + * -4611686018427387904 to 4611686018427387903. The max number of chars + * generated is therefore 20 (including a potential sign character). + */ + + static const int MAX_CHARS_FOR_LONG = 20; + + fbuffer_inc_capa(fb, MAX_CHARS_FOR_LONG); + + if (number < 0) { + fbuffer_append_reserved_char(fb, '-'); + + /* + * Since number is always > LONG_MIN, `-number` will not overflow + * and is always the positive abs() value. + */ + number = -number; + } - result = fbuffer_alloc(len); - fbuffer_append(result, FBUFFER_PAIR(fb)); - return result; + char *end = jeaiii_ultoa(fbuffer_cursor(fb), number); + fbuffer_advance_to(fb, end); } -static VALUE fbuffer_to_s(FBuffer *fb) +static VALUE fbuffer_finalize(FBuffer *fb) { - VALUE result = rb_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb)); - fbuffer_free(fb); - FORCE_UTF8(result); - return result; + if (fb->io) { + fbuffer_flush(fb); + rb_io_flush(fb->io); + return fb->io; + } else { + return rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb)); + } } -#endif -#endif + +#endif // _FBUFFER_H_ diff --git a/ext/json/generator/depend b/ext/json/generator/depend index 3f04c0d625..3ba4acfdd2 100644 --- a/ext/json/generator/depend +++ b/ext/json/generator/depend @@ -1,5 +1,5 @@ $(OBJS): $(ruby_headers) -generator.o: generator.c generator.h $(srcdir)/../fbuffer/fbuffer.h +generator.o: generator.c $(srcdir)/../fbuffer/fbuffer.h # AUTOGENERATED DEPENDENCIES START generator.o: 
$(RUBY_EXTCONF_H) @@ -7,9 +7,168 @@ generator.o: $(arch_hdrdir)/ruby/config.h generator.o: $(hdrdir)/ruby.h generator.o: $(hdrdir)/ruby/assert.h generator.o: $(hdrdir)/ruby/backward.h +generator.o: $(hdrdir)/ruby/backward/2/assume.h +generator.o: $(hdrdir)/ruby/backward/2/attributes.h +generator.o: $(hdrdir)/ruby/backward/2/bool.h +generator.o: $(hdrdir)/ruby/backward/2/inttypes.h +generator.o: $(hdrdir)/ruby/backward/2/limits.h +generator.o: $(hdrdir)/ruby/backward/2/long_long.h +generator.o: $(hdrdir)/ruby/backward/2/stdalign.h +generator.o: $(hdrdir)/ruby/backward/2/stdarg.h generator.o: $(hdrdir)/ruby/defines.h generator.o: $(hdrdir)/ruby/encoding.h generator.o: $(hdrdir)/ruby/intern.h +generator.o: $(hdrdir)/ruby/internal/abi.h +generator.o: $(hdrdir)/ruby/internal/anyargs.h +generator.o: $(hdrdir)/ruby/internal/arithmetic.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/char.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/double.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/int.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/long.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/short.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h +generator.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h +generator.o: $(hdrdir)/ruby/internal/assume.h +generator.o: $(hdrdir)/ruby/internal/attr/alloc_size.h +generator.o: $(hdrdir)/ruby/internal/attr/artificial.h +generator.o: $(hdrdir)/ruby/internal/attr/cold.h +generator.o: $(hdrdir)/ruby/internal/attr/const.h +generator.o: 
$(hdrdir)/ruby/internal/attr/constexpr.h +generator.o: $(hdrdir)/ruby/internal/attr/deprecated.h +generator.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h +generator.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h +generator.o: $(hdrdir)/ruby/internal/attr/error.h +generator.o: $(hdrdir)/ruby/internal/attr/flag_enum.h +generator.o: $(hdrdir)/ruby/internal/attr/forceinline.h +generator.o: $(hdrdir)/ruby/internal/attr/format.h +generator.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h +generator.o: $(hdrdir)/ruby/internal/attr/noalias.h +generator.o: $(hdrdir)/ruby/internal/attr/nodiscard.h +generator.o: $(hdrdir)/ruby/internal/attr/noexcept.h +generator.o: $(hdrdir)/ruby/internal/attr/noinline.h +generator.o: $(hdrdir)/ruby/internal/attr/nonnull.h +generator.o: $(hdrdir)/ruby/internal/attr/noreturn.h +generator.o: $(hdrdir)/ruby/internal/attr/packed_struct.h +generator.o: $(hdrdir)/ruby/internal/attr/pure.h +generator.o: $(hdrdir)/ruby/internal/attr/restrict.h +generator.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h +generator.o: $(hdrdir)/ruby/internal/attr/warning.h +generator.o: $(hdrdir)/ruby/internal/attr/weakref.h +generator.o: $(hdrdir)/ruby/internal/cast.h +generator.o: $(hdrdir)/ruby/internal/compiler_is.h +generator.o: $(hdrdir)/ruby/internal/compiler_is/apple.h +generator.o: $(hdrdir)/ruby/internal/compiler_is/clang.h +generator.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h +generator.o: $(hdrdir)/ruby/internal/compiler_is/intel.h +generator.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h +generator.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h +generator.o: $(hdrdir)/ruby/internal/compiler_since.h +generator.o: $(hdrdir)/ruby/internal/config.h +generator.o: $(hdrdir)/ruby/internal/constant_p.h +generator.o: $(hdrdir)/ruby/internal/core.h +generator.o: $(hdrdir)/ruby/internal/core/rarray.h +generator.o: $(hdrdir)/ruby/internal/core/rbasic.h +generator.o: $(hdrdir)/ruby/internal/core/rbignum.h +generator.o: $(hdrdir)/ruby/internal/core/rclass.h 
+generator.o: $(hdrdir)/ruby/internal/core/rdata.h +generator.o: $(hdrdir)/ruby/internal/core/rfile.h +generator.o: $(hdrdir)/ruby/internal/core/rhash.h +generator.o: $(hdrdir)/ruby/internal/core/rmatch.h +generator.o: $(hdrdir)/ruby/internal/core/robject.h +generator.o: $(hdrdir)/ruby/internal/core/rregexp.h +generator.o: $(hdrdir)/ruby/internal/core/rstring.h +generator.o: $(hdrdir)/ruby/internal/core/rstruct.h +generator.o: $(hdrdir)/ruby/internal/core/rtypeddata.h +generator.o: $(hdrdir)/ruby/internal/ctype.h +generator.o: $(hdrdir)/ruby/internal/dllexport.h +generator.o: $(hdrdir)/ruby/internal/dosish.h +generator.o: $(hdrdir)/ruby/internal/encoding/coderange.h +generator.o: $(hdrdir)/ruby/internal/encoding/ctype.h +generator.o: $(hdrdir)/ruby/internal/encoding/encoding.h +generator.o: $(hdrdir)/ruby/internal/encoding/pathname.h +generator.o: $(hdrdir)/ruby/internal/encoding/re.h +generator.o: $(hdrdir)/ruby/internal/encoding/sprintf.h +generator.o: $(hdrdir)/ruby/internal/encoding/string.h +generator.o: $(hdrdir)/ruby/internal/encoding/symbol.h +generator.o: $(hdrdir)/ruby/internal/encoding/transcode.h +generator.o: $(hdrdir)/ruby/internal/error.h +generator.o: $(hdrdir)/ruby/internal/eval.h +generator.o: $(hdrdir)/ruby/internal/event.h +generator.o: $(hdrdir)/ruby/internal/fl_type.h +generator.o: $(hdrdir)/ruby/internal/gc.h +generator.o: $(hdrdir)/ruby/internal/glob.h +generator.o: $(hdrdir)/ruby/internal/globals.h +generator.o: $(hdrdir)/ruby/internal/has/attribute.h +generator.o: $(hdrdir)/ruby/internal/has/builtin.h +generator.o: $(hdrdir)/ruby/internal/has/c_attribute.h +generator.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h +generator.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h +generator.o: $(hdrdir)/ruby/internal/has/extension.h +generator.o: $(hdrdir)/ruby/internal/has/feature.h +generator.o: $(hdrdir)/ruby/internal/has/warning.h +generator.o: $(hdrdir)/ruby/internal/intern/array.h +generator.o: $(hdrdir)/ruby/internal/intern/bignum.h 
+generator.o: $(hdrdir)/ruby/internal/intern/class.h +generator.o: $(hdrdir)/ruby/internal/intern/compar.h +generator.o: $(hdrdir)/ruby/internal/intern/complex.h +generator.o: $(hdrdir)/ruby/internal/intern/cont.h +generator.o: $(hdrdir)/ruby/internal/intern/dir.h +generator.o: $(hdrdir)/ruby/internal/intern/enum.h +generator.o: $(hdrdir)/ruby/internal/intern/enumerator.h +generator.o: $(hdrdir)/ruby/internal/intern/error.h +generator.o: $(hdrdir)/ruby/internal/intern/eval.h +generator.o: $(hdrdir)/ruby/internal/intern/file.h +generator.o: $(hdrdir)/ruby/internal/intern/hash.h +generator.o: $(hdrdir)/ruby/internal/intern/io.h +generator.o: $(hdrdir)/ruby/internal/intern/load.h +generator.o: $(hdrdir)/ruby/internal/intern/marshal.h +generator.o: $(hdrdir)/ruby/internal/intern/numeric.h +generator.o: $(hdrdir)/ruby/internal/intern/object.h +generator.o: $(hdrdir)/ruby/internal/intern/parse.h +generator.o: $(hdrdir)/ruby/internal/intern/proc.h +generator.o: $(hdrdir)/ruby/internal/intern/process.h +generator.o: $(hdrdir)/ruby/internal/intern/random.h +generator.o: $(hdrdir)/ruby/internal/intern/range.h +generator.o: $(hdrdir)/ruby/internal/intern/rational.h +generator.o: $(hdrdir)/ruby/internal/intern/re.h +generator.o: $(hdrdir)/ruby/internal/intern/ruby.h +generator.o: $(hdrdir)/ruby/internal/intern/select.h +generator.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +generator.o: $(hdrdir)/ruby/internal/intern/set.h +generator.o: $(hdrdir)/ruby/internal/intern/signal.h +generator.o: $(hdrdir)/ruby/internal/intern/sprintf.h +generator.o: $(hdrdir)/ruby/internal/intern/string.h +generator.o: $(hdrdir)/ruby/internal/intern/struct.h +generator.o: $(hdrdir)/ruby/internal/intern/thread.h +generator.o: $(hdrdir)/ruby/internal/intern/time.h +generator.o: $(hdrdir)/ruby/internal/intern/variable.h +generator.o: $(hdrdir)/ruby/internal/intern/vm.h +generator.o: $(hdrdir)/ruby/internal/interpreter.h +generator.o: $(hdrdir)/ruby/internal/iterator.h +generator.o: 
$(hdrdir)/ruby/internal/memory.h +generator.o: $(hdrdir)/ruby/internal/method.h +generator.o: $(hdrdir)/ruby/internal/module.h +generator.o: $(hdrdir)/ruby/internal/newobj.h +generator.o: $(hdrdir)/ruby/internal/scan_args.h +generator.o: $(hdrdir)/ruby/internal/special_consts.h +generator.o: $(hdrdir)/ruby/internal/static_assert.h +generator.o: $(hdrdir)/ruby/internal/stdalign.h +generator.o: $(hdrdir)/ruby/internal/stdbool.h +generator.o: $(hdrdir)/ruby/internal/stdckdint.h +generator.o: $(hdrdir)/ruby/internal/symbol.h +generator.o: $(hdrdir)/ruby/internal/value.h +generator.o: $(hdrdir)/ruby/internal/value_type.h +generator.o: $(hdrdir)/ruby/internal/variable.h +generator.o: $(hdrdir)/ruby/internal/warning_push.h +generator.o: $(hdrdir)/ruby/internal/xmalloc.h generator.o: $(hdrdir)/ruby/missing.h generator.o: $(hdrdir)/ruby/onigmo.h generator.o: $(hdrdir)/ruby/oniguruma.h @@ -19,6 +178,9 @@ generator.o: $(hdrdir)/ruby/ruby.h generator.o: $(hdrdir)/ruby/st.h generator.o: $(hdrdir)/ruby/subst.h generator.o: $(srcdir)/../fbuffer/fbuffer.h +generator.o: $(srcdir)/../json.h +generator.o: $(srcdir)/../simd/simd.h +generator.o: $(srcdir)/../vendor/fpconv.c +generator.o: $(srcdir)/../vendor/jeaiii-ltoa.h generator.o: generator.c -generator.o: generator.h # AUTOGENERATED DEPENDENCIES END diff --git a/ext/json/generator/extconf.rb b/ext/json/generator/extconf.rb index 8627c5f4bd..ee1bbeaba7 100644 --- a/ext/json/generator/extconf.rb +++ b/ext/json/generator/extconf.rb @@ -1,4 +1,16 @@ require 'mkmf' -$defs << "-DJSON_GENERATOR" -create_makefile 'json/ext/generator' +if RUBY_ENGINE == 'truffleruby' + # The pure-Ruby generator is faster on TruffleRuby, so skip compiling the generator extension + File.write('Makefile', dummy_makefile("").join) +else + append_cflags("-std=c99") + $defs << "-DJSON_GENERATOR" + $defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0" + + if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"]) + load __dir__ + 
"/../simd/conf.rb" + end + + create_makefile 'json/ext/generator' +end diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c index 036205d7cb..dbba99c455 100644 --- a/ext/json/generator/generator.c +++ b/ext/json/generator/generator.c @@ -1,315 +1,706 @@ +#include "../json.h" #include "../fbuffer/fbuffer.h" -#include "generator.h" +#include "../vendor/fpconv.c" -#ifdef HAVE_RUBY_ENCODING_H -static VALUE CEncoding_UTF_8; -static ID i_encoding, i_encode; -#endif +#include <math.h> +#include <ctype.h> + +#include "../simd/simd.h" + +/* ruby api and some helpers */ + +enum duplicate_key_action { + JSON_DEPRECATED = 0, + JSON_IGNORE, + JSON_RAISE, +}; + +typedef struct JSON_Generator_StateStruct { + VALUE indent; + VALUE space; + VALUE space_before; + VALUE object_nl; + VALUE array_nl; + VALUE as_json; + + long max_nesting; + long depth; + long buffer_initial_length; + + enum duplicate_key_action on_duplicate_key; + + bool as_json_single_arg; + bool allow_nan; + bool ascii_only; + bool script_safe; + bool strict; +} JSON_Generator_State; + +static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8; + +static ID i_to_s, i_to_json, i_new, i_encode; +static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key, + sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json; + + +#define GET_STATE_TO(self, state) \ + TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state) + +#define GET_STATE(self) \ + JSON_Generator_State *state; \ + GET_STATE_TO(self, state) + +struct generate_json_data; + +typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj); + +struct generate_json_data { + FBuffer *buffer; + VALUE vstate; + JSON_Generator_State *state; + VALUE obj; + generator_func func; + long depth; +}; -static VALUE mJSON, mExt, 
mGenerator, cState, mGeneratorMethods, mObject, - mHash, mArray, +static SIMD_Implementation simd_impl; + +static VALUE cState_from_state_s(VALUE self, VALUE opts); +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io); +static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj); #ifdef RUBY_INTEGER_UNIFICATION - mInteger, -#else - mFixnum, mBignum, +static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj); #endif - mFloat, mString, mString_Extend, - mTrueClass, mFalseClass, mNilClass, eGeneratorError, - eNestingError, CRegexp_MULTILINE, CJSON_SAFE_STATE_PROTOTYPE, - i_SAFE_STATE_PROTOTYPE; +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj); -static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before, - i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only, - i_pack, i_unpack, i_create_id, i_extend, i_key_p, - i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth, - i_buffer_initial_length, i_dup; +static int usascii_encindex, utf8_encindex, 
binary_encindex; -/* - * Copyright 2001-2004 Unicode, Inc. +NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str) +{ + rb_enc_associate_index(str, utf8_encindex); + VALUE exc = rb_exc_new_str(eGeneratorError, str); + rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object); + rb_exc_raise(exc); +} + +#ifdef RBIMPL_ATTR_FORMAT +RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) +#endif +NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + VALUE str = rb_vsprintf(fmt, args); + va_end(args); + raise_generator_error_str(invalid_object, str); +} + +// 0 - single byte char that don't need to be escaped. +// (x | 8) - char that needs to be escaped. +static const unsigned char CHAR_LENGTH_MASK = 7; +static const unsigned char ESCAPE_MASK = 8; + +typedef struct _search_state { + const char *ptr; + const char *end; + const char *cursor; + FBuffer *buffer; + +#ifdef HAVE_SIMD + const char *chunk_base; + const char *chunk_end; + bool has_matches; + +#if defined(HAVE_SIMD_NEON) + uint64_t matches_mask; +#elif defined(HAVE_SIMD_SSE2) + int matches_mask; +#else +#error "Unknown SIMD Implementation." +#endif /* HAVE_SIMD_NEON */ +#endif /* HAVE_SIMD */ +} search_state; + +ALWAYS_INLINE(static) void search_flush(search_state *search) +{ + // Do not remove this conditional without profiling, specifically escape-heavy text. + // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush). + // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method + // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the + // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if + // nothing needs to be flushed, we can save a few memory references with this conditional. 
+ if (search->ptr > search->cursor) { + fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor); + search->cursor = search->ptr; + } +} + +static const unsigned char escape_table_basic[256] = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static inline unsigned char search_escape_basic(search_state *search) +{ + while (search->ptr < search->end) { + if (RB_UNLIKELY(escape_table_basic[(const unsigned char)*search->ptr])) { + search_flush(search); + return 1; + } else { + search->ptr++; + } + } + search_flush(search); + return 0; +} + +ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search) +{ + const unsigned char ch = (unsigned char)*search->ptr; + switch (ch) { + case '"': fbuffer_append(search->buffer, "\\\"", 2); break; + case '\\': fbuffer_append(search->buffer, "\\\\", 2); break; + case '/': fbuffer_append(search->buffer, "\\/", 2); break; + case '\b': fbuffer_append(search->buffer, "\\b", 2); break; + case '\f': fbuffer_append(search->buffer, "\\f", 2); break; + case '\n': fbuffer_append(search->buffer, "\\n", 2); break; + case '\r': fbuffer_append(search->buffer, "\\r", 2); break; + case '\t': fbuffer_append(search->buffer, "\\t", 2); break; + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + fbuffer_append(search->buffer, scratch, 6); + break; + } + } + search->ptr++; + search->cursor = search->ptr; +} + +/* Converts in_string to a JSON string (without the wrapping '"' + * 
characters) in FBuffer out_buffer. + * + * Character are JSON-escaped according to: * - * Disclaimer + * - Always: ASCII control characters (0x00-0x1F), dquote, and + * backslash. * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. + * - If out_ascii_only: non-ASCII characters (>0x7F) * - * Limitations on Rights to Redistribute This Code + * - If script_safe: forwardslash (/), line separator (U+2028), and + * paragraph separator (U+2029) * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. + * Everything else (should be UTF-8) is just passed through and + * appended to the result. */ -/* - * Index into the table below with the first byte of a UTF-8 sequence to - * get the number of trailing bytes that are supposed to follow it. - * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is - * left as-is for anyone who may want to do such conversion, which was - * allowed in earlier algorithms. 
- */ -static const char trailingBytesForUTF8[256] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 -}; -/* - * Magic values subtracted from a buffer value during UTF8 conversion. - * This table contains as many values as there might be trailing bytes - * in a UTF-8 sequence. - */ -static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, - 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; +#if defined(HAVE_SIMD_NEON) +static inline unsigned char search_escape_basic_neon(search_state *search); +#elif defined(HAVE_SIMD_SSE2) +static inline unsigned char search_escape_basic_sse2(search_state *search); +#endif -/* - * Utility routine to tell whether a sequence of bytes is legal UTF-8. - * This must be called with the length pre-determined by the first byte. - * If not calling this from ConvertUTF8to*, then the length can be set by: - * length = trailingBytesForUTF8[*source]+1; - * and the sequence is illegal right away if there aren't that many bytes - * available. - * If presented with a length > 4, this returns 0. The Unicode - * definition of UTF-8 goes up to 4-byte sequences. - */ -static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length) -{ - UTF8 a; - const UTF8 *srcptr = source+length; - switch (length) { - default: return 0; - /* Everything else falls through when "1"... 
*/ - case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; - case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; - case 2: if ((a = (*--srcptr)) > 0xBF) return 0; - - switch (*source) { - /* no fall-through in this inner switch */ - case 0xE0: if (a < 0xA0) return 0; break; - case 0xED: if (a > 0x9F) return 0; break; - case 0xF0: if (a < 0x90) return 0; break; - case 0xF4: if (a > 0x8F) return 0; break; - default: if (a < 0x80) return 0; - } +static inline unsigned char search_escape_basic(search_state *search); - case 1: if (*source >= 0x80 && *source < 0xC2) return 0; +static inline void convert_UTF8_to_JSON(search_state *search) +{ +#ifdef HAVE_SIMD +#if defined(HAVE_SIMD_NEON) + while (search_escape_basic_neon(search)) { + escape_UTF8_char_basic(search); } - if (*source > 0xF4) return 0; - return 1; +#elif defined(HAVE_SIMD_SSE2) + if (simd_impl == SIMD_SSE2) { + while (search_escape_basic_sse2(search)) { + escape_UTF8_char_basic(search); + } + return; + } + while (search_escape_basic(search)) { + escape_UTF8_char_basic(search); + } +#endif +#else + while (search_escape_basic(search)) { + escape_UTF8_char_basic(search); + } +#endif /* HAVE_SIMD */ } -/* Escapes the UTF16 character and stores the result in the buffer buf. 
*/ -static void unicode_escape(char *buf, UTF16 character) +static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) { - const char *digits = "0123456789abcdef"; - - buf[2] = digits[character >> 12]; - buf[3] = digits[(character >> 8) & 0xf]; - buf[4] = digits[(character >> 4) & 0xf]; - buf[5] = digits[character & 0xf]; + const unsigned char ch = (unsigned char)*search->ptr; + switch (ch_len) { + case 1: { + switch (ch) { + case '"': fbuffer_append(search->buffer, "\\\"", 2); break; + case '\\': fbuffer_append(search->buffer, "\\\\", 2); break; + case '/': fbuffer_append(search->buffer, "\\/", 2); break; + case '\b': fbuffer_append(search->buffer, "\\b", 2); break; + case '\f': fbuffer_append(search->buffer, "\\f", 2); break; + case '\n': fbuffer_append(search->buffer, "\\n", 2); break; + case '\r': fbuffer_append(search->buffer, "\\r", 2); break; + case '\t': fbuffer_append(search->buffer, "\\t", 2); break; + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + fbuffer_append(search->buffer, scratch, 6); + break; + } + } + break; + } + case 3: { + if (search->ptr[2] & 1) { + fbuffer_append(search->buffer, "\\u2029", 6); + } else { + fbuffer_append(search->buffer, "\\u2028", 6); + } + break; + } + } + search->cursor = (search->ptr += ch_len); } -/* Escapes the UTF16 character and stores the result in the buffer buf, then - * the buffer buf is appended to the FBuffer buffer. */ -static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 - character) +#ifdef HAVE_SIMD + +ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len) { - unicode_escape(buf, character); - fbuffer_append(buffer, buf, 6); + RBIMPL_ASSERT_OR_ASSUME(len < vec_len); + + // Flush the buffer so everything up until the last 'len' characters are unflushed. 
+ search_flush(search); + + FBuffer *buf = search->buffer; + fbuffer_inc_capa(buf, vec_len); + + char *s = (buf->ptr + buf->len); + + // Pad the buffer with dummy characters that won't need escaping. + // This seem wasteful at first sight, but memset of vector length is very fast. + // This is a space as it can be directly represented as an immediate on AArch64. + memset(s, ' ', vec_len); + + // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters + // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage. + if (vec_len == 16) { + RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); + json_fast_memcpy16(s, search->ptr, len); + } else { + MEMCPY(s, search->ptr, char, len); + } + + return s; } -/* Converts string to a JSON string in FBuffer buffer, where all but the ASCII - * and control characters are JSON escaped. */ -static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string) +#ifdef HAVE_SIMD_NEON + +ALWAYS_INLINE(static) unsigned char neon_next_match(search_state *search) { - const UTF8 *source = (UTF8 *) RSTRING_PTR(string); - const UTF8 *sourceEnd = source + RSTRING_LEN(string); - char buf[6] = { '\\', 'u' }; + uint64_t mask = search->matches_mask; + uint32_t index = trailing_zeros64(mask) >> 2; + + // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character. 
+ // If we want to use a similar approach for full escaping we'll need to ensure: + // search->chunk_base + index >= search->ptr + // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match + // is one byte after the previous match then: + // search->chunk_base + index == search->ptr + search->ptr = search->chunk_base + index; + mask &= mask - 1; + search->matches_mask = mask; + search_flush(search); + return 1; +} - while (source < sourceEnd) { - UTF32 ch = 0; - unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; - if (source + extraBytesToRead >= sourceEnd) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "partial character in source, but hit end"); - } - if (!isLegalUTF8(source, extraBytesToRead+1)) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf-8"); +static inline unsigned char search_escape_basic_neon(search_state *search) +{ + if (RB_UNLIKELY(search->has_matches)) { + // There are more matches if search->matches_mask > 0. + if (search->matches_mask > 0) { + return neon_next_match(search); + } else { + // neon_next_match will only advance search->ptr up to the last matching character. + // Skip over any characters in the last chunk that occur after the last match. + search->has_matches = false; + search->ptr = search->chunk_end; } - /* - * The cases all fall through. See "Note A" below. - */ - switch (extraBytesToRead) { - case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ - case 3: ch += *source++; ch <<= 6; - case 2: ch += *source++; ch <<= 6; - case 1: ch += *source++; ch <<= 6; - case 0: ch += *source++; + } + + /* + * The code below implements an SIMD-based algorithm to determine if N bytes at a time + * need to be escaped. + * + * Assume the ptr = "Te\sting!" 
(the double quotes are included in the string) + * + * The explanation will be limited to the first 8 bytes of the string for simplicity. However + * the vector insructions may work on larger vectors. + * + * First, we load three constants 'lower_bound', 'backslash' and 'dblquote" in vector registers. + * + * lower_bound: [20 20 20 20 20 20 20 20] + * backslash: [5C 5C 5C 5C 5C 5C 5C 5C] + * dblquote: [22 22 22 22 22 22 22 22] + * + * Next we load the first chunk of the ptr: + * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n) + * + * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector + * as no bytes are less than 32 (0x20): + * [0 0 0 0 0 0 0 0] + * + * Next, we check if any byte in chunk is equal to a backslash: + * [0 0 0 FF 0 0 0 0] + * + * Finally we check if any byte in chunk is equal to a double quote: + * [FF 0 0 0 0 0 0 0] + * + * Now we have three vectors where each byte indicates if the corresponding byte in chunk + * needs to be escaped. We combine these vectors with a series of logical OR instructions. + * This is the needs_escape vector and it is equal to: + * [FF 0 0 FF 0 0 0 0] + * + * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of + * the values in the vector. This computes how many bytes need to be escaped within this chunk. + * + * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then, + * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we + * have at least one byte that needs to be escaped. + */ + + if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) { + search->has_matches = true; + search->chunk_base = search->ptr; + search->chunk_end = search->ptr + sizeof(uint8x16_t); + return neon_next_match(search); + } + + // There are fewer than 16 bytes left. 
+ unsigned long remaining = (search->end - search->ptr); + if (remaining >= SIMD_MINIMUM_THRESHOLD) { + char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining); + + uint64_t mask = compute_chunk_mask_neon(s); + + if (!mask) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. + fbuffer_consumed(search->buffer, remaining); + search->ptr = search->end; + search->cursor = search->end; + return 0; } - ch -= offsetsFromUTF8[extraBytesToRead]; - - if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { -#if UNI_STRICT_CONVERSION - source -= (extraBytesToRead+1); /* return to the illegal value itself */ - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf-8"); + + search->matches_mask = mask; + search->has_matches = true; + search->chunk_end = search->end; + search->chunk_base = search->ptr; + return neon_next_match(search); + } + + if (search->ptr < search->end) { + return search_escape_basic(search); + } + + search_flush(search); + return 0; +} +#endif /* HAVE_SIMD_NEON */ + +#ifdef HAVE_SIMD_SSE2 + +ALWAYS_INLINE(static) unsigned char sse2_next_match(search_state *search) +{ + int mask = search->matches_mask; + int index = trailing_zeros(mask); + + // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character. 
+ // If we want to use a similar approach for full escaping we'll need to ensure: + // search->chunk_base + index >= search->ptr + // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match + // is one byte after the previous match then: + // search->chunk_base + index == search->ptr + search->ptr = search->chunk_base + index; + mask &= mask - 1; + search->matches_mask = mask; + search_flush(search); + return 1; +} + +#if defined(__clang__) || defined(__GNUC__) +#define TARGET_SSE2 __attribute__((target("sse2"))) #else - unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR); +#define TARGET_SSE2 #endif + +ALWAYS_INLINE(static) TARGET_SSE2 unsigned char search_escape_basic_sse2(search_state *search) +{ + if (RB_UNLIKELY(search->has_matches)) { + // There are more matches if search->matches_mask > 0. + if (search->matches_mask > 0) { + return sse2_next_match(search); + } else { + // sse2_next_match will only advance search->ptr up to the last matching character. + // Skip over any characters in the last chunk that occur after the last match. 
+ search->has_matches = false; + if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) { + search->ptr = search->end; } else { - /* normal case */ - if (ch >= 0x20 && ch <= 0x7f) { - switch (ch) { - case '\\': - fbuffer_append(buffer, "\\\\", 2); - break; - case '"': - fbuffer_append(buffer, "\\\"", 2); - break; - default: - fbuffer_append_char(buffer, (char)ch); - break; - } - } else { - switch (ch) { - case '\n': - fbuffer_append(buffer, "\\n", 2); - break; - case '\r': - fbuffer_append(buffer, "\\r", 2); - break; - case '\t': - fbuffer_append(buffer, "\\t", 2); - break; - case '\f': - fbuffer_append(buffer, "\\f", 2); - break; - case '\b': - fbuffer_append(buffer, "\\b", 2); - break; - default: - unicode_escape_to_buffer(buffer, buf, (UTF16) ch); - break; + search->ptr = search->chunk_base + sizeof(__m128i); + } + } + } + + if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) { + search->has_matches = true; + search->chunk_base = search->ptr; + search->chunk_end = search->ptr + sizeof(__m128i); + return sse2_next_match(search); + } + + // There are fewer than 16 bytes left. + unsigned long remaining = (search->end - search->ptr); + if (remaining >= SIMD_MINIMUM_THRESHOLD) { + char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining); + + int needs_escape_mask = compute_chunk_mask_sse2(s); + + if (needs_escape_mask == 0) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. 
+ fbuffer_consumed(search->buffer, remaining); + search->ptr = search->end; + search->cursor = search->end; + return 0; + } + + search->has_matches = true; + search->matches_mask = needs_escape_mask; + search->chunk_base = search->ptr; + return sse2_next_match(search); + } + + if (search->ptr < search->end) { + return search_escape_basic(search); + } + + search_flush(search); + return 0; +} + +#endif /* HAVE_SIMD_SSE2 */ + +#endif /* HAVE_SIMD */ + +static const unsigned char script_safe_escape_table[256] = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Continuation byte + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // First byte of a 2-byte code point + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // First byte of a 3-byte code point + 3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029 + //First byte of a 4+ byte code point + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, +}; + +static inline unsigned char search_script_safe_escape(search_state *search) +{ + while (search->ptr < search->end) { + unsigned char ch = (unsigned char)*search->ptr; + unsigned char ch_len = script_safe_escape_table[ch]; + + if (RB_UNLIKELY(ch_len)) { + if (ch_len & ESCAPE_MASK) { + if (RB_UNLIKELY(ch_len == 11)) { + const unsigned char *uptr = (const unsigned char *)search->ptr; + if (!(uptr[1] == 0x80 && (uptr[2] 
>> 1) == 0x54)) { + search->ptr += 3; + continue; } } + search_flush(search); + return ch_len & CHAR_LENGTH_MASK; + } else { + search->ptr += ch_len; } - } else if (ch > UNI_MAX_UTF16) { -#if UNI_STRICT_CONVERSION - source -= (extraBytesToRead+1); /* return to the start */ - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf8"); -#else - unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR); -#endif } else { - /* target is a character in range 0xFFFF - 0x10FFFF. */ - ch -= halfBase; - unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START)); - unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START)); + search->ptr++; } } - RB_GC_GUARD(string); -} - -/* Converts string to a JSON string in FBuffer buffer, where only the - * characters required by the JSON standard are JSON escaped. The remaining - * characters (should be UTF8) are just passed through and appended to the - * result. */ -static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string) -{ - const char *ptr = RSTRING_PTR(string), *p; - unsigned long len = RSTRING_LEN(string), start = 0, end = 0; - const char *escape = NULL; - int escape_len; - unsigned char c; - char buf[6] = { '\\', 'u' }; - - for (start = 0, end = 0; end < len;) { - p = ptr + end; - c = (unsigned char) *p; - if (c < 0x20) { - switch (c) { - case '\n': - escape = "\\n"; - escape_len = 2; - break; - case '\r': - escape = "\\r"; - escape_len = 2; - break; - case '\t': - escape = "\\t"; - escape_len = 2; - break; - case '\f': - escape = "\\f"; - escape_len = 2; - break; - case '\b': - escape = "\\b"; - escape_len = 2; - break; - default: - unicode_escape(buf, (UTF16) *p); - escape = buf; - escape_len = 6; + search_flush(search); + return 0; +} + +static void convert_UTF8_to_script_safe_JSON(search_state *search) +{ + unsigned char ch_len; + while ((ch_len = search_script_safe_escape(search))) { + escape_UTF8_char(search, ch_len); + } +} + 
+static const unsigned char ascii_only_escape_table[256] = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Continuation byte + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // First byte of a 2-byte code point + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // First byte of a 3-byte code point + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + //First byte of a 4+ byte code point + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, +}; + +static inline unsigned char search_ascii_only_escape(search_state *search, const unsigned char escape_table[256]) +{ + while (search->ptr < search->end) { + unsigned char ch = (unsigned char)*search->ptr; + unsigned char ch_len = escape_table[ch]; + + if (RB_UNLIKELY(ch_len)) { + search_flush(search); + return ch_len & CHAR_LENGTH_MASK; + } else { + search->ptr++; + } + } + search_flush(search); + return 0; +} + +static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) +{ + const unsigned char ch = (unsigned char)*search->ptr; + switch (ch_len) { + case 1: { + switch (ch) { + case '"': fbuffer_append(search->buffer, "\\\"", 2); break; + case '\\': fbuffer_append(search->buffer, "\\\\", 2); break; + case '/': fbuffer_append(search->buffer, "\\/", 2); break; + case '\b': fbuffer_append(search->buffer, "\\b", 2); break; + case '\f': fbuffer_append(search->buffer, "\\f", 2); break; + 
case '\n': fbuffer_append(search->buffer, "\\n", 2); break; + case '\r': fbuffer_append(search->buffer, "\\r", 2); break; + case '\t': fbuffer_append(search->buffer, "\\t", 2); break; + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + fbuffer_append(search->buffer, scratch, 6); break; + } } - } else { - switch (c) { - case '\\': - escape = "\\\\"; - escape_len = 2; + break; + } + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' }; + + uint32_t wchar = 0; + + switch (ch_len) { + case 2: + wchar = ch & 0x1F; break; - case '"': - escape = "\\\""; - escape_len = 2; + case 3: + wchar = ch & 0x0F; break; - default: - { - unsigned short clen = trailingBytesForUTF8[c] + 1; - if (end + clen > len) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "partial character in source, but hit end"); - } - if (!isLegalUTF8((UTF8 *) p, clen)) { - rb_raise(rb_path2class("JSON::GeneratorError"), - "source sequence is illegal/malformed utf-8"); - } - end += clen; - } - continue; + case 4: + wchar = ch & 0x07; break; } + + for (short i = 1; i < ch_len; i++) { + wchar = (wchar << 6) | (search->ptr[i] & 0x3F); + } + + if (wchar <= 0xFFFF) { + scratch[2] = hexdig[wchar >> 12]; + scratch[3] = hexdig[(wchar >> 8) & 0xf]; + scratch[4] = hexdig[(wchar >> 4) & 0xf]; + scratch[5] = hexdig[wchar & 0xf]; + fbuffer_append(search->buffer, scratch, 6); + } else { + uint16_t hi, lo; + wchar -= 0x10000; + hi = 0xD800 + (uint16_t)(wchar >> 10); + lo = 0xDC00 + (uint16_t)(wchar & 0x3FF); + + scratch[2] = hexdig[hi >> 12]; + scratch[3] = hexdig[(hi >> 8) & 0xf]; + scratch[4] = hexdig[(hi >> 4) & 0xf]; + scratch[5] = hexdig[hi & 0xf]; + + scratch[8] = hexdig[lo >> 12]; + scratch[9] = hexdig[(lo >> 8) & 0xf]; + scratch[10] = hexdig[(lo >> 4) & 0xf]; + scratch[11] = hexdig[lo & 0xf]; + + fbuffer_append(search->buffer, 
scratch, 12); + } + + break; } - fbuffer_append(buffer, ptr + start, end - start); - fbuffer_append(buffer, escape, escape_len); - start = ++end; - escape = NULL; } - fbuffer_append(buffer, ptr + start, end - start); + search->cursor = (search->ptr += ch_len); } -static char *fstrndup(const char *ptr, unsigned long len) { - char *result; - if (len <= 0) return NULL; - result = ALLOC_N(char, len); - memcpy(result, ptr, len); - return result; +static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned char escape_table[256]) +{ + unsigned char ch_len; + while ((ch_len = search_ascii_only_escape(search, escape_table))) { + full_escape_UTF8_char(search, ch_len); + } } /* @@ -324,6 +715,76 @@ static char *fstrndup(const char *ptr, unsigned long len) { * */ +/* Explanation of the following: that's the only way to not pollute + * standard library's docs with GeneratorMethods::<ClassName> which + * are uninformative and take a large place in a list of classes + */ + +/* + * Document-module: JSON::Ext::Generator::GeneratorMethods + * :nodoc: + */ + +/* + * Document-module: JSON::Ext::Generator::GeneratorMethods::Array + * :nodoc: + */ + +/* + * Document-module: JSON::Ext::Generator::GeneratorMethods::Bignum + * :nodoc: + */ + +/* + * Document-module: JSON::Ext::Generator::GeneratorMethods::FalseClass + * :nodoc: + */ + +/* + * Document-module: JSON::Ext::Generator::GeneratorMethods::Fixnum + * :nodoc: + */ + +/* + * Document-module: JSON::Ext::Generator::GeneratorMethods::Float + * :nodoc: + */ + +/* + * Document-module: JSON::Ext::Generator::GeneratorMethods::Hash + * :nodoc: + */ + +/* + * Document-module: JSON::Ext::Generator::GeneratorMethods::Integer + * :nodoc: + */ + +/* + * Document-module: JSON::Ext::Generator::GeneratorMethods::NilClass + * :nodoc: + */ + +/* + * Document-module: JSON::Ext::Generator::GeneratorMethods::Object + * :nodoc: + */ + +/* + * Document-module: JSON::Ext::Generator::GeneratorMethods::String + * :nodoc: + */ + +/* + * 
Document-module: JSON::Ext::Generator::GeneratorMethods::String::Extend + * :nodoc: + */ + +/* + * Document-module: JSON::Ext::Generator::GeneratorMethods::TrueClass + * :nodoc: + */ + /* * call-seq: to_json(state = nil) * @@ -334,7 +795,9 @@ static char *fstrndup(const char *ptr, unsigned long len) { */ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(object); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_object, Qfalse); } /* @@ -345,8 +808,11 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) * _state_ is a JSON::State object, that can also be used to configure the * produced JSON string output further. */ -static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(array); +static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) +{ + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_array, Qfalse); } #ifdef RUBY_INTEGER_UNIFICATION @@ -357,7 +823,9 @@ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { */ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(integer); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_integer, Qfalse); } #else @@ -368,7 +836,9 @@ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(fixnum); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? 
argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_fixnum, Qfalse); } /* @@ -378,7 +848,9 @@ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(bignum); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_bignum, Qfalse); } #endif @@ -389,17 +861,9 @@ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(float); -} - -/* - * call-seq: String.included(modul) - * - * Extends _modul_ with the String::Extend module. - */ -static VALUE mString_included_s(VALUE self, VALUE modul) { - VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend); - return result; + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_float, Qfalse); } /* @@ -411,52 +875,9 @@ static VALUE mString_included_s(VALUE self, VALUE modul) { */ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(string); -} - -/* - * call-seq: to_json_raw_object() - * - * This method creates a raw object hash, that can be nested into - * other data structures and will be generated as a raw string. This - * method should be used, if you want to convert raw strings to JSON - * instead of UTF-8 strings, e. g. binary data. 
- */ -static VALUE mString_to_json_raw_object(VALUE self) -{ - VALUE ary; - VALUE result = rb_hash_new(); - rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self))); - ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*")); - rb_hash_aset(result, rb_str_new2("raw"), ary); - return result; -} - -/* - * call-seq: to_json_raw(*args) - * - * This method creates a JSON text from the result of a call to - * to_json_raw_object of this String. - */ -static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) -{ - VALUE obj = mString_to_json_raw_object(self); - Check_Type(obj, T_HASH); - return mHash_to_json(argc, argv, obj); -} - -/* - * call-seq: json_create(o) - * - * Raw Strings are JSON Objects (the raw bytes are stored in an array for the - * key "raw"). The Ruby String can be created by this module method. - */ -static VALUE mString_Extend_json_create(VALUE self, VALUE o) -{ - VALUE ary; - Check_Type(o, T_HASH); - ary = rb_hash_aref(o, rb_str_new2("raw")); - return rb_funcall(ary, i_pack, 1, rb_str_new2("C*")); + rb_check_arity(argc, 0, 1); + VALUE Vstate = cState_from_state_s(cState, argc == 1 ? 
argv[0] : Qnil); + return cState_partial_generate(Vstate, self, generate_json_string, Qfalse); } /* @@ -466,7 +887,8 @@ static VALUE mString_Extend_json_create(VALUE self, VALUE o) */ static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(true); + rb_check_arity(argc, 0, 1); + return rb_utf8_str_new("true", 4); } /* @@ -476,7 +898,8 @@ static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(false); + rb_check_arity(argc, 0, 1); + return rb_utf8_str_new("false", 5); } /* @@ -486,7 +909,8 @@ static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) */ static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self) { - GENERATE_JSON(null); + rb_check_arity(argc, 0, 1); + return rb_utf8_str_new("null", 4); } /* @@ -503,484 +927,641 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) rb_scan_args(argc, argv, "01", &state); Check_Type(string, T_STRING); state = cState_from_state_s(cState, state); - return cState_partial_generate(state, string); + return cState_partial_generate(state, string, generate_json_string, Qfalse); +} + +static void State_mark(void *ptr) +{ + JSON_Generator_State *state = ptr; + rb_gc_mark_movable(state->indent); + rb_gc_mark_movable(state->space); + rb_gc_mark_movable(state->space_before); + rb_gc_mark_movable(state->object_nl); + rb_gc_mark_movable(state->array_nl); + rb_gc_mark_movable(state->as_json); +} + +static void State_compact(void *ptr) +{ + JSON_Generator_State *state = ptr; + state->indent = rb_gc_location(state->indent); + state->space = rb_gc_location(state->space); + state->space_before = rb_gc_location(state->space_before); + state->object_nl = rb_gc_location(state->object_nl); + state->array_nl = rb_gc_location(state->array_nl); + state->as_json = rb_gc_location(state->as_json); } static void State_free(void *ptr) { JSON_Generator_State *state = ptr; - if (state->indent) 
ruby_xfree(state->indent); - if (state->space) ruby_xfree(state->space); - if (state->space_before) ruby_xfree(state->space_before); - if (state->object_nl) ruby_xfree(state->object_nl); - if (state->array_nl) ruby_xfree(state->array_nl); - if (state->array_delim) fbuffer_free(state->array_delim); - if (state->object_delim) fbuffer_free(state->object_delim); - if (state->object_delim2) fbuffer_free(state->object_delim2); ruby_xfree(state); } static size_t State_memsize(const void *ptr) { - const JSON_Generator_State *state = ptr; - size_t size = sizeof(*state); - if (state->indent) size += state->indent_len + 1; - if (state->space) size += state->space_len + 1; - if (state->space_before) size += state->space_before_len + 1; - if (state->object_nl) size += state->object_nl_len + 1; - if (state->array_nl) size += state->array_nl_len + 1; - if (state->array_delim) size += FBUFFER_CAPA(state->array_delim); - if (state->object_delim) size += FBUFFER_CAPA(state->object_delim); - if (state->object_delim2) size += FBUFFER_CAPA(state->object_delim2); - return size; + return sizeof(JSON_Generator_State); } -#ifdef NEW_TYPEDDATA_WRAPPER static const rb_data_type_t JSON_Generator_State_type = { "JSON/Generator/State", - {NULL, State_free, State_memsize,}, -#ifdef RUBY_TYPED_FREE_IMMEDIATELY + { + .dmark = State_mark, + .dfree = State_free, + .dsize = State_memsize, + .dcompact = State_compact, + }, 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -#endif + RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE, }; -#endif + +static void state_init(JSON_Generator_State *state) +{ + state->max_nesting = 100; + state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; +} static VALUE cState_s_allocate(VALUE klass) { JSON_Generator_State *state; - return TypedData_Make_Struct(klass, JSON_Generator_State, - &JSON_Generator_State_type, state); + VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state); + 
state_init(state); + return obj; } -/* - * call-seq: configure(opts) - * - * Configure this State instance with the Hash _opts_, and return - * itself. - */ -static VALUE cState_configure(VALUE self, VALUE opts) +static void vstate_spill(struct generate_json_data *data) { - VALUE tmp; - GET_STATE(self); - tmp = rb_check_convert_type(opts, T_HASH, "Hash", "to_hash"); - if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h"); - opts = tmp; - tmp = rb_hash_aref(opts, ID2SYM(i_indent)); - if (RTEST(tmp)) { - unsigned long len; - Check_Type(tmp, T_STRING); - len = RSTRING_LEN(tmp); - state->indent = fstrndup(RSTRING_PTR(tmp), len + 1); - state->indent_len = len; - } - tmp = rb_hash_aref(opts, ID2SYM(i_space)); - if (RTEST(tmp)) { - unsigned long len; - Check_Type(tmp, T_STRING); - len = RSTRING_LEN(tmp); - state->space = fstrndup(RSTRING_PTR(tmp), len + 1); - state->space_len = len; + VALUE vstate = cState_s_allocate(cState); + GET_STATE(vstate); + MEMCPY(state, data->state, JSON_Generator_State, 1); + data->state = state; + data->vstate = vstate; + RB_OBJ_WRITTEN(vstate, Qundef, state->indent); + RB_OBJ_WRITTEN(vstate, Qundef, state->space); + RB_OBJ_WRITTEN(vstate, Qundef, state->space_before); + RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl); + RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl); + RB_OBJ_WRITTEN(vstate, Qundef, state->as_json); +} + +static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj) +{ + if (RB_UNLIKELY(!data->vstate)) { + vstate_spill(data); } - tmp = rb_hash_aref(opts, ID2SYM(i_space_before)); - if (RTEST(tmp)) { - unsigned long len; - Check_Type(tmp, T_STRING); - len = RSTRING_LEN(tmp); - state->space_before = fstrndup(RSTRING_PTR(tmp), len + 1); - state->space_before_len = len; + GET_STATE(data->vstate); + state->depth = data->depth; + VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate); + // no need to restore state->depth, vstate is just a temporary State + return tmp; +} + +static VALUE 
+json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key) +{ + VALUE proc_args[2] = {object, is_key}; + return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil); +} + +static VALUE +convert_string_subclass(VALUE key) +{ + VALUE key_to_s = rb_funcall(key, i_to_s, 0); + + if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) { + VALUE cname = rb_obj_class(key); + rb_raise(rb_eTypeError, + "can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")", + cname, "String", cname, "to_s", rb_obj_class(key_to_s)); } - tmp = rb_hash_aref(opts, ID2SYM(i_array_nl)); - if (RTEST(tmp)) { - unsigned long len; - Check_Type(tmp, T_STRING); - len = RSTRING_LEN(tmp); - state->array_nl = fstrndup(RSTRING_PTR(tmp), len + 1); - state->array_nl_len = len; + + return key_to_s; +} + +static bool enc_utf8_compatible_p(int enc_idx) +{ + if (enc_idx == usascii_encindex) return true; + if (enc_idx == utf8_encindex) return true; + return false; +} + +static VALUE encode_json_string_try(VALUE str) +{ + return rb_funcall(str, i_encode, 1, Encoding_UTF_8); +} + +static VALUE encode_json_string_rescue(VALUE str, VALUE exception) +{ + raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0)); + return Qundef; +} + +static inline bool valid_json_string_p(VALUE str) +{ + int coderange = rb_enc_str_coderange(str); + + if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) { + return true; } - tmp = rb_hash_aref(opts, ID2SYM(i_object_nl)); - if (RTEST(tmp)) { - unsigned long len; - Check_Type(tmp, T_STRING); - len = RSTRING_LEN(tmp); - state->object_nl = fstrndup(RSTRING_PTR(tmp), len + 1); - state->object_nl_len = len; + + if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) { + return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str)); } - tmp = ID2SYM(i_max_nesting); - state->max_nesting = 100; - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - 
state->max_nesting = FIX2LONG(max_nesting); - } else { - state->max_nesting = 0; - } + + return false; +} + +static inline VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key) +{ + if (RB_LIKELY(valid_json_string_p(str))) { + return str; } - tmp = ID2SYM(i_depth); - state->depth = 0; - if (option_given_p(opts, tmp)) { - VALUE depth = rb_hash_aref(opts, tmp); - if (RTEST(depth)) { - Check_Type(depth, T_FIXNUM); - state->depth = FIX2LONG(depth); - } else { - state->depth = 0; + + if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) { + VALUE coerced_str = json_call_as_json(data->state, str, Qfalse); + if (coerced_str != str) { + if (RB_TYPE_P(coerced_str, T_STRING)) { + if (!valid_json_string_p(coerced_str)) { + raise_generator_error(str, "source sequence is illegal/malformed utf-8"); + } + } else { + // as_json could return another type than T_STRING + if (is_key) { + raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str)); + } + } + + return coerced_str; } } - tmp = ID2SYM(i_buffer_initial_length); - if (option_given_p(opts, tmp)) { - VALUE buffer_initial_length = rb_hash_aref(opts, tmp); - if (RTEST(buffer_initial_length)) { - long initial_length; - Check_Type(buffer_initial_length, T_FIXNUM); - initial_length = FIX2LONG(buffer_initial_length); - if (initial_length > 0) state->buffer_initial_length = initial_length; + + if (RB_ENCODING_GET_INLINED(str) == binary_encindex) { + VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); + switch (rb_enc_str_coderange(utf8_string)) { + case ENC_CODERANGE_7BIT: + return utf8_string; + case ENC_CODERANGE_VALID: + // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. 
+ // TODO: Raise in 3.0.0 + rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); + return utf8_string; + break; } } - tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan)); - state->allow_nan = RTEST(tmp); - tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only)); - state->ascii_only = RTEST(tmp); - return self; + + return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str); } -static void set_state_ivars(VALUE hash, VALUE state) +static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - VALUE ivars = rb_obj_instance_variables(state); - int i = 0; - for (i = 0; i < RARRAY_LEN(ivars); i++) { - VALUE key = rb_funcall(rb_ary_entry(ivars, i), i_to_s, 0); - long key_len = RSTRING_LEN(key); - VALUE value = rb_iv_get(state, StringValueCStr(key)); - rb_hash_aset(hash, rb_str_intern(rb_str_substr(key, 1, key_len - 1)), value); + fbuffer_append_char(buffer, '"'); + + long len; + search_state search; + search.buffer = buffer; + RSTRING_GETMEM(obj, search.ptr, len); + search.cursor = search.ptr; + search.end = search.ptr + len; + +#ifdef HAVE_SIMD + search.matches_mask = 0; + search.has_matches = false; + search.chunk_base = NULL; + search.chunk_end = NULL; +#endif /* HAVE_SIMD */ + + switch (rb_enc_str_coderange(obj)) { + case ENC_CODERANGE_7BIT: + case ENC_CODERANGE_VALID: + if (RB_UNLIKELY(data->state->ascii_only)) { + convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table); + } else if (RB_UNLIKELY(data->state->script_safe)) { + convert_UTF8_to_script_safe_JSON(&search); + } else { + convert_UTF8_to_JSON(&search); + } + break; + default: + raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); + break; } + fbuffer_append_char(buffer, '"'); } -/* - * call-seq: to_h - * - * Returns the configuration instance variables as a hash, that can be - * passed to the configure method. 
- */ -static VALUE cState_to_h(VALUE self) +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - VALUE result = rb_hash_new(); - GET_STATE(self); - set_state_ivars(result, self); - rb_hash_aset(result, ID2SYM(i_indent), rb_str_new(state->indent, state->indent_len)); - rb_hash_aset(result, ID2SYM(i_space), rb_str_new(state->space, state->space_len)); - rb_hash_aset(result, ID2SYM(i_space_before), rb_str_new(state->space_before, state->space_before_len)); - rb_hash_aset(result, ID2SYM(i_object_nl), rb_str_new(state->object_nl, state->object_nl_len)); - rb_hash_aset(result, ID2SYM(i_array_nl), rb_str_new(state->array_nl, state->array_nl_len)); - rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse); - rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse); - rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting)); - rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth)); - rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length)); - return result; + obj = ensure_valid_encoding(data, obj, false, false); + raw_generate_json_string(buffer, data, obj); } -/* -* call-seq: [](name) -* -* Returns the value returned by method +name+. 
-*/ -static VALUE cState_aref(VALUE self, VALUE name) +struct hash_foreach_arg { + VALUE hash; + struct generate_json_data *data; + int first_key_type; + bool first; + bool mixed_keys_encountered; +}; + +NOINLINE(static) void +json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg) { - name = rb_funcall(name, i_to_s, 0); - if (RTEST(rb_funcall(self, i_respond_to_p, 1, name))) { - return rb_funcall(self, i_send, 1, name); - } else { - return rb_attr_get(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name))); + if (arg->mixed_keys_encountered) { + return; + } + arg->mixed_keys_encountered = true; + + JSON_Generator_State *state = arg->data->state; + if (state->on_duplicate_key != JSON_IGNORE) { + VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse; + rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise); } } -/* -* call-seq: []=(name, value) -* -* Sets the attribute name to value. -*/ -static VALUE cState_aset(VALUE self, VALUE name, VALUE value) +static int +json_object_i(VALUE key, VALUE val, VALUE _arg) { - VALUE name_writer; + struct hash_foreach_arg *arg = (struct hash_foreach_arg *)_arg; + struct generate_json_data *data = arg->data; + + FBuffer *buffer = data->buffer; + JSON_Generator_State *state = data->state; + + long depth = data->depth; + int key_type = rb_type(key); - name = rb_funcall(name, i_to_s, 0); - name_writer = rb_str_cat2(rb_str_dup(name), "="); - if (RTEST(rb_funcall(self, i_respond_to_p, 1, name_writer))) { - return rb_funcall(self, i_send, 2, name_writer, value); + if (arg->first) { + arg->first = false; + arg->first_key_type = key_type; + } + else { + fbuffer_append_char(buffer, ','); + } + + if (RB_UNLIKELY(data->state->object_nl)) { + fbuffer_append_str(buffer, data->state->object_nl); + } + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); + } + + VALUE key_to_s; + bool as_json_called = false; + + start: + switch (key_type) { + 
case T_STRING: + if (RB_UNLIKELY(arg->first_key_type != T_STRING)) { + json_inspect_hash_with_mixed_keys(arg); + } + + if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) { + key_to_s = key; + } else { + key_to_s = convert_string_subclass(key); + } + break; + case T_SYMBOL: + if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) { + json_inspect_hash_with_mixed_keys(arg); + } + + key_to_s = rb_sym2str(key); + break; + default: + if (data->state->strict) { + if (RTEST(data->state->as_json) && !as_json_called) { + key = json_call_as_json(data->state, key, Qtrue); + key_type = rb_type(key); + as_json_called = true; + goto start; + } else { + raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key)); + } + } + key_to_s = rb_convert_type(key, T_STRING, "String", "to_s"); + break; + } + + key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true); + + if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) { + raw_generate_json_string(buffer, data, key_to_s); } else { - rb_ivar_set(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name)), value); + generate_json(buffer, data, key_to_s); } - return Qnil; + if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before); + fbuffer_append_char(buffer, ':'); + if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space); + generate_json(buffer, data, val); + + return ST_CONTINUE; } -static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) -{ - char *object_nl = state->object_nl; - long object_nl_len = state->object_nl_len; - char *indent = state->indent; - long indent_len = state->indent_len; - long max_nesting = state->max_nesting; - char *delim = FBUFFER_PTR(state->object_delim); - long delim_len = FBUFFER_LEN(state->object_delim); - char *delim2 = FBUFFER_PTR(state->object_delim2); - long delim2_len = FBUFFER_LEN(state->object_delim2); - long depth = ++state->depth; - int i, j; - VALUE key, 
key_to_s, keys; - if (max_nesting != 0 && depth > max_nesting) { - fbuffer_free(buffer); - rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); +static inline long increase_depth(struct generate_json_data *data) +{ + JSON_Generator_State *state = data->state; + long depth = ++data->depth; + if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) { + rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --data->depth); } - fbuffer_append_char(buffer, '{'); - keys = rb_funcall(obj, i_keys, 0); - for(i = 0; i < RARRAY_LEN(keys); i++) { - if (i > 0) fbuffer_append(buffer, delim, delim_len); - if (object_nl) { - fbuffer_append(buffer, object_nl, object_nl_len); - } - if (indent) { - for (j = 0; j < depth; j++) { - fbuffer_append(buffer, indent, indent_len); - } - } - key = rb_ary_entry(keys, i); - key_to_s = rb_funcall(key, i_to_s, 0); - Check_Type(key_to_s, T_STRING); - generate_json(buffer, Vstate, state, key_to_s); - fbuffer_append(buffer, delim2, delim2_len); - generate_json(buffer, Vstate, state, rb_hash_aref(obj, key)); + return depth; +} + +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + long depth = increase_depth(data); + + if (RHASH_SIZE(obj) == 0) { + fbuffer_append(buffer, "{}", 2); + --data->depth; + return; } - depth = --state->depth; - if (object_nl) { - fbuffer_append(buffer, object_nl, object_nl_len); - if (indent) { - for (j = 0; j < depth; j++) { - fbuffer_append(buffer, indent, indent_len); - } + + fbuffer_append_char(buffer, '{'); + + struct hash_foreach_arg arg = { + .hash = obj, + .data = data, + .first = true, + }; + rb_hash_foreach(obj, json_object_i, (VALUE)&arg); + + depth = --data->depth; + if (RB_UNLIKELY(data->state->object_nl)) { + fbuffer_append_str(buffer, data->state->object_nl); + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } } 
fbuffer_append_char(buffer, '}'); } -static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) -{ - char *array_nl = state->array_nl; - long array_nl_len = state->array_nl_len; - char *indent = state->indent; - long indent_len = state->indent_len; - long max_nesting = state->max_nesting; - char *delim = FBUFFER_PTR(state->array_delim); - long delim_len = FBUFFER_LEN(state->array_delim); - long depth = ++state->depth; - int i, j; - if (max_nesting != 0 && depth > max_nesting) { - fbuffer_free(buffer); - rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + long depth = increase_depth(data); + + if (RARRAY_LEN(obj) == 0) { + fbuffer_append(buffer, "[]", 2); + --data->depth; + return; } + fbuffer_append_char(buffer, '['); - if (array_nl) fbuffer_append(buffer, array_nl, array_nl_len); - for(i = 0; i < RARRAY_LEN(obj); i++) { - if (i > 0) fbuffer_append(buffer, delim, delim_len); - if (indent) { - for (j = 0; j < depth; j++) { - fbuffer_append(buffer, indent, indent_len); - } + if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl); + for (int i = 0; i < RARRAY_LEN(obj); i++) { + if (i > 0) { + fbuffer_append_char(buffer, ','); + if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl); + } + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } - generate_json(buffer, Vstate, state, rb_ary_entry(obj, i)); + generate_json(buffer, data, RARRAY_AREF(obj, i)); } - state->depth = --depth; - if (array_nl) { - fbuffer_append(buffer, array_nl, array_nl_len); - if (indent) { - for (j = 0; j < depth; j++) { - fbuffer_append(buffer, indent, indent_len); - } + data->depth = --depth; + if (RB_UNLIKELY(data->state->array_nl)) { + fbuffer_append_str(buffer, data->state->array_nl); + if 
(RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } } fbuffer_append_char(buffer, ']'); } -static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - fbuffer_append_char(buffer, '"'); -#ifdef HAVE_RUBY_ENCODING_H - obj = rb_funcall(obj, i_encode, 1, CEncoding_UTF_8); -#endif - if (state->ascii_only) { - convert_UTF8_to_JSON_ASCII(buffer, obj); + VALUE tmp; + if (rb_respond_to(obj, i_to_json)) { + tmp = json_call_to_json(data, obj); + Check_Type(tmp, T_STRING); + fbuffer_append_str(buffer, tmp); } else { - convert_UTF8_to_JSON(buffer, obj); + tmp = rb_funcall(obj, i_to_s, 0); + Check_Type(tmp, T_STRING); + generate_json_string(buffer, data, tmp); } - fbuffer_append_char(buffer, '"'); } -static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + if (data->state->strict) { + generate_json_string(buffer, data, rb_sym2str(obj)); + } else { + generate_json_fallback(buffer, data, obj); + } +} + +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "null", 4); } -static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "false", 5); } -static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "true", 4); } -static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void 
generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append_long(buffer, FIX2LONG(obj)); } -static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { VALUE tmp = rb_funcall(obj, i_to_s, 0); fbuffer_append_str(buffer, tmp); } #ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { if (FIXNUM_P(obj)) - generate_json_fixnum(buffer, Vstate, state, obj); + generate_json_fixnum(buffer, data, obj); else - generate_json_bignum(buffer, Vstate, state, obj); + generate_json_bignum(buffer, data, obj); } #endif -static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) + +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { double value = RFLOAT_VALUE(obj); - char allow_nan = state->allow_nan; - VALUE tmp = rb_funcall(obj, i_to_s, 0); - if (!allow_nan) { - if (isinf(value)) { - fbuffer_free(buffer); - rb_raise(eGeneratorError, "%u: %"PRIsVALUE" not allowed in JSON", __LINE__, RB_OBJ_STRING(tmp)); - } else if (isnan(value)) { - fbuffer_free(buffer); - rb_raise(eGeneratorError, "%u: %"PRIsVALUE" not allowed in JSON", __LINE__, RB_OBJ_STRING(tmp)); + char allow_nan = data->state->allow_nan; + if (isinf(value) || isnan(value)) { + /* for NaN and Infinity values we either raise an error or rely on Float#to_s. 
*/ + if (!allow_nan) { + if (data->state->strict && data->state->as_json) { + VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse); + if (casted_obj != obj) { + increase_depth(data); + generate_json(buffer, data, casted_obj); + data->depth--; + return; + } + } + raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0)); } + + VALUE tmp = rb_funcall(obj, i_to_s, 0); + fbuffer_append_str(buffer, tmp); + return; } - fbuffer_append_str(buffer, tmp); + + /* This implementation writes directly into the buffer. We reserve + * the 32 characters that fpconv_dtoa states as its maximum. + */ + fbuffer_inc_capa(buffer, 32); + char* d = buffer->ptr + buffer->len; + int len = fpconv_dtoa(value, d); + /* fpconv_dtoa converts a float to its shortest string representation, + * but it adds a ".0" if this is a plain integer. + */ + fbuffer_consumed(buffer, len); } -static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - VALUE tmp; - VALUE klass = CLASS_OF(obj); - if (klass == rb_cHash) { - generate_json_object(buffer, Vstate, state, obj); - } else if (klass == rb_cArray) { - generate_json_array(buffer, Vstate, state, obj); - } else if (klass == rb_cString) { - generate_json_string(buffer, Vstate, state, obj); - } else if (obj == Qnil) { - generate_json_null(buffer, Vstate, state, obj); + VALUE fragment = RSTRUCT_GET(obj, 0); + Check_Type(fragment, T_STRING); + fbuffer_append_str(buffer, fragment); +} + +static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + bool as_json_called = false; +start: + if (obj == Qnil) { + generate_json_null(buffer, data, obj); } else if (obj == Qfalse) { - generate_json_false(buffer, Vstate, state, obj); + generate_json_false(buffer, data, obj); } else if (obj == Qtrue) { - generate_json_true(buffer, Vstate, state, obj); - } else if 
(FIXNUM_P(obj)) { - generate_json_fixnum(buffer, Vstate, state, obj); - } else if (RB_TYPE_P(obj, T_BIGNUM)) { - generate_json_bignum(buffer, Vstate, state, obj); - } else if (klass == rb_cFloat) { - generate_json_float(buffer, Vstate, state, obj); - } else if (rb_respond_to(obj, i_to_json)) { - tmp = rb_funcall(obj, i_to_json, 1, Vstate); - Check_Type(tmp, T_STRING); - fbuffer_append_str(buffer, tmp); + generate_json_true(buffer, data, obj); + } else if (RB_SPECIAL_CONST_P(obj)) { + if (RB_FIXNUM_P(obj)) { + generate_json_fixnum(buffer, data, obj); + } else if (RB_FLONUM_P(obj)) { + generate_json_float(buffer, data, obj); + } else if (RB_STATIC_SYM_P(obj)) { + generate_json_symbol(buffer, data, obj); + } else { + goto general; + } } else { - tmp = rb_funcall(obj, i_to_s, 0); - Check_Type(tmp, T_STRING); - generate_json_string(buffer, Vstate, state, tmp); + VALUE klass = RBASIC_CLASS(obj); + switch (RB_BUILTIN_TYPE(obj)) { + case T_BIGNUM: + generate_json_bignum(buffer, data, obj); + break; + case T_HASH: + if (klass != rb_cHash) goto general; + generate_json_object(buffer, data, obj); + break; + case T_ARRAY: + if (klass != rb_cArray) goto general; + generate_json_array(buffer, data, obj); + break; + case T_STRING: + if (klass != rb_cString) goto general; + + if (RB_LIKELY(valid_json_string_p(obj))) { + raw_generate_json_string(buffer, data, obj); + } else if (as_json_called) { + raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); + } else { + obj = ensure_valid_encoding(data, obj, false, false); + as_json_called = true; + goto start; + } + break; + case T_SYMBOL: + generate_json_symbol(buffer, data, obj); + break; + case T_FLOAT: + if (klass != rb_cFloat) goto general; + generate_json_float(buffer, data, obj); + break; + case T_STRUCT: + if (klass != cFragment) goto general; + generate_json_fragment(buffer, data, obj); + break; + default: + general: + if (data->state->strict) { + if (RTEST(data->state->as_json) && !as_json_called) { + obj = 
json_call_as_json(data->state, obj, Qfalse); + as_json_called = true; + goto start; + } else { + raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj)); + } + } else { + generate_json_fallback(buffer, data, obj); + } + } } } -static FBuffer *cState_prepare_buffer(VALUE self) +static VALUE generate_json_try(VALUE d) { - FBuffer *buffer; - GET_STATE(self); - buffer = fbuffer_alloc(state->buffer_initial_length); + struct generate_json_data *data = (struct generate_json_data *)d; - if (state->object_delim) { - fbuffer_clear(state->object_delim); - } else { - state->object_delim = fbuffer_alloc(16); - } - fbuffer_append_char(state->object_delim, ','); - if (state->object_delim2) { - fbuffer_clear(state->object_delim2); - } else { - state->object_delim2 = fbuffer_alloc(16); - } - if (state->space_before) fbuffer_append(state->object_delim2, state->space_before, state->space_before_len); - fbuffer_append_char(state->object_delim2, ':'); - if (state->space) fbuffer_append(state->object_delim2, state->space, state->space_len); + data->func(data->buffer, data, data->obj); - if (state->array_delim) { - fbuffer_clear(state->array_delim); - } else { - state->array_delim = fbuffer_alloc(16); - } - fbuffer_append_char(state->array_delim, ','); - if (state->array_nl) fbuffer_append(state->array_delim, state->array_nl, state->array_nl_len); - return buffer; + return fbuffer_finalize(data->buffer); } -static VALUE cState_partial_generate(VALUE self, VALUE obj) +static VALUE generate_json_ensure(VALUE d) { - FBuffer *buffer = cState_prepare_buffer(self); - GET_STATE(self); - generate_json(buffer, self, state, obj); - return fbuffer_to_s(buffer); + struct generate_json_data *data = (struct generate_json_data *)d; + fbuffer_free(data->buffer); + + return Qundef; } -/* - * call-seq: generate(obj) +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io) +{ + GET_STATE(self); + + char stack_buffer[FBUFFER_STACK_SIZE]; + FBuffer 
buffer = { + .io = RTEST(io) ? io : Qfalse, + }; + fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); + + struct generate_json_data data = { + .buffer = &buffer, + .vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json + .state = state, + .depth = state->depth, + .obj = obj, + .func = func + }; + return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data); +} + +/* call-seq: + * generate(obj) -> String + * generate(obj, anIO) -> anIO * * Generates a valid JSON document from object +obj+ and returns the * result. If no valid JSON document can be created this method raises a * GeneratorError exception. */ -static VALUE cState_generate(VALUE self, VALUE obj) +static VALUE cState_generate(int argc, VALUE *argv, VALUE self) { - VALUE result = cState_partial_generate(self, obj); - GET_STATE(self); - (void)state; - return result; + rb_check_arity(argc, 1, 2); + VALUE obj = argv[0]; + VALUE io = argc > 1 ? argv[1] : Qnil; + return cState_partial_generate(self, obj, generate_json, io); } -/* - * call-seq: new(opts = {}) - * - * Instantiates a new State object, configured by _opts_. - * - * _opts_ can have the following keys: - * - * * *indent*: a string used to indent levels (default: ''), - * * *space*: a string that is put after, a : or , delimiter (default: ''), - * * *space_before*: a string that is put before a : pair delimiter (default: ''), - * * *object_nl*: a string that is put at the end of a JSON object (default: ''), - * * *array_nl*: a string that is put at the end of a JSON array (default: ''), - * * *allow_nan*: true if NaN, Infinity, and -Infinity should be - * generated, otherwise an exception is thrown, if these values are - * encountered. This options defaults to false. - * * *buffer_initial_length*: sets the initial length of the generator's - * internal buffer. 
- */ static VALUE cState_initialize(int argc, VALUE *argv, VALUE self) { - VALUE opts; - GET_STATE(self); - state->max_nesting = 100; - state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; - rb_scan_args(argc, argv, "01", &opts); - if (!NIL_P(opts)) cState_configure(self, opts); + rb_warn("The json gem extension was loaded with the stdlib ruby code. You should upgrade rubygems with `gem update --system`"); return self; } @@ -1000,14 +1581,12 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig) if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State"); MEMCPY(objState, origState, JSON_Generator_State, 1); - objState->indent = fstrndup(origState->indent, origState->indent_len); - objState->space = fstrndup(origState->space, origState->space_len); - objState->space_before = fstrndup(origState->space_before, origState->space_before_len); - objState->object_nl = fstrndup(origState->object_nl, origState->object_nl_len); - objState->array_nl = fstrndup(origState->array_nl, origState->array_nl_len); - if (origState->array_delim) objState->array_delim = fbuffer_dup(origState->array_delim); - if (origState->object_delim) objState->object_delim = fbuffer_dup(origState->object_delim); - if (origState->object_delim2) objState->object_delim2 = fbuffer_dup(origState->object_delim2); + objState->indent = origState->indent; + objState->space = origState->space; + objState->space_before = origState->space_before; + objState->object_nl = origState->object_nl; + objState->array_nl = origState->array_nl; + objState->as_json = origState->as_json; return obj; } @@ -1025,10 +1604,7 @@ static VALUE cState_from_state_s(VALUE self, VALUE opts) } else if (rb_obj_is_kind_of(opts, rb_cHash)) { return rb_funcall(self, i_new, 1, opts); } else { - if (NIL_P(CJSON_SAFE_STATE_PROTOTYPE)) { - CJSON_SAFE_STATE_PROTOTYPE = rb_const_get(mJSON, i_SAFE_STATE_PROTOTYPE); - } - return rb_funcall(CJSON_SAFE_STATE_PROTOTYPE, i_dup, 0); + return rb_class_new_instance(0, NULL, cState); } } 
@@ -1040,7 +1616,18 @@ static VALUE cState_from_state_s(VALUE self, VALUE opts) static VALUE cState_indent(VALUE self) { GET_STATE(self); - return state->indent ? rb_str_new(state->indent, state->indent_len) : rb_str_new2(""); + return state->indent ? state->indent : rb_str_freeze(rb_utf8_str_new("", 0)); +} + +static VALUE string_config(VALUE config) +{ + if (RTEST(config)) { + Check_Type(config, T_STRING); + if (RSTRING_LEN(config)) { + return rb_str_new_frozen(config); + } + } + return Qfalse; } /* @@ -1050,21 +1637,9 @@ static VALUE cState_indent(VALUE self) */ static VALUE cState_indent_set(VALUE self, VALUE indent) { - unsigned long len; + rb_check_frozen(self); GET_STATE(self); - Check_Type(indent, T_STRING); - len = RSTRING_LEN(indent); - if (len == 0) { - if (state->indent) { - ruby_xfree(state->indent); - state->indent = NULL; - state->indent_len = 0; - } - } else { - if (state->indent) ruby_xfree(state->indent); - state->indent = fstrndup(RSTRING_PTR(indent), len); - state->indent_len = len; - } + RB_OBJ_WRITE(self, &state->indent, string_config(indent)); return Qnil; } @@ -1077,7 +1652,7 @@ static VALUE cState_indent_set(VALUE self, VALUE indent) static VALUE cState_space(VALUE self) { GET_STATE(self); - return state->space ? rb_str_new(state->space, state->space_len) : rb_str_new2(""); + return state->space ? 
state->space : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1088,21 +1663,9 @@ static VALUE cState_space(VALUE self) */ static VALUE cState_space_set(VALUE self, VALUE space) { - unsigned long len; + rb_check_frozen(self); GET_STATE(self); - Check_Type(space, T_STRING); - len = RSTRING_LEN(space); - if (len == 0) { - if (state->space) { - ruby_xfree(state->space); - state->space = NULL; - state->space_len = 0; - } - } else { - if (state->space) ruby_xfree(state->space); - state->space = fstrndup(RSTRING_PTR(space), len); - state->space_len = len; - } + RB_OBJ_WRITE(self, &state->space, string_config(space)); return Qnil; } @@ -1114,7 +1677,7 @@ static VALUE cState_space_set(VALUE self, VALUE space) static VALUE cState_space_before(VALUE self) { GET_STATE(self); - return state->space_before ? rb_str_new(state->space_before, state->space_before_len) : rb_str_new2(""); + return state->space_before ? state->space_before : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1124,21 +1687,9 @@ static VALUE cState_space_before(VALUE self) */ static VALUE cState_space_before_set(VALUE self, VALUE space_before) { - unsigned long len; + rb_check_frozen(self); GET_STATE(self); - Check_Type(space_before, T_STRING); - len = RSTRING_LEN(space_before); - if (len == 0) { - if (state->space_before) { - ruby_xfree(state->space_before); - state->space_before = NULL; - state->space_before_len = 0; - } - } else { - if (state->space_before) ruby_xfree(state->space_before); - state->space_before = fstrndup(RSTRING_PTR(space_before), len); - state->space_before_len = len; - } + RB_OBJ_WRITE(self, &state->space_before, string_config(space_before)); return Qnil; } @@ -1151,7 +1702,7 @@ static VALUE cState_space_before_set(VALUE self, VALUE space_before) static VALUE cState_object_nl(VALUE self) { GET_STATE(self); - return state->object_nl ? rb_str_new(state->object_nl, state->object_nl_len) : rb_str_new2(""); + return state->object_nl ? 
state->object_nl : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1162,20 +1713,9 @@ static VALUE cState_object_nl(VALUE self) */ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) { - unsigned long len; + rb_check_frozen(self); GET_STATE(self); - Check_Type(object_nl, T_STRING); - len = RSTRING_LEN(object_nl); - if (len == 0) { - if (state->object_nl) { - ruby_xfree(state->object_nl); - state->object_nl = NULL; - } - } else { - if (state->object_nl) ruby_xfree(state->object_nl); - state->object_nl = fstrndup(RSTRING_PTR(object_nl), len); - state->object_nl_len = len; - } + RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl)); return Qnil; } @@ -1187,7 +1727,7 @@ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) static VALUE cState_array_nl(VALUE self) { GET_STATE(self); - return state->array_nl ? rb_str_new(state->array_nl, state->array_nl_len) : rb_str_new2(""); + return state->array_nl ? state->array_nl : rb_str_freeze(rb_utf8_str_new("", 0)); } /* @@ -1197,23 +1737,35 @@ static VALUE cState_array_nl(VALUE self) */ static VALUE cState_array_nl_set(VALUE self, VALUE array_nl) { - unsigned long len; + rb_check_frozen(self); GET_STATE(self); - Check_Type(array_nl, T_STRING); - len = RSTRING_LEN(array_nl); - if (len == 0) { - if (state->array_nl) { - ruby_xfree(state->array_nl); - state->array_nl = NULL; - } - } else { - if (state->array_nl) ruby_xfree(state->array_nl); - state->array_nl = fstrndup(RSTRING_PTR(array_nl), len); - state->array_nl_len = len; - } + RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl)); return Qnil; } +/* + * call-seq: as_json() + * + * This string is put at the end of a line that holds a JSON array. + */ +static VALUE cState_as_json(VALUE self) +{ + GET_STATE(self); + return state->as_json; +} + +/* + * call-seq: as_json=(as_json) + * + * This string is put at the end of a line that holds a JSON array. 
+ */ +static VALUE cState_as_json_set(VALUE self, VALUE as_json) +{ + rb_check_frozen(self); + GET_STATE(self); + RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc")); + return Qnil; +} /* * call-seq: check_circular? @@ -1239,6 +1791,11 @@ static VALUE cState_max_nesting(VALUE self) return LONG2FIX(state->max_nesting); } +static long long_config(VALUE num) +{ + return RTEST(num) ? FIX2LONG(num) : 0; +} + /* * call-seq: max_nesting=(depth) * @@ -1247,9 +1804,68 @@ static VALUE cState_max_nesting(VALUE self) */ static VALUE cState_max_nesting_set(VALUE self, VALUE depth) { + rb_check_frozen(self); + GET_STATE(self); + state->max_nesting = long_config(depth); + return Qnil; +} + +/* + * call-seq: script_safe + * + * If this boolean is true, the forward slashes will be escaped in + * the json output. + */ +static VALUE cState_script_safe(VALUE self) +{ + GET_STATE(self); + return state->script_safe ? Qtrue : Qfalse; +} + +/* + * call-seq: script_safe=(enable) + * + * This sets whether or not the forward slashes will be escaped in + * the json output. + */ +static VALUE cState_script_safe_set(VALUE self, VALUE enable) +{ + rb_check_frozen(self); + GET_STATE(self); + state->script_safe = RTEST(enable); + return Qnil; +} + +/* + * call-seq: strict + * + * If this boolean is false, types unsupported by the JSON format will + * be serialized as strings. + * If this boolean is true, types unsupported by the JSON format will + * raise a JSON::GeneratorError. + */ +static VALUE cState_strict(VALUE self) +{ GET_STATE(self); - Check_Type(depth, T_FIXNUM); - return state->max_nesting = FIX2LONG(depth); + return state->strict ? Qtrue : Qfalse; +} + +/* + * call-seq: strict=(enable) + * + * This sets whether or not to serialize types unsupported by the + * JSON format as strings. + * If this boolean is false, types unsupported by the JSON format will + * be serialized as strings. 
+ * If this boolean is true, types unsupported by the JSON format will + * raise a JSON::GeneratorError. + */ +static VALUE cState_strict_set(VALUE self, VALUE enable) +{ + rb_check_frozen(self); + GET_STATE(self); + state->strict = RTEST(enable); + return Qnil; } /* @@ -1265,6 +1881,19 @@ static VALUE cState_allow_nan_p(VALUE self) } /* + * call-seq: allow_nan=(enable) + * + * This sets whether or not to serialize NaN, Infinity, and -Infinity + */ +static VALUE cState_allow_nan_set(VALUE self, VALUE enable) +{ + rb_check_frozen(self); + GET_STATE(self); + state->allow_nan = RTEST(enable); + return Qnil; +} + +/* * call-seq: ascii_only? * * Returns true, if only ASCII characters should be generated. Otherwise @@ -1277,6 +1906,32 @@ static VALUE cState_ascii_only_p(VALUE self) } /* + * call-seq: ascii_only=(enable) + * + * This sets whether only ASCII characters should be generated. + */ +static VALUE cState_ascii_only_set(VALUE self, VALUE enable) +{ + rb_check_frozen(self); + GET_STATE(self); + state->ascii_only = RTEST(enable); + return Qnil; +} + +static VALUE cState_allow_duplicate_key_p(VALUE self) +{ + GET_STATE(self); + switch (state->on_duplicate_key) { + case JSON_IGNORE: + return Qtrue; + case JSON_DEPRECATED: + return Qnil; + default: + return Qfalse; + } +} + +/* * call-seq: depth * * This integer returns the current depth of data structure nesting. 
@@ -1295,9 +1950,9 @@ static VALUE cState_depth(VALUE self) */ static VALUE cState_depth_set(VALUE self, VALUE depth) { + rb_check_frozen(self); GET_STATE(self); - Check_Type(depth, T_FIXNUM); - state->depth = FIX2LONG(depth); + state->depth = long_config(depth); return Qnil; } @@ -1312,6 +1967,15 @@ static VALUE cState_buffer_initial_length(VALUE self) return LONG2FIX(state->buffer_initial_length); } +static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_initial_length) +{ + Check_Type(buffer_initial_length, T_FIXNUM); + long initial_length = FIX2LONG(buffer_initial_length); + if (initial_length > 0) { + state->buffer_initial_length = initial_length; + } +} + /* * call-seq: buffer_initial_length=(length) * @@ -1320,37 +1984,135 @@ static VALUE cState_buffer_initial_length(VALUE self) */ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length) { - long initial_length; + rb_check_frozen(self); GET_STATE(self); - Check_Type(buffer_initial_length, T_FIXNUM); - initial_length = FIX2LONG(buffer_initial_length); - if (initial_length > 0) { - state->buffer_initial_length = initial_length; - } + buffer_initial_length_set(state, buffer_initial_length); return Qnil; } +struct configure_state_data { + JSON_Generator_State *state; + VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated +}; + +static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value) +{ + if (RTEST(data->vstate)) { + RB_OBJ_WRITE(data->vstate, field, value); + } else { + *field = value; + } +} + +static int configure_state_i(VALUE key, VALUE val, VALUE _arg) +{ + struct configure_state_data *data = (struct configure_state_data *)_arg; + JSON_Generator_State *state = data->state; + + if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); } + else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); } + else if (key == 
sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); } + else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); } + else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); } + else if (key == sym_max_nesting) { state->max_nesting = long_config(val); } + else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); } + else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); } + else if (key == sym_depth) { state->depth = long_config(val); } + else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); } + else if (key == sym_script_safe) { state->script_safe = RTEST(val); } + else if (key == sym_escape_slash) { state->script_safe = RTEST(val); } + else if (key == sym_strict) { state->strict = RTEST(val); } + else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } + else if (key == sym_as_json) { + VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; + state->as_json_single_arg = proc && rb_proc_arity(proc) == 1; + state_write_value(data, &state->as_json, proc); + } + return ST_CONTINUE; +} + +static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config) +{ + if (!RTEST(config)) return; + + Check_Type(config, T_HASH); + + if (!RHASH_SIZE(config)) return; + + struct configure_state_data data = { + .state = state, + .vstate = vstate + }; + + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. 
+ rb_hash_foreach(config, configure_state_i, (VALUE)&data); +} + +static VALUE cState_configure(VALUE self, VALUE opts) +{ + rb_check_frozen(self); + GET_STATE(self); + configure_state(state, self, opts); + return self; +} + +static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) +{ + JSON_Generator_State state = {0}; + state_init(&state); + configure_state(&state, Qfalse, opts); + + char stack_buffer[FBUFFER_STACK_SIZE]; + FBuffer buffer = { + .io = RTEST(io) ? io : Qfalse, + }; + fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); + + struct generate_json_data data = { + .buffer = &buffer, + .vstate = Qfalse, + .state = &state, + .depth = state.depth, + .obj = obj, + .func = generate_json, + }; + return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data); +} + /* * */ void Init_generator(void) { +#ifdef HAVE_RB_EXT_RACTOR_SAFE + rb_ext_ractor_safe(true); +#endif + #undef rb_intern rb_require("json/common"); mJSON = rb_define_module("JSON"); - mExt = rb_define_module_under(mJSON, "Ext"); - mGenerator = rb_define_module_under(mExt, "Generator"); + rb_global_variable(&cFragment); + cFragment = rb_const_get(mJSON, rb_intern("Fragment")); + + VALUE mExt = rb_define_module_under(mJSON, "Ext"); + VALUE mGenerator = rb_define_module_under(mExt, "Generator"); + + rb_global_variable(&eGeneratorError); eGeneratorError = rb_path2class("JSON::GeneratorError"); + + rb_global_variable(&eNestingError); eNestingError = rb_path2class("JSON::NestingError"); - rb_gc_register_mark_object(eGeneratorError); - rb_gc_register_mark_object(eNestingError); cState = rb_define_class_under(mGenerator, "State", rb_cObject); rb_define_alloc_func(cState, cState_s_allocate); rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1); rb_define_method(cState, "initialize", cState_initialize, -1); + rb_define_alias(cState, "initialize", "initialize"); // avoid method redefinition warnings + 
rb_define_private_method(cState, "_configure", cState_configure, 1); + rb_define_method(cState, "initialize_copy", cState_init_copy, 1); rb_define_method(cState, "indent", cState_indent, 0); rb_define_method(cState, "indent=", cState_indent_set, 1); @@ -1362,85 +2124,99 @@ void Init_generator(void) rb_define_method(cState, "object_nl=", cState_object_nl_set, 1); rb_define_method(cState, "array_nl", cState_array_nl, 0); rb_define_method(cState, "array_nl=", cState_array_nl_set, 1); + rb_define_method(cState, "as_json", cState_as_json, 0); + rb_define_method(cState, "as_json=", cState_as_json_set, 1); rb_define_method(cState, "max_nesting", cState_max_nesting, 0); rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1); + rb_define_method(cState, "script_safe", cState_script_safe, 0); + rb_define_method(cState, "script_safe?", cState_script_safe, 0); + rb_define_method(cState, "script_safe=", cState_script_safe_set, 1); + rb_define_alias(cState, "escape_slash", "script_safe"); + rb_define_alias(cState, "escape_slash?", "script_safe?"); + rb_define_alias(cState, "escape_slash=", "script_safe="); + rb_define_method(cState, "strict", cState_strict, 0); + rb_define_method(cState, "strict?", cState_strict, 0); + rb_define_method(cState, "strict=", cState_strict_set, 1); rb_define_method(cState, "check_circular?", cState_check_circular_p, 0); rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0); + rb_define_method(cState, "allow_nan=", cState_allow_nan_set, 1); rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0); + rb_define_method(cState, "ascii_only=", cState_ascii_only_set, 1); rb_define_method(cState, "depth", cState_depth, 0); rb_define_method(cState, "depth=", cState_depth_set, 1); rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0); rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1); - rb_define_method(cState, "configure", cState_configure, 1); - 
rb_define_alias(cState, "merge", "configure"); - rb_define_method(cState, "to_h", cState_to_h, 0); - rb_define_alias(cState, "to_hash", "to_h"); - rb_define_method(cState, "[]", cState_aref, 1); - rb_define_method(cState, "[]=", cState_aset, 2); - rb_define_method(cState, "generate", cState_generate, 1); - - mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); - mObject = rb_define_module_under(mGeneratorMethods, "Object"); + rb_define_method(cState, "generate", cState_generate, -1); + + rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0); + + rb_define_singleton_method(cState, "generate", cState_m_generate, 3); + + VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); + + VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object"); rb_define_method(mObject, "to_json", mObject_to_json, -1); - mHash = rb_define_module_under(mGeneratorMethods, "Hash"); + + VALUE mHash = rb_define_module_under(mGeneratorMethods, "Hash"); rb_define_method(mHash, "to_json", mHash_to_json, -1); - mArray = rb_define_module_under(mGeneratorMethods, "Array"); + + VALUE mArray = rb_define_module_under(mGeneratorMethods, "Array"); rb_define_method(mArray, "to_json", mArray_to_json, -1); + #ifdef RUBY_INTEGER_UNIFICATION - mInteger = rb_define_module_under(mGeneratorMethods, "Integer"); + VALUE mInteger = rb_define_module_under(mGeneratorMethods, "Integer"); rb_define_method(mInteger, "to_json", mInteger_to_json, -1); #else - mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum"); + VALUE mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum"); rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1); - mBignum = rb_define_module_under(mGeneratorMethods, "Bignum"); + + VALUE mBignum = rb_define_module_under(mGeneratorMethods, "Bignum"); rb_define_method(mBignum, "to_json", mBignum_to_json, -1); #endif - mFloat = rb_define_module_under(mGeneratorMethods, "Float"); + VALUE mFloat = 
rb_define_module_under(mGeneratorMethods, "Float"); rb_define_method(mFloat, "to_json", mFloat_to_json, -1); - mString = rb_define_module_under(mGeneratorMethods, "String"); - rb_define_singleton_method(mString, "included", mString_included_s, 1); + + VALUE mString = rb_define_module_under(mGeneratorMethods, "String"); rb_define_method(mString, "to_json", mString_to_json, -1); - rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1); - rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0); - mString_Extend = rb_define_module_under(mString, "Extend"); - rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1); - mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass"); + + VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass"); rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1); - mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass"); + + VALUE mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass"); rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1); - mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass"); + + VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass"); rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1); - CRegexp_MULTILINE = rb_const_get(rb_cRegexp, rb_intern("MULTILINE")); + rb_global_variable(&Encoding_UTF_8); + Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + i_to_s = rb_intern("to_s"); i_to_json = rb_intern("to_json"); i_new = rb_intern("new"); - i_indent = rb_intern("indent"); - i_space = rb_intern("space"); - i_space_before = rb_intern("space_before"); - i_object_nl = rb_intern("object_nl"); - i_array_nl = rb_intern("array_nl"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = rb_intern("allow_nan"); - i_ascii_only = rb_intern("ascii_only"); - i_depth = rb_intern("depth"); - i_buffer_initial_length = 
rb_intern("buffer_initial_length"); - i_pack = rb_intern("pack"); - i_unpack = rb_intern("unpack"); - i_create_id = rb_intern("create_id"); - i_extend = rb_intern("extend"); - i_key_p = rb_intern("key?"); - i_aref = rb_intern("[]"); - i_send = rb_intern("__send__"); - i_respond_to_p = rb_intern("respond_to?"); - i_match = rb_intern("match"); - i_keys = rb_intern("keys"); - i_dup = rb_intern("dup"); -#ifdef HAVE_RUBY_ENCODING_H - CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8")); - i_encoding = rb_intern("encoding"); i_encode = rb_intern("encode"); -#endif - i_SAFE_STATE_PROTOTYPE = rb_intern("SAFE_STATE_PROTOTYPE"); - CJSON_SAFE_STATE_PROTOTYPE = Qnil; + + sym_indent = ID2SYM(rb_intern("indent")); + sym_space = ID2SYM(rb_intern("space")); + sym_space_before = ID2SYM(rb_intern("space_before")); + sym_object_nl = ID2SYM(rb_intern("object_nl")); + sym_array_nl = ID2SYM(rb_intern("array_nl")); + sym_max_nesting = ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_ascii_only = ID2SYM(rb_intern("ascii_only")); + sym_depth = ID2SYM(rb_intern("depth")); + sym_buffer_initial_length = ID2SYM(rb_intern("buffer_initial_length")); + sym_script_safe = ID2SYM(rb_intern("script_safe")); + sym_escape_slash = ID2SYM(rb_intern("escape_slash")); + sym_strict = ID2SYM(rb_intern("strict")); + sym_as_json = ID2SYM(rb_intern("as_json")); + sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key")); + + usascii_encindex = rb_usascii_encindex(); + utf8_encindex = rb_utf8_encindex(); + binary_encindex = rb_ascii8bit_encindex(); + + rb_require("json/ext/generator/state"); + + simd_impl = find_simd_implementation(); } diff --git a/ext/json/generator/generator.h b/ext/json/generator/generator.h deleted file mode 100644 index c367a6209a..0000000000 --- a/ext/json/generator/generator.h +++ /dev/null @@ -1,171 +0,0 @@ -#ifndef _GENERATOR_H_ -#define _GENERATOR_H_ - -#include <math.h> -#include 
<ctype.h> - -#include "ruby.h" - -#ifdef HAVE_RUBY_RE_H -#include "ruby/re.h" -#else -#include "re.h" -#endif - -#ifndef rb_intern_str -#define rb_intern_str(string) SYM2ID(rb_str_intern(string)) -#endif - -#ifndef rb_obj_instance_variables -#define rb_obj_instance_variables(object) rb_funcall(object, rb_intern("instance_variables"), 0) -#endif - -#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key)) - -/* unicode definitions */ - -#define UNI_STRICT_CONVERSION 1 - -typedef unsigned long UTF32; /* at least 32 bits */ -typedef unsigned short UTF16; /* at least 16 bits */ -typedef unsigned char UTF8; /* typically 8 bits */ - -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_MAX_BMP (UTF32)0x0000FFFF -#define UNI_MAX_UTF16 (UTF32)0x0010FFFF -#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF -#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF - -#define UNI_SUR_HIGH_START (UTF32)0xD800 -#define UNI_SUR_HIGH_END (UTF32)0xDBFF -#define UNI_SUR_LOW_START (UTF32)0xDC00 -#define UNI_SUR_LOW_END (UTF32)0xDFFF - -static const int halfShift = 10; /* used for shifting by 10 bits */ - -static const UTF32 halfBase = 0x0010000UL; -static const UTF32 halfMask = 0x3FFUL; - -static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length); -static void unicode_escape(char *buf, UTF16 character); -static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character); -static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string); -static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string); -static char *fstrndup(const char *ptr, unsigned long len); - -/* ruby api and some helpers */ - -typedef struct JSON_Generator_StateStruct { - char *indent; - long indent_len; - char *space; - long space_len; - char *space_before; - long space_before_len; - char *object_nl; - long object_nl_len; - char *array_nl; - long array_nl_len; - FBuffer *array_delim; - FBuffer *object_delim; - FBuffer *object_delim2; - long max_nesting; - char allow_nan; 
- char ascii_only; - long depth; - long buffer_initial_length; -} JSON_Generator_State; - -#define GET_STATE_TO(self, state) \ - TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state) - -#define GET_STATE(self) \ - JSON_Generator_State *state; \ - GET_STATE_TO(self, state) - -#define GENERATE_JSON(type) \ - FBuffer *buffer; \ - VALUE Vstate; \ - JSON_Generator_State *state; \ - \ - rb_scan_args(argc, argv, "01", &Vstate); \ - Vstate = cState_from_state_s(cState, Vstate); \ - TypedData_Get_Struct(Vstate, JSON_Generator_State, &JSON_Generator_State_type, state); \ - buffer = cState_prepare_buffer(Vstate); \ - generate_json_##type(buffer, Vstate, state, self); \ - return fbuffer_to_s(buffer) - -static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self); -#ifdef RUBY_INTEGER_UNIFICATION -static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self); -#else -static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self); -#endif -static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mString_included_s(VALUE self, VALUE modul); -static VALUE mString_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mString_to_json_raw_object(VALUE self); -static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self); -static VALUE mString_Extend_json_create(VALUE self, VALUE o); -static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self); -static void State_free(void *state); -static VALUE cState_s_allocate(VALUE klass); -static VALUE cState_configure(VALUE self, VALUE opts); -static VALUE cState_to_h(VALUE self); -static void generate_json(FBuffer *buffer, VALUE Vstate, 
JSON_Generator_State *state, VALUE obj); -static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -#ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -#endif -static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -static VALUE cState_partial_generate(VALUE self, VALUE obj); -static VALUE cState_generate(VALUE self, VALUE obj); -static VALUE cState_initialize(int argc, VALUE *argv, VALUE self); -static VALUE cState_from_state_s(VALUE self, VALUE opts); -static VALUE cState_indent(VALUE self); -static VALUE cState_indent_set(VALUE self, VALUE indent); -static VALUE cState_space(VALUE self); -static VALUE cState_space_set(VALUE self, VALUE space); -static VALUE cState_space_before(VALUE self); -static VALUE cState_space_before_set(VALUE self, VALUE space_before); -static VALUE cState_object_nl(VALUE self); -static VALUE cState_object_nl_set(VALUE self, VALUE object_nl); -static VALUE cState_array_nl(VALUE self); -static VALUE cState_array_nl_set(VALUE self, VALUE array_nl); -static VALUE cState_max_nesting(VALUE self); -static VALUE cState_max_nesting_set(VALUE 
self, VALUE depth); -static VALUE cState_allow_nan_p(VALUE self); -static VALUE cState_ascii_only_p(VALUE self); -static VALUE cState_depth(VALUE self); -static VALUE cState_depth_set(VALUE self, VALUE depth); -static FBuffer *cState_prepare_buffer(VALUE self); -#ifndef ZALLOC -#define ZALLOC(type) ((type *)ruby_zalloc(sizeof(type))) -static inline void *ruby_zalloc(size_t n) -{ - void *p = ruby_xmalloc(n); - memset(p, 0, n); - return p; -} -#endif -#ifdef TypedData_Make_Struct -static const rb_data_type_t JSON_Generator_State_type; -#define NEW_TYPEDDATA_WRAPPER 1 -#else -#define TypedData_Make_Struct(klass, type, ignore, json) Data_Make_Struct(klass, type, NULL, State_free, json) -#define TypedData_Get_Struct(self, JSON_Generator_State, ignore, json) Data_Get_Struct(self, JSON_Generator_State, json) -#endif - -#endif diff --git a/ext/json/json.gemspec b/ext/json/json.gemspec index 1cb236d2bc..5575731025 100644 --- a/ext/json/json.gemspec +++ b/ext/json/json.gemspec @@ -1,26 +1,62 @@ -# -*- encoding: utf-8 -*- +# frozen_string_literal: true + +version = File.foreach(File.join(__dir__, "lib/json/version.rb")) do |line| + /^\s*VERSION\s*=\s*'(.*)'/ =~ line and break $1 +end rescue nil + +spec = Gem::Specification.new do |s| + java_ext = Gem::Platform === s.platform && s.platform =~ 'java' || RUBY_ENGINE == 'jruby' -Gem::Specification.new do |s| s.name = "json" - s.version = "2.2.0" - - s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= - s.require_paths = ["lib"] - s.authors = ["Florian Frank"] - s.date = "2019-02-21" - s.description = "This is a JSON implementation as a Ruby extension in C." 
- s.email = "flori@ping.de" - s.extensions = ["ext/json/ext/generator/extconf.rb", "ext/json/ext/parser/extconf.rb", "ext/json/extconf.rb"] - s.extra_rdoc_files = ["README.md"] - s.files = ["./tests/test_helper.rb", ".gitignore", ".travis.yml", "CHANGES.md", "Gemfile", "README-json-jruby.md", "README.md", "Rakefile", "VERSION", "diagrams/.keep", "ext/json/ext/fbuffer/fbuffer.h", "ext/json/ext/generator/depend", "ext/json/ext/generator/extconf.rb", "ext/json/ext/generator/generator.c", "ext/json/ext/generator/generator.h", "ext/json/ext/parser/depend", "ext/json/ext/parser/extconf.rb", "ext/json/ext/parser/parser.c", "ext/json/ext/parser/parser.h", "ext/json/ext/parser/parser.rl", "ext/json/extconf.rb", "install.rb", "java/src/json/ext/ByteListTranscoder.java", "java/src/json/ext/Generator.java", "java/src/json/ext/GeneratorMethods.java", "java/src/json/ext/GeneratorService.java", "java/src/json/ext/GeneratorState.java", "java/src/json/ext/OptionsReader.java", "java/src/json/ext/Parser.java", "java/src/json/ext/Parser.rl", "java/src/json/ext/ParserService.java", "java/src/json/ext/RuntimeInfo.java", "java/src/json/ext/StringDecoder.java", "java/src/json/ext/StringEncoder.java", "java/src/json/ext/Utils.java", "json-java.gemspec", "json.gemspec", "json_pure.gemspec", "lib/json.rb", "lib/json/add/bigdecimal.rb", "lib/json/add/complex.rb", "lib/json/add/core.rb", "lib/json/add/date.rb", "lib/json/add/date_time.rb", "lib/json/add/exception.rb", "lib/json/add/ostruct.rb", "lib/json/add/range.rb", "lib/json/add/rational.rb", "lib/json/add/regexp.rb", "lib/json/add/set.rb", "lib/json/add/struct.rb", "lib/json/add/symbol.rb", "lib/json/add/time.rb", "lib/json/common.rb", "lib/json/ext.rb", "lib/json/ext/.keep", "lib/json/generic_object.rb", "lib/json/pure.rb", "lib/json/pure/generator.rb", "lib/json/pure/parser.rb", "lib/json/version.rb", "references/rfc7159.txt", "tests/fixtures/fail10.json", "tests/fixtures/fail11.json", "tests/fixtures/fail12.json", 
"tests/fixtures/fail13.json", "tests/fixtures/fail14.json", "tests/fixtures/fail18.json", "tests/fixtures/fail19.json", "tests/fixtures/fail2.json", "tests/fixtures/fail20.json", "tests/fixtures/fail21.json", "tests/fixtures/fail22.json", "tests/fixtures/fail23.json", "tests/fixtures/fail24.json", "tests/fixtures/fail25.json", "tests/fixtures/fail27.json", "tests/fixtures/fail28.json", "tests/fixtures/fail3.json", "tests/fixtures/fail4.json", "tests/fixtures/fail5.json", "tests/fixtures/fail6.json", "tests/fixtures/fail7.json", "tests/fixtures/fail8.json", "tests/fixtures/fail9.json", "tests/fixtures/obsolete_fail1.json", "tests/fixtures/pass1.json", "tests/fixtures/pass15.json", "tests/fixtures/pass16.json", "tests/fixtures/pass17.json", "tests/fixtures/pass2.json", "tests/fixtures/pass26.json", "tests/fixtures/pass3.json", "tests/json_addition_test.rb", "tests/json_common_interface_test.rb", "tests/json_encoding_test.rb", "tests/json_ext_parser_test.rb", "tests/json_fixtures_test.rb", "tests/json_generator_test.rb", "tests/json_generic_object_test.rb", "tests/json_parser_test.rb", "tests/json_string_matching_test.rb", "tests/test_helper.rb", "tools/diff.sh", "tools/fuzz.rb", "tools/server.rb"] - s.homepage = "http://flori.github.com/json" - s.licenses = ["Ruby"] - s.rdoc_options = ["--title", "JSON implemention for Ruby", "--main", "README.md"] - s.required_ruby_version = Gem::Requirement.new(">= 1.9") - s.rubygems_version = "3.0.2" + s.version = version + s.summary = "JSON Implementation for Ruby" - s.test_files = ["./tests/test_helper.rb"] + s.homepage = "https://github.com/ruby/json" + s.metadata = { + 'bug_tracker_uri' => 'https://github.com/ruby/json/issues', + 'changelog_uri' => 'https://github.com/ruby/json/blob/master/CHANGES.md', + 'documentation_uri' => 'https://docs.ruby-lang.org/en/master/JSON.html', + 'homepage_uri' => s.homepage, + 'source_code_uri' => 'https://github.com/ruby/json', + } + + s.required_ruby_version = Gem::Requirement.new(">= 2.7") + 
+ if java_ext + s.description = "A JSON implementation as a JRuby extension." + s.author = "Daniel Luz" + s.email = "dev+ruby@mernen.com" + else + s.description = "This is a JSON implementation as a Ruby extension in C." + s.authors = ["Florian Frank"] + s.email = "flori@ping.de" + end + + s.licenses = ["Ruby"] + + s.extra_rdoc_files = ["README.md"] + s.rdoc_options = ["--title", "JSON implementation for Ruby", "--main", "README.md"] + + s.files = [ + "CHANGES.md", + "COPYING", + "BSDL", + "LEGAL", + "README.md", + "json.gemspec", + ] + Dir.glob("lib/**/*.rb", base: File.expand_path("..", __FILE__)) + + if java_ext + s.platform = 'java' + s.files += Dir["lib/json/ext/**/*.jar"] + else + s.extensions = Dir["ext/json/**/extconf.rb"] + s.files += Dir["ext/json/**/*.{c,h,rb}"] + end +end - s.add_development_dependency("rake", [">= 0"]) - s.add_development_dependency("test-unit", ["~> 2.0"]) +if RUBY_ENGINE == 'jruby' && $0 == __FILE__ + Gem::Builder.new(spec).build +else + spec end diff --git a/ext/json/json.h b/ext/json/json.h new file mode 100644 index 0000000000..9379d7ae7f --- /dev/null +++ b/ext/json/json.h @@ -0,0 +1,101 @@ +#ifndef _JSON_H_ +#define _JSON_H_ + +#include "ruby.h" +#include "ruby/encoding.h" +#include <stdint.h> + +#ifndef RBIMPL_ASSERT_OR_ASSUME +# define RBIMPL_ASSERT_OR_ASSUME(x) +#endif + +#if defined(RUBY_DEBUG) && RUBY_DEBUG +# define JSON_ASSERT RUBY_ASSERT +#else +# ifdef JSON_DEBUG +# include <assert.h> +# define JSON_ASSERT(x) assert(x) +# else +# define JSON_ASSERT(x) +# endif +#endif + +/* shims */ + +#if SIZEOF_UINT64_T == SIZEOF_LONG_LONG +# define INT64T2NUM(x) LL2NUM(x) +# define UINT64T2NUM(x) ULL2NUM(x) +#elif SIZEOF_UINT64_T == SIZEOF_LONG +# define INT64T2NUM(x) LONG2NUM(x) +# define UINT64T2NUM(x) ULONG2NUM(x) +#else +# error No uint64_t conversion +#endif + +/* This is the fallback definition from Ruby 3.4 */ +#ifndef RBIMPL_STDBOOL_H +#if defined(__cplusplus) +# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) +# 
include <cstdbool> +# endif +#elif defined(HAVE_STDBOOL_H) +# include <stdbool.h> +#elif !defined(HAVE__BOOL) +typedef unsigned char _Bool; +# define bool _Bool +# define true ((_Bool)+1) +# define false ((_Bool)+0) +# define __bool_true_false_are_defined +#endif +#endif + +#ifndef HAVE_RB_EXT_RACTOR_SAFE +# undef RUBY_TYPED_FROZEN_SHAREABLE +# define RUBY_TYPED_FROZEN_SHAREABLE 0 +#endif + +#ifndef NORETURN +#define NORETURN(x) x +#endif + +#ifndef NOINLINE +#if defined(__has_attribute) && __has_attribute(noinline) +#define NOINLINE(x) __attribute__((noinline)) x +#else +#define NOINLINE(x) x +#endif +#endif + +#ifndef ALWAYS_INLINE +#if defined(__has_attribute) && __has_attribute(always_inline) +#define ALWAYS_INLINE(x) inline __attribute__((always_inline)) x +#else +#define ALWAYS_INLINE(x) inline x +#endif +#endif + +#ifndef RB_UNLIKELY +#define RB_UNLIKELY(expr) expr +#endif + +#ifndef RB_LIKELY +#define RB_LIKELY(expr) expr +#endif + +#ifndef MAYBE_UNUSED +# define MAYBE_UNUSED(x) x +#endif + +#ifdef RUBY_DEBUG +#ifndef JSON_DEBUG +#define JSON_DEBUG RUBY_DEBUG +#endif +#endif + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && INTPTR_MAX == INT64_MAX +#define JSON_CPU_LITTLE_ENDIAN_64BITS 1 +#else +#define JSON_CPU_LITTLE_ENDIAN_64BITS 0 +#endif + +#endif // _JSON_H_ diff --git a/ext/json/lib/json.rb b/ext/json/lib/json.rb index b5a6912415..f619d93252 100644 --- a/ext/json/lib/json.rb +++ b/ext/json/lib/json.rb @@ -1,63 +1,654 @@ -#frozen_string_literal: false +# frozen_string_literal: true require 'json/common' ## -# = JavaScript Object Notation (JSON) +# = JavaScript \Object Notation (\JSON) # -# JSON is a lightweight data-interchange format. It is easy for us -# humans to read and write. Plus, equally simple for machines to generate or parse. -# JSON is completely language agnostic, making it the ideal interchange format. +# \JSON is a lightweight data-interchange format. # -# Built on two universally available structures: -# 1. 
A collection of name/value pairs. Often referred to as an _object_, hash table, record, struct, keyed list, or associative array. -# 2. An ordered list of values. More commonly called an _array_, vector, sequence or list. +# A \JSON value is one of the following: +# - Double-quoted text: <tt>"foo"</tt>. +# - Number: +1+, +1.0+, +2.0e2+. +# - Boolean: +true+, +false+. +# - Null: +null+. +# - \Array: an ordered list of values, enclosed by square brackets: +# ["foo", 1, 1.0, 2.0e2, true, false, null] # -# To read more about JSON visit: http://json.org +# - \Object: a collection of name/value pairs, enclosed by curly braces; +# each name is double-quoted text; +# the values may be any \JSON values: +# {"a": "foo", "b": 1, "c": 1.0, "d": 2.0e2, "e": true, "f": false, "g": null} # -# == Parsing JSON +# A \JSON array or object may contain nested arrays, objects, and scalars +# to any depth: +# {"foo": {"bar": 1, "baz": 2}, "bat": [0, 1, 2]} +# [{"foo": 0, "bar": 1}, ["baz", 2]] # -# To parse a JSON string received by another application or generated within -# your existing application: +# == Using \Module \JSON # +# To make module \JSON available in your code, begin with: # require 'json' # -# my_hash = JSON.parse('{"hello": "goodbye"}') -# puts my_hash["hello"] => "goodbye" +# All examples here assume that this has been done. # -# Notice the extra quotes <tt>''</tt> around the hash notation. Ruby expects -# the argument to be a string and can't convert objects like a hash or array. +# === Parsing \JSON # -# Ruby converts your string into a hash +# You can parse a \String containing \JSON data using +# either of two methods: +# - <tt>JSON.parse(source, opts)</tt> +# - <tt>JSON.parse!(source, opts)</tt> # -# == Generating JSON +# where +# - +source+ is a Ruby object. +# - +opts+ is a \Hash object containing options +# that control both input allowed and output formatting. # -# Creating a JSON string for communication or serialization is -# just as simple. 
+# The difference between the two methods +# is that JSON.parse! omits some checks +# and may not be safe for some +source+ data; +# use it only for data from trusted sources. +# Use the safer method JSON.parse for less trusted sources. # -# require 'json' +# ==== Parsing \JSON Arrays # -# my_hash = {:hello => "goodbye"} -# puts JSON.generate(my_hash) => "{\"hello\":\"goodbye\"}" +# When +source+ is a \JSON array, JSON.parse by default returns a Ruby \Array: +# json = '["foo", 1, 1.0, 2.0e2, true, false, null]' +# ruby = JSON.parse(json) +# ruby # => ["foo", 1, 1.0, 200.0, true, false, nil] +# ruby.class # => Array # -# Or an alternative way: +# The \JSON array may contain nested arrays, objects, and scalars +# to any depth: +# json = '[{"foo": 0, "bar": 1}, ["baz", 2]]' +# JSON.parse(json) # => [{"foo"=>0, "bar"=>1}, ["baz", 2]] # -# require 'json' -# puts {:hello => "goodbye"}.to_json => "{\"hello\":\"goodbye\"}" +# ==== Parsing \JSON \Objects +# +# When the source is a \JSON object, JSON.parse by default returns a Ruby \Hash: +# json = '{"a": "foo", "b": 1, "c": 1.0, "d": 2.0e2, "e": true, "f": false, "g": null}' +# ruby = JSON.parse(json) +# ruby # => {"a"=>"foo", "b"=>1, "c"=>1.0, "d"=>200.0, "e"=>true, "f"=>false, "g"=>nil} +# ruby.class # => Hash +# +# The \JSON object may contain nested arrays, objects, and scalars +# to any depth: +# json = '{"foo": {"bar": 1, "baz": 2}, "bat": [0, 1, 2]}' +# JSON.parse(json) # => {"foo"=>{"bar"=>1, "baz"=>2}, "bat"=>[0, 1, 2]} +# +# ==== Parsing \JSON Scalars +# +# When the source is a \JSON scalar (not an array or object), +# JSON.parse returns a Ruby scalar. 
+# +# \String: +# ruby = JSON.parse('"foo"') +# ruby # => 'foo' +# ruby.class # => String +# \Integer: +# ruby = JSON.parse('1') +# ruby # => 1 +# ruby.class # => Integer +# \Float: +# ruby = JSON.parse('1.0') +# ruby # => 1.0 +# ruby.class # => Float +# ruby = JSON.parse('2.0e2') +# ruby # => 200.0 +# ruby.class # => Float +# Boolean: +# ruby = JSON.parse('true') +# ruby # => true +# ruby.class # => TrueClass +# ruby = JSON.parse('false') +# ruby # => false +# ruby.class # => FalseClass +# Null: +# ruby = JSON.parse('null') +# ruby # => nil +# ruby.class # => NilClass +# +# ==== Parsing Options +# +# ====== Input Options +# +# Option +max_nesting+ (\Integer) specifies the maximum nesting depth allowed; +# defaults to +100+; specify +false+ to disable depth checking. +# +# With the default, +100+: +# source = '[0, [1, [2, [3]]]]' +# ruby = JSON.parse(source) +# ruby # => [0, [1, [2, [3]]]] +# Too deep: +# # Raises JSON::NestingError (nesting of 2 is too deep): +# JSON.parse(source, {max_nesting: 1}) +# Bad value: +# # Raises TypeError (wrong argument type Symbol (expected Fixnum)): +# JSON.parse(source, {max_nesting: :foo}) +# +# --- +# +# Option +allow_duplicate_key+ specifies whether duplicate keys in objects +# should be ignored or cause an error to be raised: +# +# When not specified: +# # The last value is used and a deprecation warning emitted. +# JSON.parse('{"a": 1, "a":2}') => {"a" => 2} +# # warning: detected duplicate keys in JSON object. +# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true` +# +# When set to `+true+` +# # The last value is used. +# JSON.parse('{"a": 1, "a":2}') => {"a" => 2} +# +# When set to `+false+`, the future default: +# JSON.parse('{"a": 1, "a":2}') => duplicate key at line 1 column 1 (JSON::ParserError) +# +# --- +# +# Option +allow_nan+ (boolean) specifies whether to allow +# NaN, Infinity, and MinusInfinity in +source+; +# defaults to +false+.
+# +# With the default, +false+: +# # Raises JSON::ParserError (225: unexpected token at '[NaN]'): +# JSON.parse('[NaN]') +# # Raises JSON::ParserError (232: unexpected token at '[Infinity]'): +# JSON.parse('[Infinity]') +# # Raises JSON::ParserError (248: unexpected token at '[-Infinity]'): +# JSON.parse('[-Infinity]') +# Allow: +# source = '[NaN, Infinity, -Infinity]' +# ruby = JSON.parse(source, {allow_nan: true}) +# ruby # => [NaN, Infinity, -Infinity] +# +# --- +# +# Option +allow_trailing_comma+ (boolean) specifies whether to allow +# trailing commas in objects and arrays; +# defaults to +false+. +# +# With the default, +false+: +# JSON.parse('[1,]') # unexpected character: ']' at line 1 column 4 (JSON::ParserError) +# +# When enabled: +# JSON.parse('[1,]', allow_trailing_comma: true) # => [1] +# +# --- +# +# Option +allow_control_characters+ (boolean) specifies whether to allow +# unescaped ASCII control characters, such as newlines, in strings; +# defaults to +false+. +# +# With the default, +false+: +# JSON.parse(%{"Hello\nWorld"}) # invalid ASCII control character in string (JSON::ParserError) +# +# When enabled: +# JSON.parse(%{"Hello\nWorld"}, allow_control_characters: true) # => "Hello\nWorld" +# +# ====== Output Options +# +# Option +freeze+ (boolean) specifies whether the returned objects will be frozen; +# defaults to +false+. +# +# Option +symbolize_names+ (boolean) specifies whether returned \Hash keys +# should be Symbols; +# defaults to +false+ (use Strings). +# +# With the default, +false+: +# source = '{"a": "foo", "b": 1.0, "c": true, "d": false, "e": null}' +# ruby = JSON.parse(source) +# ruby # => {"a"=>"foo", "b"=>1.0, "c"=>true, "d"=>false, "e"=>nil} +# Use Symbols: +# ruby = JSON.parse(source, {symbolize_names: true}) +# ruby # => {:a=>"foo", :b=>1.0, :c=>true, :d=>false, :e=>nil} +# +# --- +# +# Option +object_class+ (\Class) specifies the Ruby class to be used +# for each \JSON object; +# defaults to \Hash. 
+# +# With the default, \Hash: +# source = '{"a": "foo", "b": 1.0, "c": true, "d": false, "e": null}' +# ruby = JSON.parse(source) +# ruby.class # => Hash +# Use class \OpenStruct: +# ruby = JSON.parse(source, {object_class: OpenStruct}) +# ruby # => #<OpenStruct a="foo", b=1.0, c=true, d=false, e=nil> +# +# --- +# +# Option +array_class+ (\Class) specifies the Ruby class to be used +# for each \JSON array; +# defaults to \Array. +# +# With the default, \Array: +# source = '["foo", 1.0, true, false, null]' +# ruby = JSON.parse(source) +# ruby.class # => Array +# Use class \Set: +# ruby = JSON.parse(source, {array_class: Set}) +# ruby # => #<Set: {"foo", 1.0, true, false, nil}> +# +# --- +# +# Option +create_additions+ (boolean) specifies whether to use \JSON additions in parsing. +# See {\JSON Additions}[#module-JSON-label-JSON+Additions]. +# +# === Generating \JSON +# +# To generate a Ruby \String containing \JSON data, +# use method <tt>JSON.generate(source, opts)</tt>, where +# - +source+ is a Ruby object. +# - +opts+ is a \Hash object containing options +# that control both input allowed and output formatting. 
+# +# ==== Generating \JSON from Arrays +# +# When the source is a Ruby \Array, JSON.generate returns +# a \String containing a \JSON array: +# ruby = [0, 's', :foo] +# json = JSON.generate(ruby) +# json # => '[0,"s","foo"]' +# +# The Ruby \Array array may contain nested arrays, hashes, and scalars +# to any depth: +# ruby = [0, [1, 2], {foo: 3, bar: 4}] +# json = JSON.generate(ruby) +# json # => '[0,[1,2],{"foo":3,"bar":4}]' +# +# ==== Generating \JSON from Hashes +# +# When the source is a Ruby \Hash, JSON.generate returns +# a \String containing a \JSON object: +# ruby = {foo: 0, bar: 's', baz: :bat} +# json = JSON.generate(ruby) +# json # => '{"foo":0,"bar":"s","baz":"bat"}' +# +# The Ruby \Hash array may contain nested arrays, hashes, and scalars +# to any depth: +# ruby = {foo: [0, 1], bar: {baz: 2, bat: 3}, bam: :bad} +# json = JSON.generate(ruby) +# json # => '{"foo":[0,1],"bar":{"baz":2,"bat":3},"bam":"bad"}' +# +# ==== Generating \JSON from Other Objects +# +# When the source is neither an \Array nor a \Hash, +# the generated \JSON data depends on the class of the source. 
+# +# When the source is a Ruby \Integer or \Float, JSON.generate returns +# a \String containing a \JSON number: +# JSON.generate(42) # => '42' +# JSON.generate(0.42) # => '0.42' +# +# When the source is a Ruby \String, JSON.generate returns +# a \String containing a \JSON string (with double-quotes): +# JSON.generate('A string') # => '"A string"' +# +# When the source is +true+, +false+ or +nil+, JSON.generate returns +# a \String containing the corresponding \JSON token: +# JSON.generate(true) # => 'true' +# JSON.generate(false) # => 'false' +# JSON.generate(nil) # => 'null' +# +# When the source is none of the above, JSON.generate returns +# a \String containing a \JSON string representation of the source: +# JSON.generate(:foo) # => '"foo"' +# JSON.generate(Complex(0, 0)) # => '"0+0i"' +# JSON.generate(Dir.new('.')) # => '"#<Dir>"' # -# <tt>JSON.generate</tt> only allows objects or arrays to be converted -# to JSON syntax. <tt>to_json</tt>, however, accepts many Ruby classes -# even though it acts only as a method for serialization: +# ==== Generating Options # +# ====== Input Options +# +# Option +allow_nan+ (boolean) specifies whether +# +NaN+, +Infinity+, and <tt>-Infinity</tt> may be generated; +# defaults to +false+. +# +# With the default, +false+: +# # Raises JSON::GeneratorError (920: NaN not allowed in JSON): +# JSON.generate(JSON::NaN) +# # Raises JSON::GeneratorError (917: Infinity not allowed in JSON): +# JSON.generate(JSON::Infinity) +# # Raises JSON::GeneratorError (917: -Infinity not allowed in JSON): +# JSON.generate(JSON::MinusInfinity) +# +# Allow: +# ruby = [Float::NaN, Float::Infinity, Float::MinusInfinity] +# JSON.generate(ruby, allow_nan: true) # => '[NaN,Infinity,-Infinity]' +# +# --- +# +# Option +allow_duplicate_key+ (boolean) specifies whether +# hashes with duplicate keys should be allowed or produce an error. +# defaults to emit a deprecation warning. 
+# +# With the default, (not set): +# Warning[:deprecated] = true +# JSON.generate({ foo: 1, "foo" => 2 }) +# # warning: detected duplicate key "foo" in {foo: 1, "foo" => 2}. +# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true` +# # => '{"foo":1,"foo":2}' +# +# With <tt>false</tt> +# JSON.generate({ foo: 1, "foo" => 2 }, allow_duplicate_key: false) +# # detected duplicate key "foo" in {foo: 1, "foo" => 2} (JSON::GeneratorError) +# +# In version 3.0, <tt>false</tt> will become the default. +# +# --- +# +# Option +max_nesting+ (\Integer) specifies the maximum nesting depth +# in +obj+; defaults to +100+. +# +# With the default, +100+: +# obj = [[[[[[0]]]]]] +# JSON.generate(obj) # => '[[[[[[0]]]]]]' +# +# Too deep: +# # Raises JSON::NestingError (nesting of 2 is too deep): +# JSON.generate(obj, max_nesting: 2) +# +# ====== Escaping Options +# +# Option +script_safe+ (boolean) specifies whether <tt>'\u2028'</tt>, <tt>'\u2029'</tt> +# and <tt>'/'</tt> should be escaped so as to make the JSON object safe to interpolate in script +# tags. +# +# Option +ascii_only+ (boolean) specifies whether all characters outside the ASCII range +# should be escaped. +# +# ====== Output Options +# +# The default formatting options generate the most compact +# \JSON data, all on one line and with no whitespace. +# +# You can use these formatting options to generate +# \JSON data in a more open format, using whitespace. +# See also JSON.pretty_generate. +# +# - Option +array_nl+ (\String) specifies a string (usually a newline) +# to be inserted after each \JSON array; defaults to the empty \String, <tt>''</tt>. +# - Option +object_nl+ (\String) specifies a string (usually a newline) +# to be inserted after each \JSON object; defaults to the empty \String, <tt>''</tt>.
+# - Option +indent+ (\String) specifies the string (usually spaces) to be +# used for indentation; defaults to the empty \String, <tt>''</tt>; +# defaults to the empty \String, <tt>''</tt>; +# has no effect unless options +array_nl+ or +object_nl+ specify newlines. +# - Option +space+ (\String) specifies a string (usually a space) to be +# inserted after the colon in each \JSON object's pair; +# defaults to the empty \String, <tt>''</tt>. +# - Option +space_before+ (\String) specifies a string (usually a space) to be +# inserted before the colon in each \JSON object's pair; +# defaults to the empty \String, <tt>''</tt>. +# +# In this example, +obj+ is used first to generate the shortest +# \JSON data (no whitespace), then again with all formatting options +# specified: +# +# obj = {foo: [:bar, :baz], bat: {bam: 0, bad: 1}} +# json = JSON.generate(obj) +# puts 'Compact:', json +# opts = { +# array_nl: "\n", +# object_nl: "\n", +# indent: ' ', +# space_before: ' ', +# space: ' ' +# } +# puts 'Open:', JSON.generate(obj, opts) +# +# Output: +# Compact: +# {"foo":["bar","baz"],"bat":{"bam":0,"bad":1}} +# Open: +# { +# "foo" : [ +# "bar", +# "baz" +# ], +# "bat" : { +# "bam" : 0, +# "bad" : 1 +# } +# } +# +# == \JSON Additions +# +# Note that JSON Additions must only be used with trusted data, and are +# deprecated. +# +# When you "round trip" a non-\String object from Ruby to \JSON and back, +# you have a new \String, instead of the object you began with: +# ruby0 = Range.new(0, 2) +# json = JSON.generate(ruby0) +# json # => '"0..2"' +# ruby1 = JSON.parse(json) +# ruby1 # => '0..2' +# ruby1.class # => String +# +# You can use \JSON _additions_ to preserve the original object. +# The addition is an extension of a ruby class, so that: +# - \JSON.generate stores more information in the \JSON string. +# - \JSON.parse, called with option +create_additions+, +# uses that information to create a proper Ruby object.
+# +# This example shows a \Range being generated into \JSON +# and parsed back into Ruby, both without and with +# the addition for \Range: +# ruby = Range.new(0, 2) +# # This passage does not use the addition for Range. +# json0 = JSON.generate(ruby) +# ruby0 = JSON.parse(json0) +# # This passage uses the addition for Range. +# require 'json/add/range' +# json1 = JSON.generate(ruby) +# ruby1 = JSON.parse(json1, create_additions: true) +# # Make a nice display. +# display = <<~EOT +# Generated JSON: +# Without addition: #{json0} (#{json0.class}) +# With addition: #{json1} (#{json1.class}) +# Parsed JSON: +# Without addition: #{ruby0.inspect} (#{ruby0.class}) +# With addition: #{ruby1.inspect} (#{ruby1.class}) +# EOT +# puts display +# +# This output shows the different results: +# Generated JSON: +# Without addition: "0..2" (String) +# With addition: {"json_class":"Range","a":[0,2,false]} (String) +# Parsed JSON: +# Without addition: "0..2" (String) +# With addition: 0..2 (Range) +# +# The \JSON module includes additions for certain classes. +# You can also craft custom additions. +# See {Custom \JSON Additions}[#module-JSON-label-Custom+JSON+Additions]. +# +# === Built-in Additions +# +# The \JSON module includes additions for certain classes. 
+# To use an addition, +require+ its source: +# - BigDecimal: <tt>require 'json/add/bigdecimal'</tt> +# - Complex: <tt>require 'json/add/complex'</tt> +# - Date: <tt>require 'json/add/date'</tt> +# - DateTime: <tt>require 'json/add/date_time'</tt> +# - Exception: <tt>require 'json/add/exception'</tt> +# - OpenStruct: <tt>require 'json/add/ostruct'</tt> +# - Range: <tt>require 'json/add/range'</tt> +# - Rational: <tt>require 'json/add/rational'</tt> +# - Regexp: <tt>require 'json/add/regexp'</tt> +# - Set: <tt>require 'json/add/set'</tt> +# - Struct: <tt>require 'json/add/struct'</tt> +# - Symbol: <tt>require 'json/add/symbol'</tt> +# - Time: <tt>require 'json/add/time'</tt> +# +# To reduce punctuation clutter, the examples below +# show the generated \JSON via +puts+, rather than the usual +inspect+, +# +# \BigDecimal: +# require 'json/add/bigdecimal' +# ruby0 = BigDecimal(0) # 0.0 +# json = JSON.generate(ruby0) # {"json_class":"BigDecimal","b":"27:0.0"} +# ruby1 = JSON.parse(json, create_additions: true) # 0.0 +# ruby1.class # => BigDecimal +# +# \Complex: +# require 'json/add/complex' +# ruby0 = Complex(1+0i) # 1+0i +# json = JSON.generate(ruby0) # {"json_class":"Complex","r":1,"i":0} +# ruby1 = JSON.parse(json, create_additions: true) # 1+0i +# ruby1.class # Complex +# +# \Date: +# require 'json/add/date' +# ruby0 = Date.today # 2020-05-02 +# json = JSON.generate(ruby0) # {"json_class":"Date","y":2020,"m":5,"d":2,"sg":2299161.0} +# ruby1 = JSON.parse(json, create_additions: true) # 2020-05-02 +# ruby1.class # Date +# +# \DateTime: +# require 'json/add/date_time' +# ruby0 = DateTime.now # 2020-05-02T10:38:13-05:00 +# json = JSON.generate(ruby0) # {"json_class":"DateTime","y":2020,"m":5,"d":2,"H":10,"M":38,"S":13,"of":"-5/24","sg":2299161.0} +# ruby1 = JSON.parse(json, create_additions: true) # 2020-05-02T10:38:13-05:00 +# ruby1.class # DateTime +# +# \Exception (and its subclasses including \RuntimeError): +# require 'json/add/exception' +# ruby0 = 
Exception.new('A message') # A message +# json = JSON.generate(ruby0) # {"json_class":"Exception","m":"A message","b":null} +# ruby1 = JSON.parse(json, create_additions: true) # A message +# ruby1.class # Exception +# ruby0 = RuntimeError.new('Another message') # Another message +# json = JSON.generate(ruby0) # {"json_class":"RuntimeError","m":"Another message","b":null} +# ruby1 = JSON.parse(json, create_additions: true) # Another message +# ruby1.class # RuntimeError +# +# \OpenStruct: +# require 'json/add/ostruct' +# ruby0 = OpenStruct.new(name: 'Matz', language: 'Ruby') # #<OpenStruct name="Matz", language="Ruby"> +# json = JSON.generate(ruby0) # {"json_class":"OpenStruct","t":{"name":"Matz","language":"Ruby"}} +# ruby1 = JSON.parse(json, create_additions: true) # #<OpenStruct name="Matz", language="Ruby"> +# ruby1.class # OpenStruct +# +# \Range: +# require 'json/add/range' +# ruby0 = Range.new(0, 2) # 0..2 +# json = JSON.generate(ruby0) # {"json_class":"Range","a":[0,2,false]} +# ruby1 = JSON.parse(json, create_additions: true) # 0..2 +# ruby1.class # Range +# +# \Rational: +# require 'json/add/rational' +# ruby0 = Rational(1, 3) # 1/3 +# json = JSON.generate(ruby0) # {"json_class":"Rational","n":1,"d":3} +# ruby1 = JSON.parse(json, create_additions: true) # 1/3 +# ruby1.class # Rational +# +# \Regexp: +# require 'json/add/regexp' +# ruby0 = Regexp.new('foo') # (?-mix:foo) +# json = JSON.generate(ruby0) # {"json_class":"Regexp","o":0,"s":"foo"} +# ruby1 = JSON.parse(json, create_additions: true) # (?-mix:foo) +# ruby1.class # Regexp +# +# \Set: +# require 'json/add/set' +# ruby0 = Set.new([0, 1, 2]) # #<Set: {0, 1, 2}> +# json = JSON.generate(ruby0) # {"json_class":"Set","a":[0,1,2]} +# ruby1 = JSON.parse(json, create_additions: true) # #<Set: {0, 1, 2}> +# ruby1.class # Set +# +# \Struct: +# require 'json/add/struct' +# Customer = Struct.new(:name, :address) # Customer +# ruby0 = Customer.new("Dave", "123 Main") # #<struct Customer name="Dave", address="123 
Main"> +# json = JSON.generate(ruby0) # {"json_class":"Customer","v":["Dave","123 Main"]} +# ruby1 = JSON.parse(json, create_additions: true) # #<struct Customer name="Dave", address="123 Main"> +# ruby1.class # Customer +# +# \Symbol: +# require 'json/add/symbol' +# ruby0 = :foo # foo +# json = JSON.generate(ruby0) # {"json_class":"Symbol","s":"foo"} +# ruby1 = JSON.parse(json, create_additions: true) # foo +# ruby1.class # Symbol +# +# \Time: +# require 'json/add/time' +# ruby0 = Time.now # 2020-05-02 11:28:26 -0500 +# json = JSON.generate(ruby0) # {"json_class":"Time","s":1588436906,"n":840560000} +# ruby1 = JSON.parse(json, create_additions: true) # 2020-05-02 11:28:26 -0500 +# ruby1.class # Time +# +# +# === Custom \JSON Additions +# +# In addition to the \JSON additions provided, +# you can craft \JSON additions of your own, +# either for Ruby built-in classes or for user-defined classes. +# +# Here's a user-defined class +Foo+: +# class Foo +# attr_accessor :bar, :baz +# def initialize(bar, baz) +# self.bar = bar +# self.baz = baz +# end +# end +# +# Here's the \JSON addition for it: +# # Extend class Foo with JSON addition. +# class Foo +# # Serialize Foo object with its class name and arguments +# def to_json(*args) +# { +# JSON.create_id => self.class.name, +# 'a' => [ bar, baz ] +# }.to_json(*args) +# end +# # Deserialize JSON string by constructing new Foo object with arguments. +# def self.json_create(object) +# new(*object['a']) +# end +# end +# +# Demonstration: # require 'json' +# # This Foo object has no custom addition. +# foo0 = Foo.new(0, 1) +# json0 = JSON.generate(foo0) +# obj0 = JSON.parse(json0) +# # Load the custom addition. +# require_relative 'foo_addition' +# # This foo has the custom addition. +# foo1 = Foo.new(0, 1) +# json1 = JSON.generate(foo1) +# obj1 = JSON.parse(json1, create_additions: true) +# # Make a nice display.
+# display = <<~EOT +# Generated JSON: +# Without custom addition: #{json0} (#{json0.class}) +# With custom addition: #{json1} (#{json1.class}) +# Parsed JSON: +# Without custom addition: #{obj0.inspect} (#{obj0.class}) +# With custom addition: #{obj1.inspect} (#{obj1.class}) +# EOT +# puts display +# +# Output: # -# 1.to_json => "1" +# Generated JSON: +# Without custom addition: "#<Foo:0x0000000006534e80>" (String) +# With custom addition: {"json_class":"Foo","a":[0,1]} (String) +# Parsed JSON: +# Without custom addition: "#<Foo:0x0000000006534e80>" (String) +# With custom addition: #<Foo:0x0000000006473bb8 @bar=0, @baz=1> (Foo) # module JSON require 'json/version' - - begin - require 'json/ext' - rescue LoadError - require 'json/pure' - end + require 'json/ext' end diff --git a/ext/json/lib/json/add/bigdecimal.rb b/ext/json/lib/json/add/bigdecimal.rb index 539daeeaf5..dc84572f31 100644 --- a/ext/json/lib/json/add/bigdecimal.rb +++ b/ext/json/lib/json/add/bigdecimal.rb @@ -1,29 +1,58 @@ -#frozen_string_literal: false +# frozen_string_literal: true unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED require 'json' end -defined?(::BigDecimal) or require 'bigdecimal' +begin + require 'bigdecimal' +rescue LoadError +end class BigDecimal - # Import a JSON Marshalled object. - # - # method used for JSON marshalling support. + + # See #as_json. def self.json_create(object) BigDecimal._load object['b'] end - # Marshal the object to JSON. + # Methods <tt>BigDecimal#as_json</tt> and +BigDecimal.json_create+ may be used + # to serialize and deserialize a \BigDecimal object; + # see Marshal[rdoc-ref:Marshal]. 
+ # + # \Method <tt>BigDecimal#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/bigdecimal' + # x = BigDecimal(2).as_json # => {"json_class"=>"BigDecimal", "b"=>"27:0.2e1"} + # y = BigDecimal(2.0, 4).as_json # => {"json_class"=>"BigDecimal", "b"=>"36:0.2e1"} + # z = BigDecimal(Complex(2, 0)).as_json # => {"json_class"=>"BigDecimal", "b"=>"27:0.2e1"} + # + # \Method +JSON.create+ deserializes such a hash, returning a \BigDecimal object: + # + # BigDecimal.json_create(x) # => 0.2e1 + # BigDecimal.json_create(y) # => 0.2e1 + # BigDecimal.json_create(z) # => 0.2e1 # - # method used for JSON marshalling support. def as_json(*) { JSON.create_id => self.class.name, - 'b' => _dump, + 'b' => _dump.force_encoding(Encoding::UTF_8), } end - # return the JSON value - def to_json(*) - as_json.to_json + # Returns a JSON string representing +self+: + # + # require 'json/add/bigdecimal' + # puts BigDecimal(2).to_json + # puts BigDecimal(2.0, 4).to_json + # puts BigDecimal(Complex(2, 0)).to_json + # + # Output: + # + # {"json_class":"BigDecimal","b":"27:0.2e1"} + # {"json_class":"BigDecimal","b":"36:0.2e1"} + # {"json_class":"BigDecimal","b":"27:0.2e1"} + # + def to_json(*args) + as_json.to_json(*args) end -end +end if defined?(::BigDecimal) diff --git a/ext/json/lib/json/add/complex.rb b/ext/json/lib/json/add/complex.rb index 28ef734daf..9e3c6f2d0a 100644 --- a/ext/json/lib/json/add/complex.rb +++ b/ext/json/lib/json/add/complex.rb @@ -1,19 +1,31 @@ -#frozen_string_literal: false +# frozen_string_literal: true unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED require 'json' end -defined?(::Complex) or require 'complex' class Complex - # Deserializes JSON string by converting Real value <tt>r</tt>, imaginary - # value <tt>i</tt>, to a Complex object. + # See #as_json. 
def self.json_create(object) Complex(object['r'], object['i']) end - # Returns a hash, that will be turned into a JSON object and represent this - # object. + # Methods <tt>Complex#as_json</tt> and +Complex.json_create+ may be used + # to serialize and deserialize a \Complex object; + # see Marshal[rdoc-ref:Marshal]. + # + # \Method <tt>Complex#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/complex' + # x = Complex(2).as_json # => {"json_class"=>"Complex", "r"=>2, "i"=>0} + # y = Complex(2.0, 4).as_json # => {"json_class"=>"Complex", "r"=>2.0, "i"=>4} + # + # \Method +JSON.create+ deserializes such a hash, returning a \Complex object: + # + # Complex.json_create(x) # => (2+0i) + # Complex.json_create(y) # => (2.0+4i) + # def as_json(*) { JSON.create_id => self.class.name, @@ -22,8 +34,18 @@ class Complex } end - # Stores class name (Complex) along with real value <tt>r</tt> and imaginary value <tt>i</tt> as JSON string - def to_json(*) - as_json.to_json + # Returns a JSON string representing +self+: + # + # require 'json/add/complex' + # puts Complex(2).to_json + # puts Complex(2.0, 4).to_json + # + # Output: + # + # {"json_class":"Complex","r":2,"i":0} + # {"json_class":"Complex","r":2.0,"i":4} + # + def to_json(*args) + as_json.to_json(*args) end end diff --git a/ext/json/lib/json/add/core.rb b/ext/json/lib/json/add/core.rb index bfb017c460..61ff454212 100644 --- a/ext/json/lib/json/add/core.rb +++ b/ext/json/lib/json/add/core.rb @@ -1,4 +1,4 @@ -#frozen_string_literal: false +# frozen_string_literal: true # This file requires the implementations of ruby core's custom objects for # serialisation/deserialisation. 
@@ -7,6 +7,7 @@ require 'json/add/date_time' require 'json/add/exception' require 'json/add/range' require 'json/add/regexp' +require 'json/add/string' require 'json/add/struct' require 'json/add/symbol' require 'json/add/time' diff --git a/ext/json/lib/json/add/date.rb b/ext/json/lib/json/add/date.rb index 25523561a5..88a098b637 100644 --- a/ext/json/lib/json/add/date.rb +++ b/ext/json/lib/json/add/date.rb @@ -1,4 +1,4 @@ -#frozen_string_literal: false +# frozen_string_literal: true unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED require 'json' end @@ -6,16 +6,29 @@ require 'date' class Date - # Deserializes JSON string by converting Julian year <tt>y</tt>, month - # <tt>m</tt>, day <tt>d</tt> and Day of Calendar Reform <tt>sg</tt> to Date. + # See #as_json. def self.json_create(object) civil(*object.values_at('y', 'm', 'd', 'sg')) end alias start sg unless method_defined?(:start) - # Returns a hash, that will be turned into a JSON object and represent this - # object. + # Methods <tt>Date#as_json</tt> and +Date.json_create+ may be used + # to serialize and deserialize a \Date object; + # see Marshal[rdoc-ref:Marshal]. 
+ # + # \Method <tt>Date#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/date' + # x = Date.today.as_json + # # => {"json_class"=>"Date", "y"=>2023, "m"=>11, "d"=>21, "sg"=>2299161.0} + # + # \Method +JSON.create+ deserializes such a hash, returning a \Date object: + # + # Date.json_create(x) + # # => #<Date: 2023-11-21 ((2460270j,0s,0n),+0s,2299161j)> + # def as_json(*) { JSON.create_id => self.class.name, @@ -26,8 +39,15 @@ class Date } end - # Stores class name (Date) with Julian year <tt>y</tt>, month <tt>m</tt>, day - # <tt>d</tt> and Day of Calendar Reform <tt>sg</tt> as JSON string + # Returns a JSON string representing +self+: + # + # require 'json/add/date' + # puts Date.today.to_json + # + # Output: + # + # {"json_class":"Date","y":2023,"m":11,"d":21,"sg":2299161.0} + # def to_json(*args) as_json.to_json(*args) end diff --git a/ext/json/lib/json/add/date_time.rb b/ext/json/lib/json/add/date_time.rb index 38b0e86ab8..8b0bb5d181 100644 --- a/ext/json/lib/json/add/date_time.rb +++ b/ext/json/lib/json/add/date_time.rb @@ -1,4 +1,4 @@ -#frozen_string_literal: false +# frozen_string_literal: true unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED require 'json' end @@ -6,9 +6,7 @@ require 'date' class DateTime - # Deserializes JSON string by converting year <tt>y</tt>, month <tt>m</tt>, - # day <tt>d</tt>, hour <tt>H</tt>, minute <tt>M</tt>, second <tt>S</tt>, - # offset <tt>of</tt> and Day of Calendar Reform <tt>sg</tt> to DateTime. + # See #as_json. def self.json_create(object) args = object.values_at('y', 'm', 'd', 'H', 'M', 'S') of_a, of_b = object['of'].split('/') @@ -23,8 +21,21 @@ class DateTime alias start sg unless method_defined?(:start) - # Returns a hash, that will be turned into a JSON object and represent this - # object. 
+ # Methods <tt>DateTime#as_json</tt> and +DateTime.json_create+ may be used + # to serialize and deserialize a \DateTime object; + # see Marshal[rdoc-ref:Marshal]. + # + # \Method <tt>DateTime#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/date_time' + # x = DateTime.now.as_json + # # => {"json_class"=>"DateTime", "y"=>2023, "m"=>11, "d"=>21, "sg"=>2299161.0} + # + # \Method +JSON.create+ deserializes such a hash, returning a \DateTime object: + # + # DateTime.json_create(x) # BUG? Raises Date::Error "invalid date" + # def as_json(*) { JSON.create_id => self.class.name, @@ -39,9 +50,15 @@ class DateTime } end - # Stores class name (DateTime) with Julian year <tt>y</tt>, month <tt>m</tt>, - # day <tt>d</tt>, hour <tt>H</tt>, minute <tt>M</tt>, second <tt>S</tt>, - # offset <tt>of</tt> and Day of Calendar Reform <tt>sg</tt> as JSON string + # Returns a JSON string representing +self+: + # + # require 'json/add/date_time' + # puts DateTime.now.to_json + # + # Output: + # + # {"json_class":"DateTime","y":2023,"m":11,"d":21,"sg":2299161.0} + # def to_json(*args) as_json.to_json(*args) end diff --git a/ext/json/lib/json/add/exception.rb b/ext/json/lib/json/add/exception.rb index a107e5b3c4..e85d404982 100644 --- a/ext/json/lib/json/add/exception.rb +++ b/ext/json/lib/json/add/exception.rb @@ -1,20 +1,31 @@ -#frozen_string_literal: false +# frozen_string_literal: true unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED require 'json' end class Exception - # Deserializes JSON string by constructing new Exception object with message - # <tt>m</tt> and backtrace <tt>b</tt> serialized with <tt>to_json</tt> + # See #as_json. def self.json_create(object) result = new(object['m']) result.set_backtrace object['b'] result end - # Returns a hash, that will be turned into a JSON object and represent this - # object.
+ # Methods <tt>Exception#as_json</tt> and +Exception.json_create+ may be used + # to serialize and deserialize a \Exception object; + # see Marshal[rdoc-ref:Marshal]. + # + # \Method <tt>Exception#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/exception' + # x = Exception.new('Foo').as_json # => {"json_class"=>"Exception", "m"=>"Foo", "b"=>nil} + # + # \Method +JSON.create+ deserializes such a hash, returning a \Exception object: + # + # Exception.json_create(x) # => #<Exception: Foo> + # def as_json(*) { JSON.create_id => self.class.name, @@ -23,8 +34,15 @@ class Exception } end - # Stores class name (Exception) with message <tt>m</tt> and backtrace array - # <tt>b</tt> as JSON string + # Returns a JSON string representing +self+: + # + # require 'json/add/exception' + # puts Exception.new('Foo').to_json + # + # Output: + # + # {"json_class":"Exception","m":"Foo","b":null} + # def to_json(*args) as_json.to_json(*args) end diff --git a/ext/json/lib/json/add/ostruct.rb b/ext/json/lib/json/add/ostruct.rb index 686cf0025d..7750498144 100644 --- a/ext/json/lib/json/add/ostruct.rb +++ b/ext/json/lib/json/add/ostruct.rb @@ -1,19 +1,35 @@ -#frozen_string_literal: false +# frozen_string_literal: true unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED require 'json' end -require 'ostruct' +begin + require 'ostruct' +rescue LoadError +end class OpenStruct - # Deserializes JSON string by constructing new Struct object with values - # <tt>t</tt> serialized by <tt>to_json</tt>. + # See #as_json. def self.json_create(object) new(object['t'] || object[:t]) end - # Returns a hash, that will be turned into a JSON object and represent this - # object. + # Methods <tt>OpenStruct#as_json</tt> and +OpenStruct.json_create+ may be used + # to serialize and deserialize a \OpenStruct object; + # see Marshal[rdoc-ref:Marshal]. 
+ # + # \Method <tt>OpenStruct#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/ostruct' + # x = OpenStruct.new('name' => 'Rowdy', :age => nil).as_json + # # => {"json_class"=>"OpenStruct", "t"=>{:name=>'Rowdy', :age=>nil}} + # + # \Method +JSON.create+ deserializes such a hash, returning a \OpenStruct object: + # + # OpenStruct.json_create(x) + # # => #<OpenStruct name='Rowdy', age=nil> + # def as_json(*) klass = self.class.name klass.to_s.empty? and raise JSON::JSONError, "Only named structs are supported!" @@ -23,9 +39,16 @@ class OpenStruct } end - # Stores class name (OpenStruct) with this struct's values <tt>t</tt> as a - # JSON string. + # Returns a JSON string representing +self+: + # + # require 'json/add/ostruct' + # puts OpenStruct.new('name' => 'Rowdy', :age => nil).to_json + # + # Output: + # + # {"json_class":"OpenStruct","t":{'name':'Rowdy',"age":null}} + # def to_json(*args) as_json.to_json(*args) end -end +end if defined?(::OpenStruct) diff --git a/ext/json/lib/json/add/range.rb b/ext/json/lib/json/add/range.rb index 93529fb1c4..408d2c32f6 100644 --- a/ext/json/lib/json/add/range.rb +++ b/ext/json/lib/json/add/range.rb @@ -1,18 +1,33 @@ -#frozen_string_literal: false +# frozen_string_literal: true unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED require 'json' end class Range - # Deserializes JSON string by constructing new Range object with arguments - # <tt>a</tt> serialized by <tt>to_json</tt>. + # See #as_json. def self.json_create(object) new(*object['a']) end - # Returns a hash, that will be turned into a JSON object and represent this - # object. + # Methods <tt>Range#as_json</tt> and +Range.json_create+ may be used + # to serialize and deserialize a \Range object; + # see Marshal[rdoc-ref:Marshal]. 
+ # + # \Method <tt>Range#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/range' + # x = (1..4).as_json # => {"json_class"=>"Range", "a"=>[1, 4, false]} + # y = (1...4).as_json # => {"json_class"=>"Range", "a"=>[1, 4, true]} + # z = ('a'..'d').as_json # => {"json_class"=>"Range", "a"=>["a", "d", false]} + # + # \Method +JSON.create+ deserializes such a hash, returning a \Range object: + # + # Range.json_create(x) # => 1..4 + # Range.json_create(y) # => 1...4 + # Range.json_create(z) # => "a".."d" + # def as_json(*) { JSON.create_id => self.class.name, @@ -20,9 +35,19 @@ class Range } end - # Stores class name (Range) with JSON array of arguments <tt>a</tt> which - # include <tt>first</tt> (integer), <tt>last</tt> (integer), and - # <tt>exclude_end?</tt> (boolean) as JSON string. + # Returns a JSON string representing +self+: + # + # require 'json/add/range' + # puts (1..4).to_json + # puts (1...4).to_json + # puts ('a'..'d').to_json + # + # Output: + # + # {"json_class":"Range","a":[1,4,false]} + # {"json_class":"Range","a":[1,4,true]} + # {"json_class":"Range","a":["a","d",false]} + # def to_json(*args) as_json.to_json(*args) end diff --git a/ext/json/lib/json/add/rational.rb b/ext/json/lib/json/add/rational.rb index 356940b225..c95812ea8e 100644 --- a/ext/json/lib/json/add/rational.rb +++ b/ext/json/lib/json/add/rational.rb @@ -1,18 +1,31 @@ -#frozen_string_literal: false +# frozen_string_literal: true unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED require 'json' end -defined?(::Rational) or require 'rational' class Rational - # Deserializes JSON string by converting numerator value <tt>n</tt>, - # denominator value <tt>d</tt>, to a Rational object. + + # See #as_json. def self.json_create(object) Rational(object['n'], object['d']) end - # Returns a hash, that will be turned into a JSON object and represent this - # object. 
+ # Methods <tt>Rational#as_json</tt> and +Rational.json_create+ may be used + # to serialize and deserialize a \Rational object; + # see Marshal[rdoc-ref:Marshal]. + # + # \Method <tt>Rational#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/rational' + # x = Rational(2, 3).as_json + # # => {"json_class"=>"Rational", "n"=>2, "d"=>3} + # + # \Method +JSON.create+ deserializes such a hash, returning a \Rational object: + # + # Rational.json_create(x) + # # => (2/3) + # def as_json(*) { JSON.create_id => self.class.name, @@ -21,8 +34,16 @@ class Rational } end - # Stores class name (Rational) along with numerator value <tt>n</tt> and denominator value <tt>d</tt> as JSON string - def to_json(*) - as_json.to_json + # Returns a JSON string representing +self+: + # + # require 'json/add/rational' + # puts Rational(2, 3).to_json + # + # Output: + # + # {"json_class":"Rational","n":2,"d":3} + # + def to_json(*args) + as_json.to_json(*args) end end diff --git a/ext/json/lib/json/add/regexp.rb b/ext/json/lib/json/add/regexp.rb index a93866b05a..aebfb2db5c 100644 --- a/ext/json/lib/json/add/regexp.rb +++ b/ext/json/lib/json/add/regexp.rb @@ -1,19 +1,30 @@ -#frozen_string_literal: false +# frozen_string_literal: true unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED require 'json' end class Regexp - # Deserializes JSON string by constructing new Regexp object with source - # <tt>s</tt> (Regexp or String) and options <tt>o</tt> serialized by - # <tt>to_json</tt> + # See #as_json. def self.json_create(object) new(object['s'], object['o']) end - # Returns a hash, that will be turned into a JSON object and represent this - # object. + # Methods <tt>Regexp#as_json</tt> and +Regexp.json_create+ may be used + # to serialize and deserialize a \Regexp object; + # see Marshal[rdoc-ref:Marshal]. 
+ # + # \Method <tt>Regexp#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/regexp' + # x = /foo/.as_json + # # => {"json_class"=>"Regexp", "o"=>0, "s"=>"foo"} + # + # \Method +JSON.create+ deserializes such a hash, returning a \Regexp object: + # + # Regexp.json_create(x) # => /foo/ + # def as_json(*) { JSON.create_id => self.class.name, @@ -22,9 +33,16 @@ class Regexp } end - # Stores class name (Regexp) with options <tt>o</tt> and source <tt>s</tt> - # (Regexp or String) as JSON string - def to_json(*) - as_json.to_json + # Returns a JSON string representing +self+: + # + # require 'json/add/regexp' + # puts /foo/.to_json + # + # Output: + # + # {"json_class":"Regexp","o":0,"s":"foo"} + # + def to_json(*args) + as_json.to_json(*args) end end diff --git a/ext/json/lib/json/add/set.rb b/ext/json/lib/json/add/set.rb index 71e2a0ac8b..1918353187 100644 --- a/ext/json/lib/json/add/set.rb +++ b/ext/json/lib/json/add/set.rb @@ -4,16 +4,27 @@ end defined?(::Set) or require 'set' class Set - # Import a JSON Marshalled object. - # - # method used for JSON marshalling support. + + # See #as_json. def self.json_create(object) new object['a'] end - # Marshal the object to JSON. + # Methods <tt>Set#as_json</tt> and +Set.json_create+ may be used + # to serialize and deserialize a \Set object; + # see Marshal[rdoc-ref:Marshal]. + # + # \Method <tt>Set#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/set' + # x = Set.new(%w/foo bar baz/).as_json + # # => {"json_class"=>"Set", "a"=>["foo", "bar", "baz"]} + # + # \Method +JSON.create+ deserializes such a hash, returning a \Set object: + # + # Set.json_create(x) # => #<Set: {"foo", "bar", "baz"}> # - # method used for JSON marshalling support. 
def as_json(*) { JSON.create_id => self.class.name, @@ -21,7 +32,15 @@ class Set } end - # return the JSON value + # Returns a JSON string representing +self+: + # + # require 'json/add/set' + # puts Set.new(%w/foo bar baz/).to_json + # + # Output: + # + # {"json_class":"Set","a":["foo","bar","baz"]} + # def to_json(*args) as_json.to_json(*args) end diff --git a/ext/json/lib/json/add/string.rb b/ext/json/lib/json/add/string.rb new file mode 100644 index 0000000000..9c3bde27fb --- /dev/null +++ b/ext/json/lib/json/add/string.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true +unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED + require 'json' +end + +class String + # call-seq: json_create(o) + # + # Raw Strings are JSON Objects (the raw bytes are stored in an array for the + # key "raw"). The Ruby String can be created by this class method. + def self.json_create(object) + object["raw"].pack("C*") + end + + # call-seq: to_json_raw_object() + # + # This method creates a raw object hash, that can be nested into + # other data structures and will be generated as a raw string. This + # method should be used, if you want to convert raw strings to JSON + # instead of UTF-8 strings, e. g. binary data. + def to_json_raw_object + { + JSON.create_id => self.class.name, + "raw" => unpack("C*"), + } + end + + # call-seq: to_json_raw(*args) + # + # This method creates a JSON text from the result of a call to + # to_json_raw_object of this String. + def to_json_raw(...) + to_json_raw_object.to_json(...) 
+ end +end diff --git a/ext/json/lib/json/add/struct.rb b/ext/json/lib/json/add/struct.rb index e8395ed42f..6760c3d86c 100644 --- a/ext/json/lib/json/add/struct.rb +++ b/ext/json/lib/json/add/struct.rb @@ -1,18 +1,32 @@ -#frozen_string_literal: false +# frozen_string_literal: true unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED require 'json' end class Struct - # Deserializes JSON string by constructing new Struct object with values - # <tt>v</tt> serialized by <tt>to_json</tt>. + # See #as_json. def self.json_create(object) new(*object['v']) end - # Returns a hash, that will be turned into a JSON object and represent this - # object. + # Methods <tt>Struct#as_json</tt> and +Struct.json_create+ may be used + # to serialize and deserialize a \Struct object; + # see Marshal[rdoc-ref:Marshal]. + # + # \Method <tt>Struct#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/struct' + # Customer = Struct.new('Customer', :name, :address, :zip) + # x = Struct::Customer.new.as_json + # # => {"json_class"=>"Struct::Customer", "v"=>[nil, nil, nil]} + # + # \Method +JSON.create+ deserializes such a hash, returning a \Struct object: + # + # Struct::Customer.json_create(x) + # # => #<struct Struct::Customer name=nil, address=nil, zip=nil> + # def as_json(*) klass = self.class.name klass.to_s.empty? and raise JSON::JSONError, "Only named structs are supported!" @@ -22,8 +36,16 @@ class Struct } end - # Stores class name (Struct) with Struct values <tt>v</tt> as a JSON string. - # Only named structs are supported. 
+ # Returns a JSON string representing +self+: + # + # require 'json/add/struct' + # Customer = Struct.new('Customer', :name, :address, :zip) + # puts Struct::Customer.new.to_json + # + # Output: + # + # {"json_class":"Struct::Customer","v":[null,null,null]} + # def to_json(*args) as_json.to_json(*args) end diff --git a/ext/json/lib/json/add/symbol.rb b/ext/json/lib/json/add/symbol.rb index 74b13a423f..806be4f025 100644 --- a/ext/json/lib/json/add/symbol.rb +++ b/ext/json/lib/json/add/symbol.rb @@ -1,11 +1,25 @@ -#frozen_string_literal: false +# frozen_string_literal: true unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED require 'json' end class Symbol - # Returns a hash, that will be turned into a JSON object and represent this - # object. + + # Methods <tt>Symbol#as_json</tt> and +Symbol.json_create+ may be used + # to serialize and deserialize a \Symbol object; + # see Marshal[rdoc-ref:Marshal]. + # + # \Method <tt>Symbol#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/symbol' + # x = :foo.as_json + # # => {"json_class"=>"Symbol", "s"=>"foo"} + # + # \Method +JSON.create+ deserializes such a hash, returning a \Symbol object: + # + # Symbol.json_create(x) # => :foo + # def as_json(*) { JSON.create_id => self.class.name, @@ -13,12 +27,25 @@ class Symbol } end - # Stores class name (Symbol) with String representation of Symbol as a JSON string. - def to_json(*a) - as_json.to_json(*a) + # Returns a JSON string representing +self+: + # + # require 'json/add/symbol' + # puts :foo.to_json + # + # Output: + # + # {"json_class":"Symbol","s":"foo"} + # + def to_json(state = nil, *a) + state = ::JSON::State.from_state(state) + if state.strict? + super + else + as_json.to_json(state, *a) + end end - # Deserializes JSON string by converting the <tt>string</tt> value stored in the object to a Symbol + # See #as_json. 
def self.json_create(o) o['s'].to_sym end diff --git a/ext/json/lib/json/add/time.rb b/ext/json/lib/json/add/time.rb index b73acc4086..b03d4ff251 100644 --- a/ext/json/lib/json/add/time.rb +++ b/ext/json/lib/json/add/time.rb @@ -1,37 +1,51 @@ -#frozen_string_literal: false +# frozen_string_literal: true unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED require 'json' end class Time - # Deserializes JSON string by converting time since epoch to Time + # See #as_json. def self.json_create(object) if usec = object.delete('u') # used to be tv_usec -> tv_nsec object['n'] = usec * 1000 end - if method_defined?(:tv_nsec) - at(object['s'], Rational(object['n'], 1000)) - else - at(object['s'], object['n'] / 1000) - end + at(object['s'], Rational(object['n'], 1000)) end - # Returns a hash, that will be turned into a JSON object and represent this - # object. + # Methods <tt>Time#as_json</tt> and +Time.json_create+ may be used + # to serialize and deserialize a \Time object; + # see Marshal[rdoc-ref:Marshal]. 
+ # + # \Method <tt>Time#as_json</tt> serializes +self+, + # returning a 2-element hash representing +self+: + # + # require 'json/add/time' + # x = Time.now.as_json + # # => {"json_class"=>"Time", "s"=>1700931656, "n"=>472846644} + # + # \Method +JSON.create+ deserializes such a hash, returning a \Time object: + # + # Time.json_create(x) + # # => 2023-11-25 11:00:56.472846644 -0600 + # def as_json(*) - nanoseconds = [ tv_usec * 1000 ] - respond_to?(:tv_nsec) and nanoseconds << tv_nsec - nanoseconds = nanoseconds.max { JSON.create_id => self.class.name, 's' => tv_sec, - 'n' => nanoseconds, + 'n' => tv_nsec, } end - # Stores class name (Time) with number of seconds since epoch and number of - # microseconds for Time as JSON string + # Returns a JSON string representing +self+: + # + # require 'json/add/time' + # puts Time.now.to_json + # + # Output: + # + # {"json_class":"Time","s":1700931678,"n":980650786} + # def to_json(*args) as_json.to_json(*args) end diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb index 7cc852916c..877b96814e 100644 --- a/ext/json/lib/json/common.rb +++ b/ext/json/lib/json/common.rb @@ -1,25 +1,148 @@ -#frozen_string_literal: false +# frozen_string_literal: true + require 'json/version' -require 'json/generic_object' module JSON + autoload :GenericObject, 'json/generic_object' + + module ParserOptions # :nodoc: + class << self + def prepare(opts) + if opts[:object_class] || opts[:array_class] + opts = opts.dup + on_load = opts[:on_load] + + on_load = object_class_proc(opts[:object_class], on_load) if opts[:object_class] + on_load = array_class_proc(opts[:array_class], on_load) if opts[:array_class] + opts[:on_load] = on_load + end + + if opts.fetch(:create_additions, false) != false + opts = create_additions_proc(opts) + end + + opts + end + + private + + def object_class_proc(object_class, on_load) + ->(obj) do + if Hash === obj + object = object_class.new + obj.each { |k, v| object[k] = v } + obj = object + end + 
on_load.nil? ? obj : on_load.call(obj) + end + end + + def array_class_proc(array_class, on_load) + ->(obj) do + if Array === obj + array = array_class.new + obj.each { |v| array << v } + obj = array + end + on_load.nil? ? obj : on_load.call(obj) + end + end + + # TODO: extract :create_additions support to another gem for version 3.0 + def create_additions_proc(opts) + if opts[:symbolize_names] + raise ArgumentError, "options :symbolize_names and :create_additions cannot be used in conjunction" + end + + opts = opts.dup + create_additions = opts.fetch(:create_additions, false) + on_load = opts[:on_load] + object_class = opts[:object_class] || Hash + + opts[:on_load] = ->(object) do + case object + when String + opts[:match_string]&.each do |pattern, klass| + if match = pattern.match(object) + create_additions_warning if create_additions.nil? + object = klass.json_create(object) + break + end + end + when object_class + if opts[:create_additions] != false + if class_path = object[JSON.create_id] + klass = begin + Object.const_get(class_path) + rescue NameError => e + raise ArgumentError, "can't get const #{class_path}: #{e}" + end + + if klass.respond_to?(:json_creatable?) ? klass.json_creatable? : klass.respond_to?(:json_create) + create_additions_warning if create_additions.nil? + object = klass.json_create(object) + end + end + end + end + + on_load.nil? ? object : on_load.call(object) + end + + opts + end + + def create_additions_warning + JSON.deprecation_warning "JSON.load implicit support for `create_additions: true` is deprecated " \ + "and will be removed in 3.0, use JSON.unsafe_load or explicitly " \ + "pass `create_additions: true`" + end + end + end + class << self - # If _object_ is string-like, parse the string and return the parsed - # result as a Ruby data structure. Otherwise generate a JSON text from the - # Ruby data structure object and return it. - # - # The _opts_ argument is passed through to generate/parse respectively. 
- # See generate and parse for their documentation. - def [](object, opts = {}) - if object.respond_to? :to_str - JSON.parse(object.to_str, opts) + def deprecation_warning(message, uplevel = 3) # :nodoc: + gem_root = File.expand_path("..", __dir__) + "/" + caller_locations(uplevel, 10).each do |frame| + if frame.path.nil? || frame.path.start_with?(gem_root) || frame.path.end_with?("/truffle/cext_ruby.rb", ".c") + uplevel += 1 + else + break + end + end + + if RUBY_VERSION >= "3.0" + warn(message, uplevel: uplevel, category: :deprecated) else - JSON.generate(object, opts) + warn(message, uplevel: uplevel) end end - # Returns the JSON parser class that is used by JSON. This is either - # JSON::Ext::Parser or JSON::Pure::Parser. + # :call-seq: + # JSON[object] -> new_array or new_string + # + # If +object+ is a \String, + # calls JSON.parse with +object+ and +opts+ (see method #parse): + # json = '[0, 1, null]' + # JSON[json]# => [0, 1, nil] + # + # Otherwise, calls JSON.generate with +object+ and +opts+ (see method #generate): + # ruby = [0, 1, nil] + # JSON[ruby] # => '[0,1,null]' + def [](object, opts = nil) + if object.is_a?(String) + return JSON.parse(object, opts) + elsif object.respond_to?(:to_str) + str = object.to_str + if str.is_a?(String) + return JSON.parse(str, opts) + end + end + + JSON.generate(object, opts) + end + + # Returns the JSON parser class that is used by JSON. attr_reader :parser # Set the JSON parser class _parser_ to be used by JSON. @@ -29,32 +152,13 @@ module JSON const_set :Parser, parser end - # Return the constant located at _path_. The format of _path_ has to be - # either ::A::B::C or A::B::C. In any case, A has to be located at the top - # level (absolute namespace path?). If there doesn't exist a constant at - # the given path, an ArgumentError is raised. - def deep_const_get(path) # :nodoc: - path.to_s.split(/::/).inject(Object) do |p, c| - case - when c.empty? 
then p - when p.const_defined?(c, true) then p.const_get(c) - else - begin - p.const_missing(c) - rescue NameError => e - raise ArgumentError, "can't get const #{path}: #{e}" - end - end - end - end - # Set the module _generator_ to be used by JSON. def generator=(generator) # :nodoc: old, $VERBOSE = $VERBOSE, nil @generator = generator generator_methods = generator::GeneratorMethods for const in generator_methods.constants - klass = deep_const_get(const) + klass = const_get(const) modul = generator_methods.const_get(const) klass.class_eval do instance_methods(false).each do |m| @@ -64,347 +168,932 @@ module JSON end end self.state = generator::State - const_set :State, self.state - const_set :SAFE_STATE_PROTOTYPE, State.new - const_set :FAST_STATE_PROTOTYPE, State.new( - :indent => '', - :space => '', - :object_nl => "", - :array_nl => "", - :max_nesting => false - ) - const_set :PRETTY_STATE_PROTOTYPE, State.new( - :indent => ' ', - :space => ' ', - :object_nl => "\n", - :array_nl => "\n" - ) + const_set :State, state ensure $VERBOSE = old end - # Returns the JSON generator module that is used by JSON. This is - # either JSON::Ext::Generator or JSON::Pure::Generator. + # Returns the JSON generator module that is used by JSON. attr_reader :generator - # Returns the JSON generator state class that is used by JSON. This is - # either JSON::Ext::Generator::State or JSON::Pure::Generator::State. + # Sets or Returns the JSON generator state class that is used by JSON. attr_accessor :state - # This is create identifier, which is used to decide if the _json_create_ - # hook of a class should be called. It defaults to 'json_class'. 
- attr_accessor :create_id + private + + # Called from the extension when a hash has both string and symbol keys + def on_mixed_keys_hash(hash, do_raise) + set = {} + hash.each_key do |key| + key_str = key.to_s + + if set[key_str] + message = "detected duplicate key #{key_str.inspect} in #{hash.inspect}" + if do_raise + raise GeneratorError, message + else + deprecation_warning("#{message}.\nThis will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`") + end + else + set[key_str] = true + end + end + end + + def deprecated_singleton_attr_accessor(*attrs) + args = RUBY_VERSION >= "3.0" ? ", category: :deprecated" : "" + attrs.each do |attr| + singleton_class.class_eval <<~RUBY + def #{attr} + warn "JSON.#{attr} is deprecated and will be removed in json 3.0.0", uplevel: 1 #{args} + @#{attr} + end + + def #{attr}=(val) + warn "JSON.#{attr}= is deprecated and will be removed in json 3.0.0", uplevel: 1 #{args} + @#{attr} = val + end + + def _#{attr} + @#{attr} + end + RUBY + end + end + end + + # Sets create identifier, which is used to decide if the _json_create_ + # hook of a class should be called; initial value is +json_class+: + # JSON.create_id # => 'json_class' + def self.create_id=(new_value) + Thread.current[:"JSON.create_id"] = new_value.dup.freeze + end + + # Returns the current create identifier. + # See also JSON.create_id=. + def self.create_id + Thread.current[:"JSON.create_id"] || 'json_class' end - self.create_id = 'json_class' - NaN = 0.0/0 + NaN = Float::NAN - Infinity = 1.0/0 + Infinity = Float::INFINITY MinusInfinity = -Infinity # The base exception for JSON errors. - class JSONError < StandardError - def self.wrap(exception) - obj = new("Wrapped(#{exception.class}): #{exception.message.inspect}") - obj.set_backtrace exception.backtrace - obj - end - end + class JSONError < StandardError; end # This exception is raised if a parser error occurs. 
- class ParserError < JSONError; end + class ParserError < JSONError + attr_reader :line, :column + end # This exception is raised if the nesting of parsed data structures is too # deep. class NestingError < ParserError; end - # :stopdoc: - class CircularDatastructure < NestingError; end - # :startdoc: - # This exception is raised if a generator or unparser error occurs. - class GeneratorError < JSONError; end - # For backwards compatibility - UnparserError = GeneratorError + class GeneratorError < JSONError + attr_reader :invalid_object + + def initialize(message, invalid_object = nil) + super(message) + @invalid_object = invalid_object + end + + def detailed_message(...) + # Exception#detailed_message doesn't exist until Ruby 3.2 + super_message = defined?(super) ? super : message + + if @invalid_object.nil? + super_message + else + "#{super_message}\nInvalid object: #{@invalid_object.inspect}" + end + end + end + + # Fragment of JSON document that is to be included as is: + # fragment = JSON::Fragment.new("[1, 2, 3]") + # JSON.generate({ count: 3, items: fragment }) + # + # This makes it easy to assemble multiple JSON fragments that have + # been persisted somewhere without having to parse them or resort + # to string interpolation. + # + # Note: no validation is performed on the provided string. It is the + # responsibility of the caller to ensure the string contains valid JSON. + Fragment = Struct.new(:json) do + def initialize(json) + unless string = String.try_convert(json) + raise TypeError, " no implicit conversion of #{json.class} into String" + end - # This exception is raised if the required unicode support is missing on the - # system. Usually this means that the iconv library is not installed. - class MissingUnicodeSupport < JSONError; end + super(string) + end + + def to_json(state = nil, *) + json + end + end module_function - # Parse the JSON document _source_ into a Ruby data structure and return it. 
- # - # _opts_ can have the following - # keys: - # * *max_nesting*: The maximum depth of nesting allowed in the parsed data - # structures. Disable depth checking with :max_nesting => false. It - # defaults to 100. - # * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in - # defiance of RFC 7159 to be parsed by the Parser. This option defaults - # to false. - # * *symbolize_names*: If set to true, returns symbols for the names - # (keys) in a JSON object. Otherwise strings are returned. Strings are - # the default. - # * *create_additions*: If set to false, the Parser doesn't create - # additions even if a matching class and create_id was found. This option - # defaults to false. - # * *object_class*: Defaults to Hash - # * *array_class*: Defaults to Array - def parse(source, opts = {}) - Parser.new(source, opts).parse + # :call-seq: + # JSON.parse(source, opts) -> object + # + # Returns the Ruby objects created by parsing the given +source+. + # + # Argument +source+ contains the \String to be parsed. + # + # Argument +opts+, if given, contains a \Hash of options for the parsing. + # See {Parsing Options}[#module-JSON-label-Parsing+Options]. + # + # --- + # + # When +source+ is a \JSON array, returns a Ruby \Array: + # source = '["foo", 1.0, true, false, null]' + # ruby = JSON.parse(source) + # ruby # => ["foo", 1.0, true, false, nil] + # ruby.class # => Array + # + # When +source+ is a \JSON object, returns a Ruby \Hash: + # source = '{"a": "foo", "b": 1.0, "c": true, "d": false, "e": null}' + # ruby = JSON.parse(source) + # ruby # => {"a"=>"foo", "b"=>1.0, "c"=>true, "d"=>false, "e"=>nil} + # ruby.class # => Hash + # + # For examples of parsing for all \JSON data types, see + # {Parsing \JSON}[#module-JSON-label-Parsing+JSON]. 
+ # + # Parses nested JSON objects: + # source = <<~JSON + # { + # "name": "Dave", + # "age" :40, + # "hats": [ + # "Cattleman's", + # "Panama", + # "Tophat" + # ] + # } + # JSON + # ruby = JSON.parse(source) + # ruby # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # --- + # + # Raises an exception if +source+ is not valid JSON: + # # Raises JSON::ParserError (783: unexpected token at ''): + # JSON.parse('') + # + def parse(source, opts = nil) + opts = ParserOptions.prepare(opts) unless opts.nil? + Parser.parse(source, opts) + end + + PARSE_L_OPTIONS = { + max_nesting: false, + allow_nan: true, + }.freeze + private_constant :PARSE_L_OPTIONS + + # :call-seq: + # JSON.parse!(source, opts) -> object + # + # Calls + # parse(source, opts) + # with +source+ and possibly modified +opts+. + # + # Differences from JSON.parse: + # - Option +max_nesting+, if not provided, defaults to +false+, + # which disables checking for nesting depth. + # - Option +allow_nan+, if not provided, defaults to +true+. + def parse!(source, opts = nil) + if opts.nil? + parse(source, PARSE_L_OPTIONS) + else + parse(source, PARSE_L_OPTIONS.merge(opts)) + end + end + + # :call-seq: + # JSON.load_file(path, opts={}) -> object + # + # Calls: + # parse(File.read(path), opts) + # + # See method #parse. + def load_file(filespec, opts = nil) + parse(File.read(filespec, encoding: Encoding::UTF_8), opts) end - # Parse the JSON document _source_ into a Ruby data structure and return it. - # The bang version of the parse method defaults to the more dangerous values - # for the _opts_ hash, so be sure only to parse trusted _source_ documents. - # - # _opts_ can have the following keys: - # * *max_nesting*: The maximum depth of nesting allowed in the parsed data - # structures. Enable depth checking with :max_nesting => anInteger. The - # parse! methods defaults to not doing max depth checking: This can be - # dangerous if someone wants to fill up your stack. 
- # * *allow_nan*: If set to true, allow NaN, Infinity, and -Infinity in - # defiance of RFC 7159 to be parsed by the Parser. This option defaults - # to true. - # * *create_additions*: If set to false, the Parser doesn't create - # additions even if a matching class and create_id was found. This option - # defaults to false. - def parse!(source, opts = {}) - opts = { - :max_nesting => false, - :allow_nan => true - }.merge(opts) - Parser.new(source, opts).parse + # :call-seq: + # JSON.load_file!(path, opts = {}) + # + # Calls: + # JSON.parse!(File.read(path), opts) + # + # See method #parse! + def load_file!(filespec, opts = nil) + parse!(File.read(filespec, encoding: Encoding::UTF_8), opts) end - # Generate a JSON document from the Ruby data structure _obj_ and return - # it. _state_ is * a JSON::State object, - # * or a Hash like object (responding to to_hash), - # * an object convertible into a hash by a to_h method, - # that is used as or to configure a State object. - # - # It defaults to a state object, that creates the shortest possible JSON text - # in one line, checks for circular data structures and doesn't allow NaN, - # Infinity, and -Infinity. - # - # A _state_ hash can have the following keys: - # * *indent*: a string used to indent levels (default: ''), - # * *space*: a string that is put after, a : or , delimiter (default: ''), - # * *space_before*: a string that is put before a : pair delimiter (default: ''), - # * *object_nl*: a string that is put at the end of a JSON object (default: ''), - # * *array_nl*: a string that is put at the end of a JSON array (default: ''), - # * *allow_nan*: true if NaN, Infinity, and -Infinity should be - # generated, otherwise an exception is thrown if these values are - # encountered. This options defaults to false. - # * *max_nesting*: The maximum depth of nesting allowed in the data - # structures from which JSON is to be generated. Disable depth checking - # with :max_nesting => false, it defaults to 100. 
- # - # See also the fast_generate for the fastest creation method with the least - # amount of sanity checks, and the pretty_generate method for some - # defaults for pretty output. + # :call-seq: + # JSON.generate(obj, opts = nil) -> new_string + # + # Returns a \String containing the generated \JSON data. + # + # See also JSON.pretty_generate. + # + # Argument +obj+ is the Ruby object to be converted to \JSON. + # + # Argument +opts+, if given, contains a \Hash of options for the generation. + # See {Generating Options}[#module-JSON-label-Generating+Options]. + # + # --- + # + # When +obj+ is an \Array, returns a \String containing a \JSON array: + # obj = ["foo", 1.0, true, false, nil] + # json = JSON.generate(obj) + # json # => '["foo",1.0,true,false,null]' + # + # When +obj+ is a \Hash, returns a \String containing a \JSON object: + # obj = {foo: 0, bar: 's', baz: :bat} + # json = JSON.generate(obj) + # json # => '{"foo":0,"bar":"s","baz":"bat"}' + # + # For examples of generating from other Ruby objects, see + # {Generating \JSON from Other Objects}[#module-JSON-label-Generating+JSON+from+Other+Objects]. + # + # --- + # + # Raises an exception if any formatting option is not a \String. + # + # Raises an exception if +obj+ contains circular references: + # a = []; b = []; a.push(b); b.push(a) + # # Raises JSON::NestingError (nesting of 100 is too deep): + # JSON.generate(a) + # def generate(obj, opts = nil) if State === opts - state, opts = opts, nil + opts.generate(obj) else - state = SAFE_STATE_PROTOTYPE.dup + State.generate(obj, opts, nil) end - if opts - if opts.respond_to? :to_hash - opts = opts.to_hash - elsif opts.respond_to? :to_h - opts = opts.to_h - else - raise TypeError, "can't convert #{opts.class} into Hash" - end - state = state.configure(opts) - end - state.generate(obj) end - # :stopdoc: - # I want to deprecate these later, so I'll first be silent about them, and - # later delete them. 
- alias unparse generate - module_function :unparse - # :startdoc: - - # Generate a JSON document from the Ruby data structure _obj_ and return it. - # This method disables the checks for circles in Ruby objects. + # :call-seq: + # JSON.fast_generate(obj, opts) -> new_string + # + # Arguments +obj+ and +opts+ here are the same as + # arguments +obj+ and +opts+ in JSON.generate. # - # *WARNING*: Be careful not to pass any Ruby data structures with circles as - # _obj_ argument because this will cause JSON to go into an infinite loop. + # By default, generates \JSON data without checking + # for circular references in +obj+ (option +max_nesting+ set to +false+, disabled). + # + # Raises an exception if +obj+ contains circular references: + # a = []; b = []; a.push(b); b.push(a) + # # Raises SystemStackError (stack level too deep): + # JSON.fast_generate(a) def fast_generate(obj, opts = nil) - if State === opts - state, opts = opts, nil + if RUBY_VERSION >= "3.0" + warn "JSON.fast_generate is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1, category: :deprecated else - state = FAST_STATE_PROTOTYPE.dup + warn "JSON.fast_generate is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1 end - if opts - if opts.respond_to? :to_hash - opts = opts.to_hash - elsif opts.respond_to? :to_h - opts = opts.to_h - else - raise TypeError, "can't convert #{opts.class} into Hash" - end - state.configure(opts) - end - state.generate(obj) + generate(obj, opts) end - # :stopdoc: - # I want to deprecate these later, so I'll first be silent about them, and later delete them. - alias fast_unparse fast_generate - module_function :fast_unparse - # :startdoc: + PRETTY_GENERATE_OPTIONS = { + indent: ' ', + space: ' ', + object_nl: "\n", + array_nl: "\n", + }.freeze + private_constant :PRETTY_GENERATE_OPTIONS - # Generate a JSON document from the Ruby data structure _obj_ and return it. 
- # The returned document is a prettier form of the document returned by - # #unparse. + # :call-seq: + # JSON.pretty_generate(obj, opts = nil) -> new_string + # + # Arguments +obj+ and +opts+ here are the same as + # arguments +obj+ and +opts+ in JSON.generate. + # + # Default options are: + # { + # indent: ' ', # Two spaces + # space: ' ', # One space + # array_nl: "\n", # Newline + # object_nl: "\n" # Newline + # } + # + # Example: + # obj = {foo: [:bar, :baz], bat: {bam: 0, bad: 1}} + # json = JSON.pretty_generate(obj) + # puts json + # Output: + # { + # "foo": [ + # "bar", + # "baz" + # ], + # "bat": { + # "bam": 0, + # "bad": 1 + # } + # } # - # The _opts_ argument can be used to configure the generator. See the - # generate method for a more detailed explanation. def pretty_generate(obj, opts = nil) - if State === opts - state, opts = opts, nil - else - state = PRETTY_STATE_PROTOTYPE.dup - end + return opts.generate(obj) if State === opts + + options = PRETTY_GENERATE_OPTIONS + if opts - if opts.respond_to? :to_hash - opts = opts.to_hash - elsif opts.respond_to? :to_h - opts = opts.to_h - else - raise TypeError, "can't convert #{opts.class} into Hash" + unless opts.is_a?(Hash) + if opts.respond_to? :to_hash + opts = opts.to_hash + elsif opts.respond_to? :to_h + opts = opts.to_h + else + raise TypeError, "can't convert #{opts.class} into Hash" + end end - state.configure(opts) + options = options.merge(opts) end - state.generate(obj) + + State.generate(obj, options, nil) end - # :stopdoc: - # I want to deprecate these later, so I'll first be silent about them, and later delete them. - alias pretty_unparse pretty_generate - module_function :pretty_unparse - # :startdoc: + # Sets or returns default options for the JSON.unsafe_load method. 
+ # Initially: + # opts = JSON.unsafe_load_default_options + # opts # => {:max_nesting=>false, :allow_nan=>true, :allow_blank=>true, :create_additions=>true} + deprecated_singleton_attr_accessor :unsafe_load_default_options - class << self - # The global default options for the JSON.load method: - # :max_nesting: false - # :allow_nan: true - # :allow_blank: true - attr_accessor :load_default_options - end - self.load_default_options = { + @unsafe_load_default_options = { :max_nesting => false, :allow_nan => true, - :allow_blank => true, + :allow_blank => true, :create_additions => true, } - # Load a ruby data structure from a JSON _source_ and return it. A source can - # either be a string-like object, an IO-like object, or an object responding - # to the read method. If _proc_ was given, it will be called with any nested - # Ruby object as an argument recursively in depth first order. To modify the - # default options pass in the optional _options_ argument as well. + # Sets or returns default options for the JSON.load method. + # Initially: + # opts = JSON.load_default_options + # opts # => {:allow_nan=>true, :allow_blank=>true, :create_additions=>nil} + deprecated_singleton_attr_accessor :load_default_options + + @load_default_options = { + :allow_nan => true, + :allow_blank => true, + :create_additions => nil, + } + # :call-seq: + # JSON.unsafe_load(source, options = {}) -> object + # JSON.unsafe_load(source, proc = nil, options = {}) -> object + # + # Returns the Ruby objects created by parsing the given +source+. # # BEWARE: This method is meant to serialise data from trusted user input, # like from your own database server or clients under your control, it could - # be dangerous to allow untrusted users to pass JSON sources into it. The - # default options for the parser can be changed via the load_default_options - # method. - # - # This method is part of the implementation of the load/dump interface of - # Marshal and YAML.
- def load(source, proc = nil, options = {}) - opts = load_default_options.merge options - if source.respond_to? :to_str - source = source.to_str - elsif source.respond_to? :to_io - source = source.to_io.read - elsif source.respond_to?(:read) - source = source.read + # be dangerous to allow untrusted users to pass JSON sources into it. + # + # - Argument +source+ must be, or be convertible to, a \String: + # - If +source+ responds to instance method +to_str+, + # <tt>source.to_str</tt> becomes the source. + # - If +source+ responds to instance method +to_io+, + # <tt>source.to_io.read</tt> becomes the source. + # - If +source+ responds to instance method +read+, + # <tt>source.read</tt> becomes the source. + # - If both of the following are true, source becomes the \String <tt>'null'</tt>: + # - Option +allow_blank+ specifies a truthy value. + # - The source, as defined above, is +nil+ or the empty \String <tt>''</tt>. + # - Otherwise, +source+ remains the source. + # - Argument +proc+, if given, must be a \Proc that accepts one argument. + # It will be called recursively with each result (depth-first order). + # See details below. + # - Argument +opts+, if given, contains a \Hash of options for the parsing. + # See {Parsing Options}[#module-JSON-label-Parsing+Options]. + # The default options can be changed via method JSON.unsafe_load_default_options=. + # + # --- + # + # When no +proc+ is given, modifies +source+ as above and returns the result of + # <tt>parse(source, opts)</tt>; see #parse. 
+ # + # Source for following examples: + # source = <<~JSON + # { + # "name": "Dave", + # "age" :40, + # "hats": [ + # "Cattleman's", + # "Panama", + # "Tophat" + # ] + # } + # JSON + # + # Load a \String: + # ruby = JSON.unsafe_load(source) + # ruby # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # Load an \IO object: + # require 'stringio' + # object = JSON.unsafe_load(StringIO.new(source)) + # object # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # Load a \File object: + # path = 't.json' + # File.write(path, source) + # File.open(path) do |file| + # JSON.unsafe_load(file) + # end # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # --- + # + # When +proc+ is given: + # - Modifies +source+ as above. + # - Gets the +result+ from calling <tt>parse(source, opts)</tt>. + # - Recursively calls <tt>proc(result)</tt>. + # - Returns the final result. + # + # Example: + # require 'json' + # + # # Some classes for the example. + # class Base + # def initialize(attributes) + # @attributes = attributes + # end + # end + # class User < Base; end + # class Account < Base; end + # class Admin < Base; end + # # The JSON source. + # json = <<-EOF + # { + # "users": [ + # {"type": "User", "username": "jane", "email": "jane@example.com"}, + # {"type": "User", "username": "john", "email": "john@example.com"} + # ], + # "accounts": [ + # {"account": {"type": "Account", "paid": true, "account_id": "1234"}}, + # {"account": {"type": "Account", "paid": false, "account_id": "1235"}} + # ], + # "admins": {"type": "Admin", "password": "0wn3d"} + # } + # EOF + # # Deserializer method. + # def deserialize_obj(obj, safe_types = %w(User Account Admin)) + # type = obj.is_a?(Hash) && obj["type"] + # safe_types.include?(type) ? 
Object.const_get(type).new(obj) : obj + # end + # # Call to JSON.unsafe_load + # ruby = JSON.unsafe_load(json, proc {|obj| + # case obj + # when Hash + # obj.each {|k, v| obj[k] = deserialize_obj v } + # when Array + # obj.map! {|v| deserialize_obj v } + # end + # obj + # }) + # pp ruby + # Output: + # {"users"=> + # [#<User:0x00000000064c4c98 + # @attributes= + # {"type"=>"User", "username"=>"jane", "email"=>"jane@example.com"}>, + # #<User:0x00000000064c4bd0 + # @attributes= + # {"type"=>"User", "username"=>"john", "email"=>"john@example.com"}>], + # "accounts"=> + # [{"account"=> + # #<Account:0x00000000064c4928 + # @attributes={"type"=>"Account", "paid"=>true, "account_id"=>"1234"}>}, + # {"account"=> + # #<Account:0x00000000064c4680 + # @attributes={"type"=>"Account", "paid"=>false, "account_id"=>"1235"}>}], + # "admins"=> + # #<Admin:0x00000000064c41f8 + # @attributes={"type"=>"Admin", "password"=>"0wn3d"}>} + # + def unsafe_load(source, proc = nil, options = nil) + opts = if options.nil? + if proc && proc.is_a?(Hash) + options, proc = proc, nil + options + else + _unsafe_load_default_options + end + else + _unsafe_load_default_options.merge(options) end + + unless source.is_a?(String) + if source.respond_to? :to_str + source = source.to_str + elsif source.respond_to? :to_io + source = source.to_io.read + elsif source.respond_to?(:read) + source = source.read + end + end + if opts[:allow_blank] && (source.nil? || source.empty?) 
source = 'null' end - result = parse(source, opts) - recurse_proc(result, &proc) if proc - result + + if proc + opts = opts.dup + opts[:on_load] = proc.to_proc + end + + parse(source, opts) end - # Recursively calls passed _Proc_ if the parsed data structure is an _Array_ or _Hash_ - def recurse_proc(result, &proc) - case result - when Array - result.each { |x| recurse_proc x, &proc } - proc.call result - when Hash - result.each { |x, y| recurse_proc x, &proc; recurse_proc y, &proc } - proc.call result + # :call-seq: + # JSON.load(source, options = {}) -> object + # JSON.load(source, proc = nil, options = {}) -> object + # + # Returns the Ruby objects created by parsing the given +source+. + # + # BEWARE: This method is meant to serialise data from trusted user input, + # like from your own database server or clients under your control, it could + # be dangerous to allow untrusted users to pass JSON sources into it. + # If you must use it, use JSON.unsafe_load instead to make it clear. + # + # Since JSON version 2.8.0, `load` emits a deprecation warning when a + # non native type is deserialized, without `create_additions` being explicitly + # enabled, and in JSON version 3.0, `load` will have `create_additions` disabled + # by default. + # + # - Argument +source+ must be, or be convertible to, a \String: + # - If +source+ responds to instance method +to_str+, + # <tt>source.to_str</tt> becomes the source. + # - If +source+ responds to instance method +to_io+, + # <tt>source.to_io.read</tt> becomes the source. + # - If +source+ responds to instance method +read+, + # <tt>source.read</tt> becomes the source. + # - If both of the following are true, source becomes the \String <tt>'null'</tt>: + # - Option +allow_blank+ specifies a truthy value. + # - The source, as defined above, is +nil+ or the empty \String <tt>''</tt>. + # - Otherwise, +source+ remains the source. + # - Argument +proc+, if given, must be a \Proc that accepts one argument. 
+ # It will be called recursively with each result (depth-first order). + # See details below. + # - Argument +opts+, if given, contains a \Hash of options for the parsing. + # See {Parsing Options}[#module-JSON-label-Parsing+Options]. + # The default options can be changed via method JSON.load_default_options=. + # + # --- + # + # When no +proc+ is given, modifies +source+ as above and returns the result of + # <tt>parse(source, opts)</tt>; see #parse. + # + # Source for following examples: + # source = <<~JSON + # { + # "name": "Dave", + # "age" :40, + # "hats": [ + # "Cattleman's", + # "Panama", + # "Tophat" + # ] + # } + # JSON + # + # Load a \String: + # ruby = JSON.load(source) + # ruby # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # Load an \IO object: + # require 'stringio' + # object = JSON.load(StringIO.new(source)) + # object # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # Load a \File object: + # path = 't.json' + # File.write(path, source) + # File.open(path) do |file| + # JSON.load(file) + # end # => {"name"=>"Dave", "age"=>40, "hats"=>["Cattleman's", "Panama", "Tophat"]} + # + # --- + # + # When +proc+ is given: + # - Modifies +source+ as above. + # - Gets the +result+ from calling <tt>parse(source, opts)</tt>. + # - Recursively calls <tt>proc(result)</tt>. + # - Returns the final result. + # + # Example: + # require 'json' + # + # # Some classes for the example. + # class Base + # def initialize(attributes) + # @attributes = attributes + # end + # end + # class User < Base; end + # class Account < Base; end + # class Admin < Base; end + # # The JSON source. 
+ # json = <<-EOF + # { + # "users": [ + # {"type": "User", "username": "jane", "email": "jane@example.com"}, + # {"type": "User", "username": "john", "email": "john@example.com"} + # ], + # "accounts": [ + # {"account": {"type": "Account", "paid": true, "account_id": "1234"}}, + # {"account": {"type": "Account", "paid": false, "account_id": "1235"}} + # ], + # "admins": {"type": "Admin", "password": "0wn3d"} + # } + # EOF + # # Deserializer method. + # def deserialize_obj(obj, safe_types = %w(User Account Admin)) + # type = obj.is_a?(Hash) && obj["type"] + # safe_types.include?(type) ? Object.const_get(type).new(obj) : obj + # end + # # Call to JSON.load + # ruby = JSON.load(json, proc {|obj| + # case obj + # when Hash + # obj.each {|k, v| obj[k] = deserialize_obj v } + # when Array + # obj.map! {|v| deserialize_obj v } + # end + # obj + # }) + # pp ruby + # Output: + # {"users"=> + # [#<User:0x00000000064c4c98 + # @attributes= + # {"type"=>"User", "username"=>"jane", "email"=>"jane@example.com"}>, + # #<User:0x00000000064c4bd0 + # @attributes= + # {"type"=>"User", "username"=>"john", "email"=>"john@example.com"}>], + # "accounts"=> + # [{"account"=> + # #<Account:0x00000000064c4928 + # @attributes={"type"=>"Account", "paid"=>true, "account_id"=>"1234"}>}, + # {"account"=> + # #<Account:0x00000000064c4680 + # @attributes={"type"=>"Account", "paid"=>false, "account_id"=>"1235"}>}], + # "admins"=> + # #<Admin:0x00000000064c41f8 + # @attributes={"type"=>"Admin", "password"=>"0wn3d"}>} + # + def load(source, proc = nil, options = nil) + if proc && options.nil? && proc.is_a?(Hash) + options = proc + proc = nil + end + + opts = if options.nil? + if proc && proc.is_a?(Hash) + options, proc = proc, nil + options + else + _load_default_options + end else - proc.call result + _load_default_options.merge(options) end - end - alias restore load - module_function :restore + unless source.is_a?(String) + if source.respond_to? 
:to_str + source = source.to_str + elsif source.respond_to? :to_io + source = source.to_io.read + elsif source.respond_to?(:read) + source = source.read + end + end - class << self - # The global default options for the JSON.dump method: - # :max_nesting: false - # :allow_nan: true - # :allow_blank: true - attr_accessor :dump_default_options + if opts[:allow_blank] && (source.nil? || source.empty?) + source = 'null' + end + + if proc + opts = opts.dup + opts[:on_load] = proc.to_proc + end + + parse(source, opts) end - self.dump_default_options = { + + # Sets or returns the default options for the JSON.dump method. + # Initially: + # opts = JSON.dump_default_options + # opts # => {:max_nesting=>false, :allow_nan=>true} + deprecated_singleton_attr_accessor :dump_default_options + @dump_default_options = { :max_nesting => false, :allow_nan => true, } - # Dumps _obj_ as a JSON string, i.e. calls generate on the object and returns - # the result. + # :call-seq: + # JSON.dump(obj, io = nil, limit = nil) + # + # Dumps +obj+ as a \JSON string, i.e. calls generate on the object and returns the result. # - # If anIO (an IO-like object or an object that responds to the write method) - # was given, the resulting JSON is written to it. + # The default options can be changed via method JSON.dump_default_options. # - # If the number of nested arrays or objects exceeds _limit_, an ArgumentError - # exception is raised. This argument is similar (but not exactly the - # same!) to the _limit_ argument in Marshal.dump. + # - Argument +io+, if given, should respond to method +write+; + # the \JSON \String is written to +io+, and +io+ is returned. + # If +io+ is not given, the \JSON \String is returned. + # - Argument +limit+, if given, is passed to JSON.generate as option +max_nesting+. # - # The default options for the generator can be changed via the - # dump_default_options method. 
+ # --- # - # This method is part of the implementation of the load/dump interface of - # Marshal and YAML. - def dump(obj, anIO = nil, limit = nil) - if anIO and limit.nil? - anIO = anIO.to_io if anIO.respond_to?(:to_io) - unless anIO.respond_to?(:write) - limit = anIO - anIO = nil + # When argument +io+ is not given, returns the \JSON \String generated from +obj+: + # obj = {foo: [0, 1], bar: {baz: 2, bat: 3}, bam: :bad} + # json = JSON.dump(obj) + # json # => "{\"foo\":[0,1],\"bar\":{\"baz\":2,\"bat\":3},\"bam\":\"bad\"}" + # + # When argument +io+ is given, writes the \JSON \String to +io+ and returns +io+: + # path = 't.json' + # File.open(path, 'w') do |file| + # JSON.dump(obj, file) + # end # => #<File:t.json (closed)> + # puts File.read(path) + # Output: + # {"foo":[0,1],"bar":{"baz":2,"bat":3},"bam":"bad"} + def dump(obj, anIO = nil, limit = nil, kwargs = nil) + if kwargs.nil? + if limit.nil? + if anIO.is_a?(Hash) + kwargs = anIO + anIO = nil + end + elsif limit.is_a?(Hash) + kwargs = limit + limit = nil end end - opts = JSON.dump_default_options + + unless anIO.nil? + if anIO.respond_to?(:to_io) + anIO = anIO.to_io + elsif limit.nil? && !anIO.respond_to?(:write) + anIO, limit = nil, anIO + end + end + + opts = JSON._dump_default_options opts = opts.merge(:max_nesting => limit) if limit - result = generate(obj, opts) - if anIO - anIO.write result - anIO + opts = opts.merge(kwargs) if kwargs + + begin + State.generate(obj, opts, anIO) + rescue JSON::NestingError + raise ArgumentError, "exceed depth limit" + end + end + + # :stopdoc: + # All these were meant to be deprecated circa 2009, but were just set as undocumented + # so usage still exist in the wild. + def unparse(...) 
+ if RUBY_VERSION >= "3.0" + warn "JSON.unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1, category: :deprecated + else + warn "JSON.unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1 + end + generate(...) + end + module_function :unparse + + def fast_unparse(...) + if RUBY_VERSION >= "3.0" + warn "JSON.fast_unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1, category: :deprecated + else + warn "JSON.fast_unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1 + end + generate(...) + end + module_function :fast_unparse + + def pretty_unparse(...) + if RUBY_VERSION >= "3.0" + warn "JSON.pretty_unparse is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1, category: :deprecated + else + warn "JSON.pretty_unparse is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1 + end + pretty_generate(...) + end + module_function :fast_unparse + + def restore(...) + if RUBY_VERSION >= "3.0" + warn "JSON.restore is deprecated and will be removed in json 3.0.0, just use JSON.load", uplevel: 1, category: :deprecated else - result + warn "JSON.restore is deprecated and will be removed in json 3.0.0, just use JSON.load", uplevel: 1 + end + load(...) 
+ end + module_function :restore + + class << self + private + + def const_missing(const_name) + case const_name + when :PRETTY_STATE_PROTOTYPE + if RUBY_VERSION >= "3.0" + warn "JSON::PRETTY_STATE_PROTOTYPE is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1, category: :deprecated + else + warn "JSON::PRETTY_STATE_PROTOTYPE is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1 + end + state.new(PRETTY_GENERATE_OPTIONS) + else + super + end end - rescue JSON::NestingError - raise ArgumentError, "exceed depth limit" end + # :startdoc: + + # JSON::Coder holds a parser and generator configuration. + # + # module MyApp + # JSONC_CODER = JSON::Coder.new( + # allow_trailing_comma: true + # ) + # end + # + # MyApp::JSONC_CODER.load(document) + # + class Coder + # :call-seq: + # JSON::Coder.new(options = nil, &block) + # + # Argument +options+, if given, contains a \Hash of options for both parsing and generating. + # See {Parsing Options}[#module-JSON-label-Parsing+Options], and {Generating Options}[#module-JSON-label-Generating+Options]. + # + # For generation, the <tt>strict: true</tt> option is always set. When a Ruby object with no native \JSON counterpart is + # encountered, the block provided to the initialize method is invoked, and must return a Ruby object that has a native + # \JSON counterpart: + # + # module MyApp + # API_JSON_CODER = JSON::Coder.new do |object| + # case object + # when Time + # object.iso8601(3) + # else + # object # Unknown type, will raise + # end + # end + # end + # + # puts MyApp::API_JSON_CODER.dump(Time.now.utc) # => "2025-01-21T08:41:44.286Z" + # + def initialize(options = nil, &as_json) + if options.nil?
+ options = { strict: true } + else + options = options.dup + options[:strict] = true + end + options[:as_json] = as_json if as_json + + @state = State.new(options).freeze + @parser_config = Ext::Parser::Config.new(ParserOptions.prepare(options)).freeze + end + + # call-seq: + # dump(object) -> String + # dump(object, io) -> io + # + # Serialize the given object into a \JSON document. + def dump(object, io = nil) + @state.generate(object, io) + end + alias_method :generate, :dump + + # call-seq: + # load(string) -> Object + # + # Parse the given \JSON document and return an equivalent Ruby object. + def load(source) + @parser_config.parse(source) + end + alias_method :parse, :load - # Encodes string using Ruby's _String.encode_ - def self.iconv(to, from, string) - string.encode(to, from) + # call-seq: + # load_file(path) -> Object + # + # Read the file at +path+ as UTF-8, parse it as a \JSON document, and return an equivalent Ruby object. + def load_file(path) + load(File.read(path, encoding: Encoding::UTF_8)) + end end end @@ -414,8 +1103,14 @@ module ::Kernel # Outputs _objs_ to STDOUT as JSON strings in the shortest form, that is in # one line. def j(*objs) + if RUBY_VERSION >= "3.0" + warn "Kernel#j is deprecated and will be removed in json 3.0.0", uplevel: 1, category: :deprecated + else + warn "Kernel#j is deprecated and will be removed in json 3.0.0", uplevel: 1 + end + objs.each do |obj| - puts JSON::generate(obj, :allow_nan => true, :max_nesting => false) + puts JSON.generate(obj, :allow_nan => true, :max_nesting => false) end nil end @@ -423,8 +1118,14 @@ module ::Kernel # Outputs _objs_ to STDOUT as JSON strings in a pretty format, with # indentation and over many lines.
def jj(*objs) + if RUBY_VERSION >= "3.0" + warn "Kernel#jj is deprecated and will be removed in json 3.0.0", uplevel: 1, category: :deprecated + else + warn "Kernel#jj is deprecated and will be removed in json 3.0.0", uplevel: 1 + end + objs.each do |obj| - puts JSON::pretty_generate(obj, :allow_nan => true, :max_nesting => false) + puts JSON.pretty_generate(obj, :allow_nan => true, :max_nesting => false) end nil end @@ -435,22 +1136,7 @@ module ::Kernel # # The _opts_ argument is passed through to generate/parse respectively. See # generate and parse for their documentation. - def JSON(object, *args) - if object.respond_to? :to_str - JSON.parse(object.to_str, args.first) - else - JSON.generate(object, args.first) - end - end -end - -# Extends any Class to include _json_creatable?_ method. -class ::Class - # Returns true if this class can be used to create an instance - # from a serialised JSON string. The class has to implement a class - # method _json_create_ that expects a hash as first parameter. The hash - # should include the required data. - def json_creatable? - respond_to?(:json_create) + def JSON(object, opts = nil) + JSON[object, opts] end end diff --git a/ext/json/lib/json/ext.rb b/ext/json/lib/json/ext.rb index 7264a857fa..5bacc5e371 100644 --- a/ext/json/lib/json/ext.rb +++ b/ext/json/lib/json/ext.rb @@ -1,15 +1,45 @@ +# frozen_string_literal: true + require 'json/common' module JSON # This module holds all the modules/classes that implement JSON's # functionality as C extensions. module Ext + class Parser + class << self + def parse(...) + new(...).parse + end + alias_method :parse, :parse # Allow redefinition by extensions + end + + def initialize(source, opts = nil) + @source = source + @config = Config.new(opts) + end + + def source + @source.dup + end + + def parse + @config.parse(@source) + end + end + require 'json/ext/parser' - require 'json/ext/generator' - $DEBUG and warn "Using Ext extension for JSON." 
- JSON.parser = Parser - JSON.generator = Generator + Ext::Parser::Config = Ext::ParserConfig + JSON.parser = Ext::Parser + + if RUBY_ENGINE == 'truffleruby' + require 'json/truffle_ruby/generator' + JSON.generator = JSON::TruffleRuby::Generator + else + require 'json/ext/generator' + JSON.generator = Generator + end end - JSON_LOADED = true unless defined?(::JSON::JSON_LOADED) + JSON_LOADED = true unless defined?(JSON::JSON_LOADED) end diff --git a/ext/json/lib/json/ext/generator/state.rb b/ext/json/lib/json/ext/generator/state.rb new file mode 100644 index 0000000000..ce5c185cab --- /dev/null +++ b/ext/json/lib/json/ext/generator/state.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +module JSON + module Ext + module Generator + class State + # call-seq: new(opts = {}) + # + # Instantiates a new State object, configured by _opts_. + # + # Argument +opts+, if given, contains a \Hash of options for the generation. + # See {Generating Options}[#module-JSON-label-Generating+Options]. + def initialize(opts = nil) + if opts && !opts.empty? + configure(opts) + end + end + + # call-seq: configure(opts) + # + # Configure this State instance with the Hash _opts_, and return + # itself. + def configure(opts) + unless opts.is_a?(Hash) + if opts.respond_to?(:to_hash) + opts = opts.to_hash + elsif opts.respond_to?(:to_h) + opts = opts.to_h + else + raise TypeError, "can't convert #{opts.class} into Hash" + end + end + _configure(opts) + end + + alias_method :merge, :configure + + # call-seq: to_h + # + # Returns the configuration instance variables as a hash, that can be + # passed to the configure method. 
+ def to_h + result = { + indent: indent, + space: space, + space_before: space_before, + object_nl: object_nl, + array_nl: array_nl, + as_json: as_json, + allow_nan: allow_nan?, + ascii_only: ascii_only?, + max_nesting: max_nesting, + script_safe: script_safe?, + strict: strict?, + depth: depth, + buffer_initial_length: buffer_initial_length, + } + + allow_duplicate_key = allow_duplicate_key? + unless allow_duplicate_key.nil? + result[:allow_duplicate_key] = allow_duplicate_key + end + + instance_variables.each do |iv| + iv = iv.to_s[1..-1] + result[iv.to_sym] = self[iv] + end + + result + end + + alias_method :to_hash, :to_h + + # call-seq: [](name) + # + # Returns the value returned by method +name+. + def [](name) + ::JSON.deprecation_warning("JSON::State#[] is deprecated and will be removed in json 3.0.0") + + if respond_to?(name) + __send__(name) + else + instance_variable_get("@#{name}") if + instance_variables.include?("@#{name}".to_sym) # avoid warning + end + end + + # call-seq: []=(name, value) + # + # Sets the attribute name to value. + def []=(name, value) + ::JSON.deprecation_warning("JSON::State#[]= is deprecated and will be removed in json 3.0.0") + + if respond_to?(name_writer = "#{name}=") + __send__ name_writer, value + else + instance_variable_set "@#{name}", value + end + end + end + end + end +end diff --git a/ext/json/lib/json/generic_object.rb b/ext/json/lib/json/generic_object.rb index 108309db26..5c8ace354b 100644 --- a/ext/json/lib/json/generic_object.rb +++ b/ext/json/lib/json/generic_object.rb @@ -1,5 +1,9 @@ -#frozen_string_literal: false -require 'ostruct' +# frozen_string_literal: true +begin + require 'ostruct' +rescue LoadError + warn "JSON::GenericObject requires 'ostruct'. Please install it with `gem install ostruct`." 
+end module JSON class GenericObject < OpenStruct @@ -48,14 +52,6 @@ module JSON table end - def [](name) - __send__(name) - end unless method_defined?(:[]) - - def []=(name, value) - __send__("#{name}=", value) - end unless method_defined?(:[]=) - def |(other) self.class[other.to_hash.merge(to_hash)] end @@ -67,5 +63,5 @@ module JSON def to_json(*a) as_json.to_json(*a) end - end + end if defined?(::OpenStruct) end diff --git a/ext/json/lib/json/version.rb b/ext/json/lib/json/version.rb index 115eb5bb9a..631beba83e 100644 --- a/ext/json/lib/json/version.rb +++ b/ext/json/lib/json/version.rb @@ -1,9 +1,5 @@ -# frozen_string_literal: false +# frozen_string_literal: true + module JSON - # JSON version - VERSION = '2.2.0' - VERSION_ARRAY = VERSION.split(/\./).map { |x| x.to_i } # :nodoc: - VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc: - VERSION_MINOR = VERSION_ARRAY[1] # :nodoc: - VERSION_BUILD = VERSION_ARRAY[2] # :nodoc: + VERSION = '2.18.0' end diff --git a/ext/json/parser/depend b/ext/json/parser/depend index d0c9c2d2a6..d4737b1dfb 100644 --- a/ext/json/parser/depend +++ b/ext/json/parser/depend @@ -1,5 +1,5 @@ $(OBJS): $(ruby_headers) -parser.o: parser.c parser.h $(srcdir)/../fbuffer/fbuffer.h +parser.o: parser.c $(srcdir)/../fbuffer/fbuffer.h # AUTOGENERATED DEPENDENCIES START parser.o: $(RUBY_EXTCONF_H) @@ -7,9 +7,167 @@ parser.o: $(arch_hdrdir)/ruby/config.h parser.o: $(hdrdir)/ruby.h parser.o: $(hdrdir)/ruby/assert.h parser.o: $(hdrdir)/ruby/backward.h +parser.o: $(hdrdir)/ruby/backward/2/assume.h +parser.o: $(hdrdir)/ruby/backward/2/attributes.h +parser.o: $(hdrdir)/ruby/backward/2/bool.h +parser.o: $(hdrdir)/ruby/backward/2/inttypes.h +parser.o: $(hdrdir)/ruby/backward/2/limits.h +parser.o: $(hdrdir)/ruby/backward/2/long_long.h +parser.o: $(hdrdir)/ruby/backward/2/stdalign.h +parser.o: $(hdrdir)/ruby/backward/2/stdarg.h parser.o: $(hdrdir)/ruby/defines.h parser.o: $(hdrdir)/ruby/encoding.h parser.o: $(hdrdir)/ruby/intern.h +parser.o: 
$(hdrdir)/ruby/internal/abi.h +parser.o: $(hdrdir)/ruby/internal/anyargs.h +parser.o: $(hdrdir)/ruby/internal/arithmetic.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/char.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/double.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/int.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/long.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/short.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h +parser.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h +parser.o: $(hdrdir)/ruby/internal/assume.h +parser.o: $(hdrdir)/ruby/internal/attr/alloc_size.h +parser.o: $(hdrdir)/ruby/internal/attr/artificial.h +parser.o: $(hdrdir)/ruby/internal/attr/cold.h +parser.o: $(hdrdir)/ruby/internal/attr/const.h +parser.o: $(hdrdir)/ruby/internal/attr/constexpr.h +parser.o: $(hdrdir)/ruby/internal/attr/deprecated.h +parser.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h +parser.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h +parser.o: $(hdrdir)/ruby/internal/attr/error.h +parser.o: $(hdrdir)/ruby/internal/attr/flag_enum.h +parser.o: $(hdrdir)/ruby/internal/attr/forceinline.h +parser.o: $(hdrdir)/ruby/internal/attr/format.h +parser.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h +parser.o: $(hdrdir)/ruby/internal/attr/noalias.h +parser.o: $(hdrdir)/ruby/internal/attr/nodiscard.h +parser.o: $(hdrdir)/ruby/internal/attr/noexcept.h +parser.o: $(hdrdir)/ruby/internal/attr/noinline.h +parser.o: $(hdrdir)/ruby/internal/attr/nonnull.h +parser.o: $(hdrdir)/ruby/internal/attr/noreturn.h +parser.o: 
$(hdrdir)/ruby/internal/attr/packed_struct.h +parser.o: $(hdrdir)/ruby/internal/attr/pure.h +parser.o: $(hdrdir)/ruby/internal/attr/restrict.h +parser.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h +parser.o: $(hdrdir)/ruby/internal/attr/warning.h +parser.o: $(hdrdir)/ruby/internal/attr/weakref.h +parser.o: $(hdrdir)/ruby/internal/cast.h +parser.o: $(hdrdir)/ruby/internal/compiler_is.h +parser.o: $(hdrdir)/ruby/internal/compiler_is/apple.h +parser.o: $(hdrdir)/ruby/internal/compiler_is/clang.h +parser.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h +parser.o: $(hdrdir)/ruby/internal/compiler_is/intel.h +parser.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h +parser.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h +parser.o: $(hdrdir)/ruby/internal/compiler_since.h +parser.o: $(hdrdir)/ruby/internal/config.h +parser.o: $(hdrdir)/ruby/internal/constant_p.h +parser.o: $(hdrdir)/ruby/internal/core.h +parser.o: $(hdrdir)/ruby/internal/core/rarray.h +parser.o: $(hdrdir)/ruby/internal/core/rbasic.h +parser.o: $(hdrdir)/ruby/internal/core/rbignum.h +parser.o: $(hdrdir)/ruby/internal/core/rclass.h +parser.o: $(hdrdir)/ruby/internal/core/rdata.h +parser.o: $(hdrdir)/ruby/internal/core/rfile.h +parser.o: $(hdrdir)/ruby/internal/core/rhash.h +parser.o: $(hdrdir)/ruby/internal/core/robject.h +parser.o: $(hdrdir)/ruby/internal/core/rregexp.h +parser.o: $(hdrdir)/ruby/internal/core/rstring.h +parser.o: $(hdrdir)/ruby/internal/core/rstruct.h +parser.o: $(hdrdir)/ruby/internal/core/rtypeddata.h +parser.o: $(hdrdir)/ruby/internal/ctype.h +parser.o: $(hdrdir)/ruby/internal/dllexport.h +parser.o: $(hdrdir)/ruby/internal/dosish.h +parser.o: $(hdrdir)/ruby/internal/encoding/coderange.h +parser.o: $(hdrdir)/ruby/internal/encoding/ctype.h +parser.o: $(hdrdir)/ruby/internal/encoding/encoding.h +parser.o: $(hdrdir)/ruby/internal/encoding/pathname.h +parser.o: $(hdrdir)/ruby/internal/encoding/re.h +parser.o: $(hdrdir)/ruby/internal/encoding/sprintf.h +parser.o: 
$(hdrdir)/ruby/internal/encoding/string.h +parser.o: $(hdrdir)/ruby/internal/encoding/symbol.h +parser.o: $(hdrdir)/ruby/internal/encoding/transcode.h +parser.o: $(hdrdir)/ruby/internal/error.h +parser.o: $(hdrdir)/ruby/internal/eval.h +parser.o: $(hdrdir)/ruby/internal/event.h +parser.o: $(hdrdir)/ruby/internal/fl_type.h +parser.o: $(hdrdir)/ruby/internal/gc.h +parser.o: $(hdrdir)/ruby/internal/glob.h +parser.o: $(hdrdir)/ruby/internal/globals.h +parser.o: $(hdrdir)/ruby/internal/has/attribute.h +parser.o: $(hdrdir)/ruby/internal/has/builtin.h +parser.o: $(hdrdir)/ruby/internal/has/c_attribute.h +parser.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h +parser.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h +parser.o: $(hdrdir)/ruby/internal/has/extension.h +parser.o: $(hdrdir)/ruby/internal/has/feature.h +parser.o: $(hdrdir)/ruby/internal/has/warning.h +parser.o: $(hdrdir)/ruby/internal/intern/array.h +parser.o: $(hdrdir)/ruby/internal/intern/bignum.h +parser.o: $(hdrdir)/ruby/internal/intern/class.h +parser.o: $(hdrdir)/ruby/internal/intern/compar.h +parser.o: $(hdrdir)/ruby/internal/intern/complex.h +parser.o: $(hdrdir)/ruby/internal/intern/cont.h +parser.o: $(hdrdir)/ruby/internal/intern/dir.h +parser.o: $(hdrdir)/ruby/internal/intern/enum.h +parser.o: $(hdrdir)/ruby/internal/intern/enumerator.h +parser.o: $(hdrdir)/ruby/internal/intern/error.h +parser.o: $(hdrdir)/ruby/internal/intern/eval.h +parser.o: $(hdrdir)/ruby/internal/intern/file.h +parser.o: $(hdrdir)/ruby/internal/intern/hash.h +parser.o: $(hdrdir)/ruby/internal/intern/io.h +parser.o: $(hdrdir)/ruby/internal/intern/load.h +parser.o: $(hdrdir)/ruby/internal/intern/marshal.h +parser.o: $(hdrdir)/ruby/internal/intern/numeric.h +parser.o: $(hdrdir)/ruby/internal/intern/object.h +parser.o: $(hdrdir)/ruby/internal/intern/parse.h +parser.o: $(hdrdir)/ruby/internal/intern/proc.h +parser.o: $(hdrdir)/ruby/internal/intern/process.h +parser.o: $(hdrdir)/ruby/internal/intern/random.h +parser.o: 
$(hdrdir)/ruby/internal/intern/range.h +parser.o: $(hdrdir)/ruby/internal/intern/rational.h +parser.o: $(hdrdir)/ruby/internal/intern/re.h +parser.o: $(hdrdir)/ruby/internal/intern/ruby.h +parser.o: $(hdrdir)/ruby/internal/intern/select.h +parser.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +parser.o: $(hdrdir)/ruby/internal/intern/set.h +parser.o: $(hdrdir)/ruby/internal/intern/signal.h +parser.o: $(hdrdir)/ruby/internal/intern/sprintf.h +parser.o: $(hdrdir)/ruby/internal/intern/string.h +parser.o: $(hdrdir)/ruby/internal/intern/struct.h +parser.o: $(hdrdir)/ruby/internal/intern/thread.h +parser.o: $(hdrdir)/ruby/internal/intern/time.h +parser.o: $(hdrdir)/ruby/internal/intern/variable.h +parser.o: $(hdrdir)/ruby/internal/intern/vm.h +parser.o: $(hdrdir)/ruby/internal/interpreter.h +parser.o: $(hdrdir)/ruby/internal/iterator.h +parser.o: $(hdrdir)/ruby/internal/memory.h +parser.o: $(hdrdir)/ruby/internal/method.h +parser.o: $(hdrdir)/ruby/internal/module.h +parser.o: $(hdrdir)/ruby/internal/newobj.h +parser.o: $(hdrdir)/ruby/internal/scan_args.h +parser.o: $(hdrdir)/ruby/internal/special_consts.h +parser.o: $(hdrdir)/ruby/internal/static_assert.h +parser.o: $(hdrdir)/ruby/internal/stdalign.h +parser.o: $(hdrdir)/ruby/internal/stdbool.h +parser.o: $(hdrdir)/ruby/internal/stdckdint.h +parser.o: $(hdrdir)/ruby/internal/symbol.h +parser.o: $(hdrdir)/ruby/internal/value.h +parser.o: $(hdrdir)/ruby/internal/value_type.h +parser.o: $(hdrdir)/ruby/internal/variable.h +parser.o: $(hdrdir)/ruby/internal/warning_push.h +parser.o: $(hdrdir)/ruby/internal/xmalloc.h parser.o: $(hdrdir)/ruby/missing.h parser.o: $(hdrdir)/ruby/onigmo.h parser.o: $(hdrdir)/ruby/oniguruma.h @@ -17,7 +175,8 @@ parser.o: $(hdrdir)/ruby/ruby.h parser.o: $(hdrdir)/ruby/st.h parser.o: $(hdrdir)/ruby/subst.h parser.o: $(srcdir)/../fbuffer/fbuffer.h +parser.o: $(srcdir)/../json.h +parser.o: $(srcdir)/../simd/simd.h +parser.o: $(srcdir)/../vendor/ryu.h parser.o: parser.c -parser.o: parser.h 
-parser.o: parser.rl # AUTOGENERATED DEPENDENCIES END diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb index f7360d46b2..2440e66d8b 100644 --- a/ext/json/parser/extconf.rb +++ b/ext/json/parser/extconf.rb @@ -1,6 +1,16 @@ -# frozen_string_literal: false +# frozen_string_literal: true require 'mkmf' -have_func("rb_enc_raise", "ruby.h") +$defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0" +have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0 +have_func("rb_str_to_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 +have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 +have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby + +append_cflags("-std=c99") + +if enable_config('parser-use-simd', default=!ENV["JSON_DISABLE_SIMD"]) + load __dir__ + "/../simd/conf.rb" +end create_makefile 'json/ext/parser' diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 6f0d31c2eb..f1ea1b6abb 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1,69 +1,301 @@ +#include "../json.h" +#include "../vendor/ryu.h" +#include "../simd/simd.h" -#line 1 "parser.rl" -#include "../fbuffer/fbuffer.h" -#include "parser.h" +static VALUE mJSON, eNestingError, Encoding_UTF_8; +static VALUE CNaN, CInfinity, CMinusInfinity; + +static ID i_new, i_try_convert, i_uminus, i_encode; + +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, sym_symbolize_names, sym_freeze, + sym_decimal_class, sym_on_load, sym_allow_duplicate_key; + +static int binary_encindex; +static int utf8_encindex; -#if defined HAVE_RUBY_ENCODING_H -# define EXC_ENCODING rb_utf8_encoding(), -# ifndef HAVE_RB_ENC_RAISE +#ifndef HAVE_RB_HASH_BULK_INSERT +// For TruffleRuby static void -enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...) 
+rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) { - va_list args; - VALUE mesg; + long index = 0; + while (index < count) { + VALUE name = pairs[index++]; + VALUE value = pairs[index++]; + rb_hash_aset(hash, name, value); + } + RB_GC_GUARD(hash); +} +#endif - va_start(args, fmt); - mesg = rb_enc_vsprintf(enc, fmt, args); - va_end(args); +#ifndef HAVE_RB_HASH_NEW_CAPA +#define rb_hash_new_capa(n) rb_hash_new() +#endif - rb_exc_raise(rb_exc_new3(exc, mesg)); +#ifndef HAVE_RB_STR_TO_INTERNED_STR +static VALUE rb_str_to_interned_str(VALUE str) +{ + return rb_funcall(rb_str_freeze(str), i_uminus, 0); } -# define rb_enc_raise enc_raise +#endif + +/* name cache */ + +#include <string.h> +#include <ctype.h> + +// Object names are likely to be repeated, and are frozen. +// As such we can re-use them if we keep a cache of the ones we've seen so far, +// and save much more expensive lookups into the global fstring table. +// This cache implementation is deliberately simple, as we're optimizing for compactness, +// to be able to fit safely on the stack. +// As such, binary search into a sorted array gives a good tradeoff between compactness and +// performance. 
+#define JSON_RVALUE_CACHE_CAPA 63 +typedef struct rvalue_cache_struct { + int length; + VALUE entries[JSON_RVALUE_CACHE_CAPA]; +} rvalue_cache; + +static rb_encoding *enc_utf8; + +#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 + +static inline VALUE build_interned_string(const char *str, const long length) +{ +# ifdef HAVE_RB_ENC_INTERNED_STR + return rb_enc_interned_str(str, length, enc_utf8); +# else + VALUE rstring = rb_utf8_str_new(str, length); + return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); # endif -#else -# define EXC_ENCODING /* nothing */ -# define rb_enc_raise rb_raise +} + +static inline VALUE build_symbol(const char *str, const long length) +{ + return rb_str_intern(build_interned_string(str, length)); +} + +static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) +{ + MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); + cache->length++; + cache->entries[index] = rstring; +} + +#define rstring_cache_memcmp memcmp + +#if JSON_CPU_LITTLE_ENDIAN_64BITS +#if __has_builtin(__builtin_bswap64) +#undef rstring_cache_memcmp +ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length) +{ + // The libc memcmp has numerous complex optimizations, but in this particular case, + // we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to + // inline a simpler memcmp outperforms calling the libc version. + long i = 0; + + for (; i + 8 <= length; i += 8) { + uint64_t a, b; + memcpy(&a, str + i, 8); + memcpy(&b, rptr + i, 8); + if (a != b) { + a = __builtin_bswap64(a); + b = __builtin_bswap64(b); + return (a < b) ? -1 : 1; + } + } + + for (; i < length; i++) { + if (str[i] != rptr[i]) { + return (str[i] < rptr[i]) ? 
-1 : 1; + } + } + + return 0; +} +#endif #endif -/* unicode */ +ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +{ + const char *rstring_ptr; + long rstring_length; -static const char digit_values[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, - -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1 + RSTRING_GETMEM(rstring, rstring_ptr, rstring_length); + + if (length == rstring_length) { + return rstring_cache_memcmp(str, rstring_ptr, length); + } else { + return (int)(length - rstring_length); + } +} + +ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + int low = 0; + int high = cache->length - 1; + + while (low <= high) { + int mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + int cmp = rstring_cache_cmp(str, length, entry); + + if (cmp == 0) { + return entry; + } else if (cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + VALUE rstring = build_interned_string(str, length); + + if (cache->length < 
JSON_RVALUE_CACHE_CAPA) { + rvalue_cache_insert_at(cache, low, rstring); + } + return rstring; +} + +static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + int low = 0; + int high = cache->length - 1; + + while (low <= high) { + int mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + + if (cmp == 0) { + return entry; + } else if (cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + VALUE rsymbol = build_symbol(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + rvalue_cache_insert_at(cache, low, rsymbol); + } + return rsymbol; +} + +/* rvalue stack */ + +#define RVALUE_STACK_INITIAL_CAPA 128 + +enum rvalue_stack_type { + RVALUE_STACK_HEAP_ALLOCATED = 0, + RVALUE_STACK_STACK_ALLOCATED = 1, }; -static UTF32 unescape_unicode(const unsigned char *p) +typedef struct rvalue_stack_struct { + enum rvalue_stack_type type; + long capa; + long head; + VALUE *ptr; +} rvalue_stack; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); + +static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) { - char b; - UTF32 result = 0; - b = digit_values[p[0]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[1]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[2]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[3]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | (unsigned char)b; - return result; + long required = stack->capa * 2; + + if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { + stack = rvalue_stack_spill(stack, handle, stack_ref); + } else { + REALLOC_N(stack->ptr, VALUE, required); + stack->capa = required; + } + return stack; +} + +static 
VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) +{ + if (RB_UNLIKELY(stack->head >= stack->capa)) { + stack = rvalue_stack_grow(stack, handle, stack_ref); + } + stack->ptr[stack->head] = value; + stack->head++; + return value; +} + +static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) +{ + return stack->ptr + (stack->head - count); +} + +static inline void rvalue_stack_pop(rvalue_stack *stack, long count) +{ + stack->head -= count; +} + +static void rvalue_stack_mark(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + long index; + for (index = 0; index < stack->head; index++) { + rb_gc_mark(stack->ptr[index]); + } } -static int convert_UTF32_to_UTF8(char *buf, UTF32 ch) +static void rvalue_stack_free(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + if (stack) { + ruby_xfree(stack->ptr); + ruby_xfree(stack); + } +} + +static size_t rvalue_stack_memsize(const void *ptr) +{ + const rvalue_stack *stack = (const rvalue_stack *)ptr; + return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; +} + +static const rb_data_type_t JSON_Parser_rvalue_stack_type = { + "JSON::Ext::Parser/rvalue_stack", + { + .dmark = rvalue_stack_mark, + .dfree = rvalue_stack_free, + .dsize = rvalue_stack_memsize, + }, + 0, 0, + RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) +{ + rvalue_stack *stack; + *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + *stack_ref = stack; + MEMCPY(stack, old_stack, rvalue_stack, 1); + + stack->capa = old_stack->capa << 1; + stack->ptr = ALLOC_N(VALUE, stack->capa); + stack->type = RVALUE_STACK_HEAP_ALLOCATED; + MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); + return stack; +} + +static void rvalue_stack_eagerly_release(VALUE handle) +{ + if (handle) { + rvalue_stack *stack; + TypedData_Get_Struct(handle, rvalue_stack, 
&JSON_Parser_rvalue_stack_type, stack); + RTYPEDDATA_DATA(handle) = NULL; + rvalue_stack_free(stack); + } +} + +static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) { int len = 1; if (ch <= 0x7F) { @@ -89,1602 +321,1079 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch) return len; } -static VALUE mJSON, mExt, cParser, eParserError, eNestingError; -static VALUE CNaN, CInfinity, CMinusInfinity; -static VALUE cBigDecimal = Qundef; +enum duplicate_key_action { + JSON_DEPRECATED = 0, + JSON_IGNORE, + JSON_RAISE, +}; + +typedef struct JSON_ParserStruct { + VALUE on_load_proc; + VALUE decimal_class; + ID decimal_method_id; + enum duplicate_key_action on_duplicate_key; + int max_nesting; + bool allow_nan; + bool allow_trailing_comma; + bool allow_control_characters; + bool symbolize_names; + bool freeze; +} JSON_ParserConfig; + +typedef struct JSON_ParserStateStruct { + VALUE stack_handle; + const char *start; + const char *cursor; + const char *end; + rvalue_stack *stack; + rvalue_cache name_cache; + int in_array; + int current_nesting; +} JSON_ParserState; + +static inline size_t rest(JSON_ParserState *state) { + return state->end - state->cursor; +} + +static inline bool eos(JSON_ParserState *state) { + return state->cursor >= state->end; +} + +static inline char peek(JSON_ParserState *state) +{ + if (RB_UNLIKELY(eos(state))) { + return 0; + } + return *state->cursor; +} + +static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out) +{ + const char *cursor = state->cursor; + long column = 0; + long line = 1; + + while (cursor >= state->start) { + if (*cursor-- == '\n') { + break; + } + column++; + } + + while (cursor >= state->start) { + if (*cursor-- == '\n') { + line++; + } + } + *line_out = line; + *column_out = column; +} -static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, - i_object_class, i_array_class, i_decimal_class, i_key_p, - 
i_deep_const_get, i_match, i_match_string, i_aset, i_aref, - i_leftshift, i_new, i_BigDecimal; +static void emit_parse_warning(const char *message, JSON_ParserState *state) +{ + long line, column; + cursor_position(state, &line, &column); + VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column); + rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning); +} -#line 126 "parser.rl" +#define PARSE_ERROR_FRAGMENT_LEN 32 +#ifdef RBIMPL_ATTR_NORETURN +RBIMPL_ATTR_NORETURN() +#endif +static void raise_parse_error(const char *format, JSON_ParserState *state) +{ + unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3]; + long line, column; + cursor_position(state, &line, &column); + + const char *ptr = "EOF"; + if (state->cursor && state->cursor < state->end) { + ptr = state->cursor; + size_t len = 0; + while (len < PARSE_ERROR_FRAGMENT_LEN) { + char ch = ptr[len]; + if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') { + break; + } + len++; + } + if (len) { + buffer[0] = '\''; + MEMCPY(buffer + 1, ptr, char, len); -#line 108 "parser.c" -enum {JSON_object_start = 1}; -enum {JSON_object_first_final = 27}; -enum {JSON_object_error = 0}; + while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte + len--; + } -enum {JSON_object_en_main = 1}; + if (buffer[len] >= 0xC0) { // multibyte character start + len--; + } + buffer[len + 1] = '\''; + buffer[len + 2] = '\0'; + ptr = (const char *)buffer; + } + } -#line 167 "parser.rl" + VALUE msg = rb_sprintf(format, ptr); + VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column); + RB_GC_GUARD(msg); + VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message); + rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line)); + rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column)); + rb_exc_raise(exc); +} -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) +#ifdef 
RBIMPL_ATTR_NORETURN +RBIMPL_ATTR_NORETURN() +#endif +static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at) { - int cs = EVIL; - VALUE last_name = Qnil; - VALUE object_class = json->object_class; + state->cursor = at; + raise_parse_error(format, state); +} + +/* unicode */ + +static const signed char digit_values[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, + -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1 +}; - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); +static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe) +{ + if (RB_UNLIKELY(sp > spe - 4)) { + raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2); } - *result = NIL_P(object_class) ? 
rb_hash_new() : rb_class_new_instance(0, 0, object_class); - - -#line 132 "parser.c" - { - cs = JSON_object_start; - } - -#line 182 "parser.rl" - -#line 139 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 123 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 13: goto st2; - case 32: goto st2; - case 34: goto tr2; - case 47: goto st23; - case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st2; - goto st0; -tr2: -#line 149 "parser.rl" - { - char *np; - json->parsing_name = 1; - np = JSON_parse_string(json, p, pe, &last_name); - json->parsing_name = 0; - if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;} + const unsigned char *p = (const unsigned char *)sp; + + const signed char b0 = digit_values[p[0]]; + const signed char b1 = digit_values[p[1]]; + const signed char b2 = digit_values[p[2]]; + const signed char b3 = digit_values[p[3]]; + + if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) { + raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2); } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 180 "parser.c" - switch( (*p) ) { - case 13: goto st3; - case 32: goto st3; - case 47: goto st4; - case 58: goto st8; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st3; - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st5; - case 47: goto st7; - } - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 42 ) - goto st6; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st3; - } - goto st5; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 10 ) - goto st3; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 13: goto st8; - case 32: goto st8; - case 34: goto 
tr11; - case 45: goto tr11; - case 47: goto st19; - case 73: goto tr11; - case 78: goto tr11; - case 91: goto tr11; - case 102: goto tr11; - case 110: goto tr11; - case 116: goto tr11; - case 123: goto tr11; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr11; - } else if ( (*p) >= 9 ) - goto st8; - goto st0; -tr11: -#line 134 "parser.rl" - { - VALUE v = Qnil; - char *np = JSON_parse_value(json, p, pe, &v, current_nesting); - if (np == NULL) { - p--; {p++; cs = 9; goto _out;} - } else { - if (NIL_P(json->object_class)) { - rb_hash_aset(*result, last_name, v); + + return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3; +} + +#define GET_PARSER_CONFIG \ + JSON_ParserConfig *config; \ + TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config) + +static const rb_data_type_t JSON_ParserConfig_type; + +static void +json_eat_comments(JSON_ParserState *state) +{ + const char *start = state->cursor; + state->cursor++; + + switch (peek(state)) { + case '/': { + state->cursor = memchr(state->cursor, '\n', state->end - state->cursor); + if (!state->cursor) { + state->cursor = state->end; } else { - rb_funcall(*result, i_aset, 2, last_name, v); + state->cursor++; } - {p = (( np))-1;} + break; } - } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 267 "parser.c" - switch( (*p) ) { - case 13: goto st9; - case 32: goto st9; - case 44: goto st10; - case 47: goto st15; - case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st9; - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 34: goto tr2; - case 47: goto st11; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - switch( (*p) ) { - case 42: goto st12; - case 47: goto st14; - } - goto st0; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - if ( (*p) == 42 ) - goto 
st13; - goto st12; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - switch( (*p) ) { - case 42: goto st13; - case 47: goto st10; - } - goto st12; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - if ( (*p) == 10 ) - goto st10; - goto st14; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - switch( (*p) ) { - case 42: goto st16; - case 47: goto st18; - } - goto st0; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - if ( (*p) == 42 ) - goto st17; - goto st16; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - switch( (*p) ) { - case 42: goto st17; - case 47: goto st9; - } - goto st16; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - if ( (*p) == 10 ) - goto st9; - goto st18; -tr4: -#line 157 "parser.rl" - { p--; {p++; cs = 27; goto _out;} } - goto st27; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: -#line 363 "parser.c" - goto st0; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - switch( (*p) ) { - case 42: goto st20; - case 47: goto st22; - } - goto st0; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - if ( (*p) == 42 ) - goto st21; - goto st20; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: - switch( (*p) ) { - case 42: goto st21; - case 47: goto st8; - } - goto st20; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: - if ( (*p) == 10 ) - goto st8; - goto st22; -st23: - if ( ++p == pe ) - goto _test_eof23; -case 23: - switch( (*p) ) { - case 42: goto st24; - case 47: goto st26; - } - goto st0; -st24: - if ( ++p == pe ) - goto _test_eof24; -case 24: - if ( (*p) == 42 ) - goto st25; - goto st24; -st25: - if ( ++p == pe ) - goto _test_eof25; -case 25: - switch( (*p) ) { - case 42: goto st25; - case 47: goto st2; - } - goto st24; -st26: - if ( ++p == pe ) - goto _test_eof26; -case 26: - if ( (*p) == 10 ) - goto st2; - goto st26; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto 
_test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof27: cs = 27; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; - _test_eof23: cs = 23; goto _test_eof; - _test_eof24: cs = 24; goto _test_eof; - _test_eof25: cs = 25; goto _test_eof; - _test_eof26: cs = 26; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 183 "parser.rl" - - if (cs >= JSON_object_first_final) { - if (json->create_additions) { - VALUE klassname; - if (NIL_P(json->object_class)) { - klassname = rb_hash_aref(*result, json->create_id); - } else { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); - } - if (!NIL_P(klassname)) { - VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { - *result = rb_funcall(klass, i_json_create, 1, *result); + case '*': { + state->cursor++; + + while (true) { + const char *next_match = memchr(state->cursor, '*', state->end - state->cursor); + if (!next_match) { + raise_parse_error_at("unterminated comment, expected closing '*/'", state, start); + } + + state->cursor = next_match + 1; + if (peek(state) == '/') { + state->cursor++; + break; } } + break; + } + default: + raise_parse_error_at("unexpected token %s", state, start); + break; + } +} + +ALWAYS_INLINE(static) void +json_eat_whitespace(JSON_ParserState *state) +{ + while (true) { + switch (peek(state)) { + case ' ': + 
state->cursor++; + break; + case '\n': + state->cursor++; + + // Heuristic: if we see a newline, there is likely consecutive spaces after it. +#if JSON_CPU_LITTLE_ENDIAN_64BITS + while (rest(state) > 8) { + uint64_t chunk; + memcpy(&chunk, state->cursor, sizeof(uint64_t)); + if (chunk == 0x2020202020202020) { + state->cursor += 8; + continue; + } + + uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT; + state->cursor += consecutive_spaces; + break; + } +#endif + break; + case '\t': + case '\r': + state->cursor++; + break; + case '/': + json_eat_comments(state); + break; + + default: + return; } - return p + 1; - } else { - return NULL; } } +static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize) +{ + if (symbolize) { + intern = true; + } + VALUE result; +# ifdef HAVE_RB_ENC_INTERNED_STR + if (intern) { + result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); + } else { + result = rb_utf8_str_new(start, (long)(end - start)); + } +# else + result = rb_utf8_str_new(start, (long)(end - start)); + if (intern) { + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + } +# endif + + if (symbolize) { + result = rb_str_intern(result); + } + return result; +} -#line 486 "parser.c" -enum {JSON_value_start = 1}; -enum {JSON_value_first_final = 29}; -enum {JSON_value_error = 0}; +static inline bool json_string_cacheable_p(const char *string, size_t length) +{ + // We mostly want to cache strings that are likely to be repeated. + // Simple heuristics: + // - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold. + // - If the first character isn't a letter, we're much less likely to see this string again. 
+ return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]); +} -enum {JSON_value_en_main = 1}; +static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name) +{ + bool intern = is_name || config->freeze; + bool symbolize = is_name && config->symbolize_names; + size_t bufferSize = stringEnd - string; + + if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize); + } + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } -#line 283 "parser.rl" + return build_string(string, stringEnd, intern, symbolize); +} +#define JSON_MAX_UNESCAPE_POSITIONS 16 +typedef struct _json_unescape_positions { + long size; + const char **positions; + unsigned long additional_backslashes; +} JSON_UnescapePositions; -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) +static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions) { - int cs = EVIL; - - -#line 502 "parser.c" - { - cs = JSON_value_start; - } - -#line 290 "parser.rl" - -#line 509 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 13: goto st1; - case 32: goto st1; - case 34: goto tr2; - case 45: goto tr3; - case 47: goto st6; - case 73: goto st10; - case 78: goto st17; - case 91: goto tr7; - case 102: goto st19; - case 110: goto st23; - case 116: goto st26; - case 123: goto tr11; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr3; - } else if ( (*p) >= 9 ) - goto st1; - goto st0; -st0: -cs = 0; - goto _out; -tr2: -#line 235 "parser.rl" - { - char *np 
= JSON_parse_string(json, p, pe, result); - if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} + while (positions->size) { + positions->size--; + const char *next_position = positions->positions[0]; + positions->positions++; + if (next_position >= pe) { + return next_position; + } } - goto st29; -tr3: -#line 240 "parser.rl" - { - char *np; - if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { - if (json->allow_nan) { - *result = CMinusInfinity; - {p = (( p + 10))-1;} - p--; {p++; cs = 29; goto _out;} - } else { - rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); + + if (positions->additional_backslashes) { + positions->additional_backslashes--; + return memchr(pe, '\\', stringEnd - pe); + } + + return NULL; +} + +NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions) +{ + bool intern = is_name || config->freeze; + bool symbolize = is_name && config->symbolize_names; + size_t bufferSize = stringEnd - string; + const char *p = string, *pe = string, *bufferStart; + char *buffer; + + VALUE result = rb_str_buf_new(bufferSize); + rb_enc_associate_index(result, utf8_encindex); + buffer = RSTRING_PTR(result); + bufferStart = buffer; + +#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe; + + while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) { + if (pe > p) { + MEMCPY(buffer, p, char, pe - p); + buffer += pe - p; + } + switch (*++pe) { + case '"': + case '/': + p = pe; // nothing to unescape just need to skip the backslash + break; + case '\\': + APPEND_CHAR('\\'); + break; + case 'n': + APPEND_CHAR('\n'); + break; + case 'r': + APPEND_CHAR('\r'); + break; + case 't': + APPEND_CHAR('\t'); + break; + case 'b': + APPEND_CHAR('\b'); + break; + case 'f': + APPEND_CHAR('\f'); + break; + case 'u': { + uint32_t ch = unescape_unicode(state, ++pe, stringEnd); + pe += 
3; + /* To handle values above U+FFFF, we take a sequence of + * \uXXXX escapes in the U+D800..U+DBFF then + * U+DC00..U+DFFF ranges, take the low 10 bits from each + * to make a 20-bit number, then add 0x10000 to get the + * final codepoint. + * + * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling + * Surrogate Pairs in UTF-16", and 23.6 "Surrogates + * Area". + */ + if ((ch & 0xFC00) == 0xD800) { + pe++; + if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) { + uint32_t sur = unescape_unicode(state, pe + 2, stringEnd); + + if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) { + raise_parse_error_at("invalid surrogate pair at %s", state, p); + } + + ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF)); + pe += 5; + } else { + raise_parse_error_at("incomplete surrogate pair at %s", state, p); + break; + } + } + + int unescape_len = convert_UTF32_to_UTF8(buffer, ch); + buffer += unescape_len; + p = ++pe; + break; } + default: + if ((unsigned char)*pe < 0x20) { + if (!config->allow_control_characters) { + if (*pe == '\n') { + raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1); + } + raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1); + } + } else { + raise_parse_error_at("invalid escape character in string: %s", state, pe - 1); + } + break; } - np = JSON_parse_float(json, p, pe, result); - if (np != NULL) {p = (( np))-1;} - np = JSON_parse_integer(json, p, pe, result); - if (np != NULL) {p = (( np))-1;} - p--; {p++; cs = 29; goto _out;} } - goto st29; -tr7: -#line 258 "parser.rl" - { - char *np; - np = JSON_parse_array(json, p, pe, result, current_nesting + 1); - if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} +#undef APPEND_CHAR + + if (stringEnd > p) { + MEMCPY(buffer, p, char, stringEnd - p); + buffer += stringEnd - p; } - goto st29; -tr11: -#line 264 "parser.rl" - { - char *np; - np = JSON_parse_object(json, p, pe, result, 
current_nesting + 1); - if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} + rb_str_set_len(result, buffer - bufferStart); + + if (symbolize) { + result = rb_str_intern(result); + } else if (intern) { + result = rb_str_to_interned_str(result); } - goto st29; -tr25: -#line 228 "parser.rl" - { - if (json->allow_nan) { - *result = CInfinity; - } else { - rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8); + + return result; +} + +#define MAX_FAST_INTEGER_SIZE 18 + +static VALUE json_decode_large_integer(const char *start, long len) +{ + VALUE buffer_v; + char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1); + MEMCPY(buffer, start, char, len); + buffer[len] = '\0'; + VALUE number = rb_cstr2inum(buffer, 10); + RB_ALLOCV_END(buffer_v); + return number; +} + +static inline VALUE +json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end) +{ + if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) { + if (negative) { + return INT64T2NUM(-((int64_t)mantissa)); } + return UINT64T2NUM(mantissa); } - goto st29; -tr27: -#line 221 "parser.rl" - { - if (json->allow_nan) { - *result = CNaN; - } else { - rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2); - } + + return json_decode_large_integer(start, end - start); +} + +static VALUE json_decode_large_float(const char *start, long len) +{ + if (RB_LIKELY(len < 64)) { + char buffer[64]; + MEMCPY(buffer, start, char, len); + buffer[len] = '\0'; + return DBL2NUM(rb_cstr_to_dbl(buffer, 1)); } - goto st29; -tr31: -#line 215 "parser.rl" - { - *result = Qfalse; + + VALUE buffer_v; + char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1); + MEMCPY(buffer, start, char, len); + buffer[len] = '\0'; + VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1)); + RB_ALLOCV_END(buffer_v); + return number; +} + +/* Ruby JSON optimized float decoder using vendored Ryu algorithm + * Accepts pre-extracted 
mantissa and exponent from first-pass validation + */ +static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative, + const char *start, const char *end) +{ + if (RB_UNLIKELY(config->decimal_class)) { + VALUE text = rb_str_new(start, end - start); + return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text); } - goto st29; -tr34: -#line 212 "parser.rl" - { - *result = Qnil; + + // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case) + // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308) + if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) { + return json_decode_large_float(start, end - start); } - goto st29; -tr37: -#line 218 "parser.rl" - { - *result = Qtrue; + + return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative)); +} + +static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count) +{ + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count)); + rvalue_stack_pop(state->stack, count); + + if (config->freeze) { + RB_OBJ_FREEZE(array); } - goto st29; -st29: - if ( ++p == pe ) - goto _test_eof29; -case 29: -#line 270 "parser.rl" - { p--; {p++; cs = 29; goto _out;} } -#line 629 "parser.c" - switch( (*p) ) { - case 13: goto st29; - case 32: goto st29; - case 47: goto st2; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st29; - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 42: goto st3; - case 47: goto st5; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 42 ) - goto st4; - goto st3; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st4; - case 47: goto st29; - } - goto st3; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 10 ) - goto st29; - goto st5; -st6: - if ( ++p == pe ) - goto 
_test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st9; - } - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 42 ) - goto st8; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 42: goto st8; - case 47: goto st1; - } - goto st7; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 10 ) - goto st1; - goto st9; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - if ( (*p) == 110 ) - goto st11; - goto st0; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - if ( (*p) == 102 ) - goto st12; - goto st0; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - if ( (*p) == 105 ) - goto st13; - goto st0; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - if ( (*p) == 110 ) - goto st14; - goto st0; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - if ( (*p) == 105 ) - goto st15; - goto st0; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - if ( (*p) == 116 ) - goto st16; - goto st0; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - if ( (*p) == 121 ) - goto tr25; - goto st0; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - if ( (*p) == 97 ) - goto st18; - goto st0; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - if ( (*p) == 78 ) - goto tr27; - goto st0; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - if ( (*p) == 97 ) - goto st20; - goto st0; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - if ( (*p) == 108 ) - goto st21; - goto st0; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: - if ( (*p) == 115 ) - goto st22; - goto st0; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: - if ( (*p) == 101 ) - goto tr31; - goto st0; -st23: - if ( ++p == pe ) - goto _test_eof23; -case 23: - if ( (*p) == 117 ) - goto st24; - goto st0; -st24: - if ( ++p == pe ) - goto _test_eof24; -case 24: - if ( (*p) == 108 ) - goto st25; - goto st0; -st25: - if ( ++p == pe ) - goto _test_eof25; 
-case 25: - if ( (*p) == 108 ) - goto tr34; - goto st0; -st26: - if ( ++p == pe ) - goto _test_eof26; -case 26: - if ( (*p) == 114 ) - goto st27; - goto st0; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: - if ( (*p) == 117 ) - goto st28; - goto st0; -st28: - if ( ++p == pe ) - goto _test_eof28; -case 28: - if ( (*p) == 101 ) - goto tr37; - goto st0; - } - _test_eof1: cs = 1; goto _test_eof; - _test_eof29: cs = 29; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; - _test_eof23: cs = 23; goto _test_eof; - _test_eof24: cs = 24; goto _test_eof; - _test_eof25: cs = 25; goto _test_eof; - _test_eof26: cs = 26; goto _test_eof; - _test_eof27: cs = 27; goto _test_eof; - _test_eof28: cs = 28; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 291 "parser.rl" - - if (cs >= JSON_value_first_final) { - return p; - } else { - return NULL; + + return array; +} + +static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs) +{ + VALUE set = rb_hash_new_capa(count / 2); + for (size_t index = 0; index < count; index += 2) { + size_t before = RHASH_SIZE(set); + VALUE key = pairs[index]; + rb_hash_aset(set, key, Qtrue); + if (RHASH_SIZE(set) == before) { + if (RB_SYMBOL_P(key)) { + 
return rb_sym2str(key); + } + return key; + } } + return Qfalse; +} + +static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key) +{ + VALUE message = rb_sprintf( + "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`", + rb_inspect(duplicate_key) + ); + + emit_parse_warning(RSTRING_PTR(message), state); + RB_GC_GUARD(message); } +#ifdef RBIMPL_ATTR_NORETURN +RBIMPL_ATTR_NORETURN() +#endif +static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key) +{ + VALUE message = rb_sprintf( + "duplicate key %"PRIsVALUE, + rb_inspect(duplicate_key) + ); -#line 880 "parser.c" -enum {JSON_integer_start = 1}; -enum {JSON_integer_first_final = 3}; -enum {JSON_integer_error = 0}; + raise_parse_error(RSTRING_PTR(message), state); + RB_GC_GUARD(message); +} -enum {JSON_integer_en_main = 1}; +static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count) +{ + size_t entries_count = count / 2; + VALUE object = rb_hash_new_capa(entries_count); + const VALUE *pairs = rvalue_stack_peek(state->stack, count); + rb_hash_bulk_insert(count, pairs, object); + + if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) { + switch (config->on_duplicate_key) { + case JSON_IGNORE: + break; + case JSON_DEPRECATED: + emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs)); + break; + case JSON_RAISE: + raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs)); + break; + } + } + rvalue_stack_pop(state->stack, count); -#line 307 "parser.rl" + if (config->freeze) { + RB_OBJ_FREEZE(object); + } + return object; +} -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) +static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value) { - int cs = EVIL; - - -#line 896 "parser.c" - { - cs = JSON_integer_start; - } - -#line 314 "parser.rl" 
- json->memo = p; - -#line 904 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - switch( (*p) ) { - case 45: goto st2; - case 48: goto st3; - } - if ( 49 <= (*p) && (*p) <= 57 ) - goto st5; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - if ( (*p) == 48 ) - goto st3; - if ( 49 <= (*p) && (*p) <= 57 ) - goto st5; - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st0; - goto tr4; -tr4: -#line 304 "parser.rl" - { p--; {p++; cs = 4; goto _out;} } - goto st4; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: -#line 945 "parser.c" - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st5; - goto tr4; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 316 "parser.rl" - - if (cs >= JSON_integer_first_final) { - long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10); - return p + 1; - } else { - return NULL; + if (RB_UNLIKELY(config->on_load_proc)) { + value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil); } + rvalue_stack_push(state->stack, value, &state->stack_handle, &state->stack); + return value; } +static const bool string_scan_table[256] = { + // ASCII Control Characters + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // ASCII Characters + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +#ifdef HAVE_SIMD +static SIMD_Implementation simd_impl = SIMD_NONE; +#endif /* HAVE_SIMD */ + +ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state) +{ +#ifdef HAVE_SIMD +#if defined(HAVE_SIMD_NEON) + + uint64_t mask = 0; + if (string_scan_simd_neon(&state->cursor, state->end, &mask)) { + state->cursor += trailing_zeros64(mask) >> 2; + return true; + } -#line 979 "parser.c" -enum {JSON_float_start = 1}; -enum {JSON_float_first_final = 8}; -enum {JSON_float_error = 0}; +#elif defined(HAVE_SIMD_SSE2) + if (simd_impl == SIMD_SSE2) { + int mask = 0; + if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) { + state->cursor += trailing_zeros(mask); + return true; + } + } +#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */ +#endif /* HAVE_SIMD */ -enum {JSON_float_en_main = 1}; + while (!eos(state)) { + if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) { + return true; + } + state->cursor++; + } + return false; +} +static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start) +{ + const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS]; + JSON_UnescapePositions positions = { + .size = 0, + .positions = backslashes, + .additional_backslashes = 0, + }; + + do { + switch (*state->cursor) { + case '"': { + VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions); + state->cursor++; + return json_push_value(state, config, string); + } + case '\\': { + if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) { + backslashes[positions.size] = state->cursor; + positions.size++; + } else { + positions.additional_backslashes++; + } + state->cursor++; + break; + } + default: + if (!config->allow_control_characters) { + raise_parse_error("invalid ASCII control character in string: %s", state); + } + break; + } -#line 341 "parser.rl" + state->cursor++; + } while (string_scan(state)); + raise_parse_error("unexpected 
end of input, expected closing \"", state); + return Qfalse; +} -static int is_bigdecimal_class(VALUE obj) +ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name) { - if (cBigDecimal == Qundef) { - if (rb_const_defined(rb_cObject, i_BigDecimal)) { - cBigDecimal = rb_const_get_at(rb_cObject, i_BigDecimal); + state->cursor++; + const char *start = state->cursor; + + if (RB_UNLIKELY(!string_scan(state))) { + raise_parse_error("unexpected end of input, expected closing \"", state); } - else { - return 0; + + if (RB_LIKELY(*state->cursor == '"')) { + VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name); + state->cursor++; + return json_push_value(state, config, string); } - } - return obj == cBigDecimal; + return json_parse_escaped_string(state, config, is_name, start); +} + +#if JSON_CPU_LITTLE_ENDIAN_64BITS +// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/ +// Additional References: +// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html +static inline uint64_t decode_8digits_unrolled(uint64_t val) { + const uint64_t mask = 0x000000FF000000FF; + const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return val; +} + +static inline uint64_t decode_4digits_unrolled(uint32_t val) { + const uint32_t mask = 0x000000FF; + const uint32_t mul1 = 100; + val -= 0x30303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = ((val & mask) * mul1) + (((val >> 16) & mask)); + return val; } +#endif -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) +static inline int 
json_parse_digits(JSON_ParserState *state, uint64_t *accumulator) { - int cs = EVIL; - - -#line 1008 "parser.c" - { - cs = JSON_float_start; - } - -#line 361 "parser.rl" - json->memo = p; - -#line 1016 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - switch( (*p) ) { - case 45: goto st2; - case 48: goto st3; - } - if ( 49 <= (*p) && (*p) <= 57 ) - goto st7; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - if ( (*p) == 48 ) - goto st3; - if ( 49 <= (*p) && (*p) <= 57 ) - goto st7; - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - switch( (*p) ) { - case 46: goto st4; - case 69: goto st5; - case 101: goto st5; - } - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 69: goto st5; - case 101: goto st5; - } - if ( (*p) > 46 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - } else if ( (*p) >= 45 ) - goto st0; - goto tr9; -tr9: -#line 335 "parser.rl" - { p--; {p++; cs = 9; goto _out;} } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 1081 "parser.c" - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - switch( (*p) ) { - case 43: goto st6; - case 45: goto st6; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - switch( (*p) ) { - case 69: goto st0; - case 101: goto st0; - } - if ( (*p) > 46 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; - } else if ( (*p) >= 45 ) - goto st0; - goto tr9; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - switch( (*p) ) { - case 46: goto st4; - case 69: goto st5; - case 101: goto st5; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st7; - goto st0; - } - _test_eof2: cs = 2; goto 
_test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 363 "parser.rl" - - if (cs >= JSON_float_first_final) { - long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); - if (NIL_P(json->decimal_class)) { - *result = rb_float_new(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1)); - } else { - VALUE text; - text = rb_str_new2(FBUFFER_PTR(json->fbuffer)); - if (is_bigdecimal_class(json->decimal_class)) { - *result = rb_funcall(Qnil, i_BigDecimal, 1, text); - } else { - *result = rb_funcall(json->decimal_class, i_new, 1, text); - } + const char *start = state->cursor; + +#if JSON_CPU_LITTLE_ENDIAN_64BITS + while (rest(state) >= sizeof(uint64_t)) { + uint64_t next_8bytes; + memcpy(&next_8bytes, state->cursor, sizeof(uint64_t)); + + // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333 + // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html + uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4); + + if (match == 0x3333333333333333) { // 8 consecutive digits + *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes); + state->cursor += 8; + continue; } - return p + 1; - } else { - return NULL; + + uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT; + + if (consecutive_digits >= 4) { + *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes); + state->cursor += 4; + consecutive_digits -= 4; + } + + while (consecutive_digits) 
{ + *accumulator = *accumulator * 10 + (*state->cursor - '0'); + consecutive_digits--; + state->cursor++; + } + + return (int)(state->cursor - start); + } +#endif + + char next_char; + while (rb_isdigit(next_char = peek(state))) { + *accumulator = *accumulator * 10 + (next_char - '0'); + state->cursor++; } + return (int)(state->cursor - start); } +static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start) +{ + bool integer = true; + const char first_digit = *state->cursor; + // Variables for Ryu optimization - extract digits during parsing + int32_t exponent = 0; + int decimal_point_pos = -1; + uint64_t mantissa = 0; -#line 1168 "parser.c" -enum {JSON_array_start = 1}; -enum {JSON_array_first_final = 17}; -enum {JSON_array_error = 0}; + // Parse integer part and extract mantissa digits + int mantissa_digits = json_parse_digits(state, &mantissa); -enum {JSON_array_en_main = 1}; + if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) { + raise_parse_error_at("invalid number: %s", state, start); + } + // Parse fractional part + if (peek(state) == '.') { + integer = false; + decimal_point_pos = mantissa_digits; // Remember position of decimal point + state->cursor++; -#line 416 "parser.rl" + int fractional_digits = json_parse_digits(state, &mantissa); + mantissa_digits += fractional_digits; + if (RB_UNLIKELY(!fractional_digits)) { + raise_parse_error_at("invalid number: %s", state, start); + } + } -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - VALUE array_class = json->array_class; + // Parse exponent + if (rb_tolower(peek(state)) == 'e') { + integer = false; + state->cursor++; - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - *result = NIL_P(array_class) ? 
rb_ary_new() : rb_class_new_instance(0, 0, array_class); - - -#line 1190 "parser.c" - { - cs = JSON_array_start; - } - -#line 429 "parser.rl" - -#line 1197 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 91 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 13: goto st2; - case 32: goto st2; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st13; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 93: goto tr4; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st2; - goto st0; -tr2: -#line 393 "parser.rl" - { - VALUE v = Qnil; - char *np = JSON_parse_value(json, p, pe, &v, current_nesting); - if (np == NULL) { - p--; {p++; cs = 3; goto _out;} - } else { - if (NIL_P(json->array_class)) { - rb_ary_push(*result, v); - } else { - rb_funcall(*result, i_leftshift, 1, v); - } - {p = (( np))-1;} + bool negative_exponent = false; + const char next_char = peek(state); + if (next_char == '-' || next_char == '+') { + negative_exponent = next_char == '-'; + state->cursor++; + } + + uint64_t abs_exponent = 0; + int exponent_digits = json_parse_digits(state, &abs_exponent); + + if (RB_UNLIKELY(!exponent_digits)) { + raise_parse_error_at("invalid number: %s", state, start); } + + exponent = negative_exponent ? 
-((int32_t)abs_exponent) : ((int32_t)abs_exponent); } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 1256 "parser.c" - switch( (*p) ) { - case 13: goto st3; - case 32: goto st3; - case 44: goto st4; - case 47: goto st9; - case 93: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st3; - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 13: goto st4; - case 32: goto st4; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st5; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st4; - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st8; - } - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - if ( (*p) == 42 ) - goto st7; - goto st6; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st4; - } - goto st6; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - if ( (*p) == 10 ) - goto st4; - goto st8; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - switch( (*p) ) { - case 42: goto st10; - case 47: goto st12; - } - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - if ( (*p) == 42 ) - goto st11; - goto st10; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - switch( (*p) ) { - case 42: goto st11; - case 47: goto st3; - } - goto st10; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - if ( (*p) == 10 ) - goto st3; - goto st12; -tr4: -#line 408 "parser.rl" - { p--; {p++; cs = 17; goto _out;} } - goto st17; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: -#line 1363 "parser.c" - goto st0; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - switch( (*p) ) { - case 42: goto st14; - case 47: goto st16; - } - 
goto st0; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - if ( (*p) == 42 ) - goto st15; - goto st14; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - switch( (*p) ) { - case 42: goto st15; - case 47: goto st2; - } - goto st14; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - if ( (*p) == 10 ) - goto st2; - goto st16; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 430 "parser.rl" - - if(cs >= JSON_array_first_final) { - return p + 1; - } else { - rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); - return NULL; + + if (integer) { + return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor); + } + + // Adjust exponent based on decimal point position + if (decimal_point_pos >= 0) { + exponent -= (mantissa_digits - decimal_point_pos); } + + return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor); } -static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd) +static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config) { - char *p = string, *pe = string, *unescape; - int unescape_len; - char buf[4]; - - while (pe < stringEnd) { - if (*pe == '\\') { - unescape = (char *) "?"; - unescape_len = 1; - if (pe > p) rb_str_buf_cat(result, p, pe - p); - switch (*++pe) { - case 
'n': - unescape = (char *) "\n"; - break; - case 'r': - unescape = (char *) "\r"; - break; - case 't': - unescape = (char *) "\t"; - break; - case '"': - unescape = (char *) "\""; - break; - case '\\': - unescape = (char *) "\\"; - break; - case 'b': - unescape = (char *) "\b"; - break; - case 'f': - unescape = (char *) "\f"; - break; - case 'u': - if (pe > stringEnd - 4) { - rb_enc_raise( - EXC_ENCODING eParserError, - "%u: incomplete unicode character escape sequence at '%s'", __LINE__, p - ); - } else { - UTF32 ch = unescape_unicode((unsigned char *) ++pe); - pe += 3; - if (UNI_SUR_HIGH_START == (ch & 0xFC00)) { - pe++; - if (pe > stringEnd - 6) { - rb_enc_raise( - EXC_ENCODING eParserError, - "%u: incomplete surrogate pair at '%s'", __LINE__, p - ); - } - if (pe[0] == '\\' && pe[1] == 'u') { - UTF32 sur = unescape_unicode((unsigned char *) pe + 2); - ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) - | (sur & 0x3FF)); - pe += 5; - } else { - unescape = (char *) "?"; - break; - } + return json_parse_number(state, config, false, state->cursor); +} + +static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config) +{ + const char *start = state->cursor; + state->cursor++; + return json_parse_number(state, config, true, start); +} + +static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) +{ + json_eat_whitespace(state); + + switch (peek(state)) { + case 'n': + if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) { + state->cursor += 4; + return json_push_value(state, config, Qnil); + } + + raise_parse_error("unexpected token %s", state); + break; + case 't': + if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) { + state->cursor += 4; + return json_push_value(state, config, Qtrue); + } + + raise_parse_error("unexpected token %s", state); + break; + case 'f': + // Note: memcmp with a small power of two compile to an integer comparison + if (rest(state) >= 5 && 
(memcmp(state->cursor + 1, "alse", 4) == 0)) { + state->cursor += 5; + return json_push_value(state, config, Qfalse); + } + + raise_parse_error("unexpected token %s", state); + break; + case 'N': + // Note: memcmp with a small power of two compile to an integer comparison + if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) { + state->cursor += 3; + return json_push_value(state, config, CNaN); + } + + raise_parse_error("unexpected token %s", state); + break; + case 'I': + if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) { + state->cursor += 8; + return json_push_value(state, config, CInfinity); + } + + raise_parse_error("unexpected token %s", state); + break; + case '-': { + // Note: memcmp with a small power of two compile to an integer comparison + if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) { + if (config->allow_nan) { + state->cursor += 9; + return json_push_value(state, config, CMinusInfinity); + } else { + raise_parse_error("unexpected token %s", state); + } + } + return json_push_value(state, config, json_parse_negative_number(state, config)); + break; + } + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + return json_push_value(state, config, json_parse_positive_number(state, config)); + break; + case '"': { + // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"} + return json_parse_string(state, config, false); + break; + } + case '[': { + state->cursor++; + json_eat_whitespace(state); + long stack_head = state->stack->head; + + if (peek(state) == ']') { + state->cursor++; + return json_push_value(state, config, json_decode_array(state, config, 0)); + } else { + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + } + state->in_array++; + 
json_parse_any(state, config); + } + + while (true) { + json_eat_whitespace(state); + + const char next_char = peek(state); + + if (RB_LIKELY(next_char == ',')) { + state->cursor++; + if (config->allow_trailing_comma) { + json_eat_whitespace(state); + if (peek(state) == ']') { + continue; } - unescape_len = convert_UTF32_to_UTF8(buf, ch); - unescape = buf; } - break; - default: - p = pe; + json_parse_any(state, config); continue; + } + + if (next_char == ']') { + state->cursor++; + long count = state->stack->head - stack_head; + state->current_nesting--; + state->in_array--; + return json_push_value(state, config, json_decode_array(state, config, count)); + } + + raise_parse_error("expected ',' or ']' after array value", state); } - rb_str_buf_cat(result, unescape, unescape_len); - p = ++pe; - } else { - pe++; + break; } - } - rb_str_buf_cat(result, p, pe - p); - return result; -} + case '{': { + const char *object_start_cursor = state->cursor; + state->cursor++; + json_eat_whitespace(state); + long stack_head = state->stack->head; -#line 1508 "parser.c" -enum {JSON_string_start = 1}; -enum {JSON_string_first_final = 8}; -enum {JSON_string_error = 0}; + if (peek(state) == '}') { + state->cursor++; + return json_push_value(state, config, json_decode_object(state, config, 0)); + } else { + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + } -enum {JSON_string_en_main = 1}; + if (peek(state) != '"') { + raise_parse_error("expected object key, got %s", state); + } + json_parse_string(state, config, true); + json_eat_whitespace(state); + if (peek(state) != ':') { + raise_parse_error("expected ':' after object key", state); + } + state->cursor++; -#line 537 "parser.rl" + json_parse_any(state, config); + } + while (true) { + json_eat_whitespace(state); -static int -match_i(VALUE regexp, VALUE klass, VALUE memo) -{ - if 
(regexp == Qundef) return ST_STOP; - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) && - RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) { - rb_ary_push(memo, klass); - return ST_STOP; - } - return ST_CONTINUE; -} + const char next_char = peek(state); + if (next_char == '}') { + state->cursor++; + state->current_nesting--; + size_t count = state->stack->head - stack_head; -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - VALUE match_string; - - *result = rb_str_buf_new(0); - -#line 1538 "parser.c" - { - cs = JSON_string_start; - } - -#line 558 "parser.rl" - json->memo = p; - -#line 1546 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 34 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 34: goto tr2; - case 92: goto st3; - } - if ( 0 <= (*p) && (*p) <= 31 ) - goto st0; - goto st2; -tr2: -#line 523 "parser.rl" - { - *result = json_string_unescape(*result, json->memo + 1, p); - if (NIL_P(*result)) { - p--; - {p++; cs = 8; goto _out;} - } else { - FORCE_UTF8(*result); - {p = (( p + 1))-1;} + // Temporary rewind cursor in case an error is raised + const char *final_cursor = state->cursor; + state->cursor = object_start_cursor; + VALUE object = json_decode_object(state, config, count); + state->cursor = final_cursor; + + return json_push_value(state, config, object); + } + + if (next_char == ',') { + state->cursor++; + json_eat_whitespace(state); + + if (config->allow_trailing_comma) { + if (peek(state) == '}') { + continue; + } + } + + if (RB_UNLIKELY(peek(state) != '"')) { + raise_parse_error("expected object key, got: %s", state); + } + json_parse_string(state, config, true); + + json_eat_whitespace(state); + if (RB_UNLIKELY(peek(state) != ':')) { + raise_parse_error("expected ':' after object key, got: %s", state); + } + state->cursor++; + + json_parse_any(state, 
config); + + continue; + } + + raise_parse_error("expected ',' or '}' after object value, got: %s", state); + } + break; } - } -#line 534 "parser.rl" - { p--; {p++; cs = 8; goto _out;} } - goto st8; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: -#line 1589 "parser.c" - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 117 ) - goto st4; - if ( 0 <= (*p) && (*p) <= 31 ) - goto st0; - goto st2; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st5; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st5; - } else - goto st5; - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st6; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st6; - } else - goto st6; - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st7; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st7; - } else - goto st7; - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st2; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st2; - } else - goto st2; - goto st0; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 560 "parser.rl" - - if (json->create_additions && RTEST(match_string = json->match_string)) { - VALUE klass; - VALUE memo = rb_ary_new2(2); - rb_ary_push(memo, *result); - rb_hash_foreach(match_string, match_i, memo); - klass = rb_ary_entry(memo, 1); - if (RTEST(klass)) { - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - if 
(json->symbolize_names && json->parsing_name) { - *result = rb_str_intern(*result); - } else { - if (RB_TYPE_P(*result, T_STRING)) { - rb_str_resize(*result, RSTRING_LEN(*result)); - } + case 0: + raise_parse_error("unexpected end of input", state); + break; + + default: + raise_parse_error("unexpected character: %s", state); + break; } - if (cs >= JSON_string_first_final) { - return p + 1; - } else { - return NULL; + + raise_parse_error("unreachable: %s", state); + return Qundef; +} + +static void json_ensure_eof(JSON_ParserState *state) +{ + json_eat_whitespace(state); + if (!eos(state)) { + raise_parse_error("unexpected token at end of stream %s", state); } } @@ -1702,26 +1411,84 @@ case 7: static VALUE convert_encoding(VALUE source) { -#ifdef HAVE_RUBY_ENCODING_H - rb_encoding *enc = rb_enc_get(source); - if (enc == rb_ascii8bit_encoding()) { - if (OBJ_FROZEN(source)) { - source = rb_str_dup(source); - } - FORCE_UTF8(source); - } else { - source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding()); - } -#endif + int encindex = RB_ENCODING_GET(source); + + if (RB_LIKELY(encindex == utf8_encindex)) { return source; + } + + if (encindex == binary_encindex) { + // For historical reason, we silently reinterpret binary strings as UTF-8 + return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + } + + return rb_funcall(source, i_encode, 1, Encoding_UTF_8); +} + +static int parser_config_init_i(VALUE key, VALUE val, VALUE data) +{ + JSON_ParserConfig *config = (JSON_ParserConfig *)data; + + if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? 
FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); } + else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); } + else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { config->freeze = RTEST(val); } + else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; } + else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } + else if (key == sym_decimal_class) { + if (RTEST(val)) { + if (rb_respond_to(val, i_try_convert)) { + config->decimal_class = val; + config->decimal_method_id = i_try_convert; + } else if (rb_respond_to(val, i_new)) { + config->decimal_class = val; + config->decimal_method_id = i_new; + } else if (RB_TYPE_P(val, T_CLASS)) { + VALUE name = rb_class_name(val); + const char *name_cstr = RSTRING_PTR(name); + const char *last_colon = strrchr(name_cstr, ':'); + if (last_colon) { + const char *mod_path_end = last_colon - 1; + VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); + config->decimal_class = rb_path_to_class(mod_path); + + const char *method_name_beg = last_colon + 1; + long before_len = method_name_beg - name_cstr; + long len = RSTRING_LEN(name) - before_len; + VALUE method_name = rb_str_substr(name, before_len, len); + config->decimal_method_id = SYM2ID(rb_str_intern(method_name)); + } else { + config->decimal_class = rb_mKernel; + config->decimal_method_id = SYM2ID(rb_str_intern(name)); + } + } + } + } + + return ST_CONTINUE; +} + +static void parser_config_init(JSON_ParserConfig *config, VALUE opts) +{ + config->max_nesting = 100; + + if (!NIL_P(opts)) { + Check_Type(opts, T_HASH); + if (RHASH_SIZE(opts) > 0) { + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible 
keys. + rb_hash_foreach(opts, parser_config_init_i, (VALUE)config); + } + + } } /* - * call-seq: new(source, opts => {}) - * - * Creates a new JSON::Ext::Parser instance for the string _source_. + * call-seq: new(opts => {}) * - * Creates a new JSON::Ext::Parser instance for the string _source_. + * Creates a new JSON::Ext::ParserConfig instance. * * It will be configured by the _opts_ hash. _opts_ can have the following * keys: @@ -1737,366 +1504,136 @@ static VALUE convert_encoding(VALUE source) * (keys) in a JSON object. Otherwise strings are returned, which is * also the default. It's not possible to use this option in * conjunction with the *create_additions* option. - * * *create_additions*: If set to false, the Parser doesn't create - * additions even if a matching class and create_id was found. This option - * defaults to false. - * * *object_class*: Defaults to Hash - * * *array_class*: Defaults to Array + * * *decimal_class*: Specifies which class to use instead of the default + * (Float) when parsing decimal numbers. This class must accept a single + * string argument in its constructor. 
*/ -static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) +static VALUE cParserConfig_initialize(VALUE self, VALUE opts) { - VALUE source, opts; - GET_PARSER_INIT; + rb_check_frozen(self); + GET_PARSER_CONFIG; + + parser_config_init(config, opts); + + RB_OBJ_WRITTEN(self, Qundef, config->decimal_class); - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } -#ifdef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH - rb_scan_args(argc, argv, "1:", &source, &opts); -#else - rb_scan_args(argc, argv, "11", &source, &opts); -#endif - if (!NIL_P(opts)) { -#ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH - opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); - if (NIL_P(opts)) { - rb_raise(rb_eArgError, "opts needs to be like a hash"); - } else { -#endif - VALUE tmp = ID2SYM(i_max_nesting); - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - json->max_nesting = FIX2INT(max_nesting); - } else { - json->max_nesting = 0; - } - } else { - json->max_nesting = 100; - } - tmp = ID2SYM(i_allow_nan); - if (option_given_p(opts, tmp)) { - json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->allow_nan = 0; - } - tmp = ID2SYM(i_symbolize_names); - if (option_given_p(opts, tmp)) { - json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 
1 : 0; - } else { - json->symbolize_names = 0; - } - tmp = ID2SYM(i_create_additions); - if (option_given_p(opts, tmp)) { - json->create_additions = RTEST(rb_hash_aref(opts, tmp)); - } else { - json->create_additions = 0; - } - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - tmp = ID2SYM(i_create_id); - if (option_given_p(opts, tmp)) { - json->create_id = rb_hash_aref(opts, tmp); - } else { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - tmp = ID2SYM(i_object_class); - if (option_given_p(opts, tmp)) { - json->object_class = rb_hash_aref(opts, tmp); - } else { - json->object_class = Qnil; - } - tmp = ID2SYM(i_array_class); - if (option_given_p(opts, tmp)) { - json->array_class = rb_hash_aref(opts, tmp); - } else { - json->array_class = Qnil; - } - tmp = ID2SYM(i_decimal_class); - if (option_given_p(opts, tmp)) { - json->decimal_class = rb_hash_aref(opts, tmp); - } else { - json->decimal_class = Qnil; - } - tmp = ID2SYM(i_match_string); - if (option_given_p(opts, tmp)) { - VALUE match_string = rb_hash_aref(opts, tmp); - json->match_string = RTEST(match_string) ? 
match_string : Qnil; - } else { - json->match_string = Qnil; - } -#ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH - } -#endif - } else { - json->max_nesting = 100; - json->allow_nan = 0; - json->create_additions = 1; - json->create_id = rb_funcall(mJSON, i_create_id, 0); - json->object_class = Qnil; - json->array_class = Qnil; - json->decimal_class = Qnil; - } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source);; - json->Vsource = source; return self; } +static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource) +{ + Vsource = convert_encoding(StringValue(Vsource)); + StringValue(Vsource); + + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; -#line 1854 "parser.c" -enum {JSON_start = 1}; -enum {JSON_first_final = 10}; -enum {JSON_error = 0}; + long len; + const char *start; + RSTRING_GETMEM(Vsource, start, len); -enum {JSON_en_main = 1}; + JSON_ParserState _state = { + .start = start, + .cursor = start, + .end = start + len, + .stack = &stack, + }; + JSON_ParserState *state = &_state; + VALUE result = json_parse_any(state, config); -#line 762 "parser.rl" + // This may be skipped in case of exception, but + // it won't cause a leak. + rvalue_stack_eagerly_release(state->stack_handle); + json_ensure_eof(state); + + return result; +} /* - * call-seq: parse() + * call-seq: parse(source) * * Parses the current JSON text _source_ and returns the complete data * structure as a result. + * It raises JSON::ParserError if fail to parse. 
*/ -static VALUE cParser_parse(VALUE self) +static VALUE cParserConfig_parse(VALUE self, VALUE Vsource) { - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; - - -#line 1879 "parser.c" - { - cs = JSON_start; - } - -#line 778 "parser.rl" - p = json->source; - pe = p + json->len; - -#line 1888 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 13: goto st1; - case 32: goto st1; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st6; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st1; - goto st0; -st0: -cs = 0; - goto _out; -tr2: -#line 754 "parser.rl" - { - char *np = JSON_parse_value(json, p, pe, &result, 0); - if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} - } - goto st10; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: -#line 1932 "parser.c" - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 47: goto st2; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 42: goto st3; - case 47: goto st5; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 42 ) - goto st4; - goto st3; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st4; - case 47: goto st10; - } - goto st3; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 10 ) - goto st10; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st9; - } - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 42 ) - goto st8; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 
8: - switch( (*p) ) { - case 42: goto st8; - case 47: goto st1; - } - goto st7; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 10 ) - goto st1; - goto st9; - } - _test_eof1: cs = 1; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 781 "parser.rl" - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); - return Qnil; - } + GET_PARSER_CONFIG; + return cParser_parse(config, Vsource); +} + +static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts) +{ + Vsource = convert_encoding(StringValue(Vsource)); + StringValue(Vsource); + + JSON_ParserConfig _config = {0}; + JSON_ParserConfig *config = &_config; + parser_config_init(config, opts); + + return cParser_parse(config, Vsource); } -static void JSON_mark(void *ptr) +static void JSON_ParserConfig_mark(void *ptr) { - JSON_Parser *json = ptr; - rb_gc_mark_maybe(json->Vsource); - rb_gc_mark_maybe(json->create_id); - rb_gc_mark_maybe(json->object_class); - rb_gc_mark_maybe(json->array_class); - rb_gc_mark_maybe(json->decimal_class); - rb_gc_mark_maybe(json->match_string); + JSON_ParserConfig *config = ptr; + rb_gc_mark(config->on_load_proc); + rb_gc_mark(config->decimal_class); } -static void JSON_free(void *ptr) +static void JSON_ParserConfig_free(void *ptr) { - JSON_Parser *json = ptr; - fbuffer_free(json->fbuffer); - ruby_xfree(json); + JSON_ParserConfig *config = ptr; + ruby_xfree(config); } -static size_t JSON_memsize(const void *ptr) +static size_t JSON_ParserConfig_memsize(const void *ptr) { - const JSON_Parser *json = ptr; - return 
sizeof(*json) + FBUFFER_CAPA(json->fbuffer); + return sizeof(JSON_ParserConfig); } -#ifdef NEW_TYPEDDATA_WRAPPER -static const rb_data_type_t JSON_Parser_type = { - "JSON/Parser", - {JSON_mark, JSON_free, JSON_memsize,}, -#ifdef RUBY_TYPED_FREE_IMMEDIATELY +static const rb_data_type_t JSON_ParserConfig_type = { + "JSON::Ext::Parser/ParserConfig", + { + JSON_ParserConfig_mark, + JSON_ParserConfig_free, + JSON_ParserConfig_memsize, + }, 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -#endif + RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE, }; -#endif static VALUE cJSON_parser_s_allocate(VALUE klass) { - JSON_Parser *json; - VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - json->fbuffer = fbuffer_alloc(0); - return obj; -} - -/* - * call-seq: source() - * - * Returns a copy of the current _source_ string, that was used to construct - * this Parser. - */ -static VALUE cParser_source(VALUE self) -{ - GET_PARSER; - return rb_str_dup(json->Vsource); + JSON_ParserConfig *config; + return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config); } void Init_parser(void) { +#ifdef HAVE_RB_EXT_RACTOR_SAFE + rb_ext_ractor_safe(true); +#endif + #undef rb_intern rb_require("json/common"); mJSON = rb_define_module("JSON"); - mExt = rb_define_module_under(mJSON, "Ext"); - cParser = rb_define_class_under(mExt, "Parser", rb_cObject); - eParserError = rb_path2class("JSON::ParserError"); + VALUE mExt = rb_define_module_under(mJSON, "Ext"); + VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject); eNestingError = rb_path2class("JSON::NestingError"); - rb_gc_register_mark_object(eParserError); rb_gc_register_mark_object(eNestingError); - rb_define_alloc_func(cParser, cJSON_parser_s_allocate); - rb_define_method(cParser, "initialize", cParser_initialize, -1); - rb_define_method(cParser, "parse", cParser_parse, 0); - rb_define_method(cParser, "source", cParser_source, 0); + 
rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate); + rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1); + rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1); + + VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject); + rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); CNaN = rb_const_get(mJSON, rb_intern("NaN")); rb_gc_register_mark_object(CNaN); @@ -2107,32 +1644,29 @@ void Init_parser(void) CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); rb_gc_register_mark_object(CMinusInfinity); - i_json_creatable_p = rb_intern("json_creatable?"); - i_json_create = rb_intern("json_create"); - i_create_id = rb_intern("create_id"); - i_create_additions = rb_intern("create_additions"); - i_chr = rb_intern("chr"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = rb_intern("allow_nan"); - i_symbolize_names = rb_intern("symbolize_names"); - i_object_class = rb_intern("object_class"); - i_array_class = rb_intern("array_class"); - i_decimal_class = rb_intern("decimal_class"); - i_match = rb_intern("match"); - i_match_string = rb_intern("match_string"); - i_key_p = rb_intern("key?"); - i_deep_const_get = rb_intern("deep_const_get"); - i_aset = rb_intern("[]="); - i_aref = rb_intern("[]"); - i_leftshift = rb_intern("<<"); + rb_global_variable(&Encoding_UTF_8); + Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + + sym_max_nesting = ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters")); + sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); + sym_freeze = ID2SYM(rb_intern("freeze")); + sym_on_load = ID2SYM(rb_intern("on_load")); + sym_decimal_class = ID2SYM(rb_intern("decimal_class")); + sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key")); + i_new = 
rb_intern("new"); - i_BigDecimal = rb_intern("BigDecimal"); -} + i_try_convert = rb_intern("try_convert"); + i_uminus = rb_intern("-@"); + i_encode = rb_intern("encode"); -/* - * Local variables: - * mode: c - * c-file-style: ruby - * indent-tabs-mode: nil - * End: - */ + binary_encindex = rb_ascii8bit_encindex(); + utf8_encindex = rb_utf8_encindex(); + enc_utf8 = rb_utf8_encoding(); + +#ifdef HAVE_SIMD + simd_impl = find_simd_implementation(); +#endif +} diff --git a/ext/json/parser/parser.h b/ext/json/parser/parser.h deleted file mode 100644 index e6cf779024..0000000000 --- a/ext/json/parser/parser.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef _PARSER_H_ -#define _PARSER_H_ - -#include "ruby.h" - -#ifndef HAVE_RUBY_RE_H -#include "re.h" -#endif - -#ifdef HAVE_RUBY_ST_H -#include "ruby/st.h" -#else -#include "st.h" -#endif - -#define option_given_p(opts, key) RTEST(rb_funcall(opts, i_key_p, 1, key)) - -/* unicode */ - -typedef unsigned long UTF32; /* at least 32 bits */ -typedef unsigned short UTF16; /* at least 16 bits */ -typedef unsigned char UTF8; /* typically 8 bits */ - -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_SUR_HIGH_START (UTF32)0xD800 -#define UNI_SUR_HIGH_END (UTF32)0xDBFF -#define UNI_SUR_LOW_START (UTF32)0xDC00 -#define UNI_SUR_LOW_END (UTF32)0xDFFF - -typedef struct JSON_ParserStruct { - VALUE Vsource; - char *source; - long len; - char *memo; - VALUE create_id; - int max_nesting; - int allow_nan; - int parsing_name; - int symbolize_names; - VALUE object_class; - VALUE array_class; - VALUE decimal_class; - int create_additions; - VALUE match_string; - FBuffer *fbuffer; -} JSON_Parser; - -#define GET_PARSER \ - GET_PARSER_INIT; \ - if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") -#define GET_PARSER_INIT \ - JSON_Parser *json; \ - TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) - -#define MinusInfinity "-Infinity" -#define EVIL 0x666 - -static UTF32 unescape_unicode(const unsigned char *p); -static 
int convert_UTF32_to_UTF8(char *buf, UTF32 ch); -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd); -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); -static VALUE convert_encoding(VALUE source); -static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self); -static VALUE cParser_parse(VALUE self); -static void JSON_mark(void *json); -static void JSON_free(void *json); -static VALUE cJSON_parser_s_allocate(VALUE klass); -static VALUE cParser_source(VALUE self); -#ifndef ZALLOC -#define ZALLOC(type) ((type *)ruby_zalloc(sizeof(type))) -static inline void *ruby_zalloc(size_t n) -{ - void *p = ruby_xmalloc(n); - memset(p, 0, n); - return p; -} -#endif -#ifdef TypedData_Make_Struct -static const rb_data_type_t JSON_Parser_type; -#define NEW_TYPEDDATA_WRAPPER 1 -#else -#define TypedData_Make_Struct(klass, type, ignore, json) Data_Make_Struct(klass, type, NULL, JSON_free, json) -#define TypedData_Get_Struct(self, JSON_Parser, ignore, json) Data_Get_Struct(self, JSON_Parser, json) -#endif - -#endif diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl deleted file mode 100644 index d4e7a60e9d..0000000000 --- a/ext/json/parser/parser.rl +++ /dev/null @@ -1,891 +0,0 @@ -#include "../fbuffer/fbuffer.h" -#include "parser.h" - -#if defined HAVE_RUBY_ENCODING_H -# define EXC_ENCODING rb_utf8_encoding(), -# ifndef HAVE_RB_ENC_RAISE -static void -enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...) 
-{ - va_list args; - VALUE mesg; - - va_start(args, fmt); - mesg = rb_enc_vsprintf(enc, fmt, args); - va_end(args); - - rb_exc_raise(rb_exc_new3(exc, mesg)); -} -# define rb_enc_raise enc_raise -# endif -#else -# define EXC_ENCODING /* nothing */ -# define rb_enc_raise rb_raise -#endif - -/* unicode */ - -static const char digit_values[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, - -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1 -}; - -static UTF32 unescape_unicode(const unsigned char *p) -{ - char b; - UTF32 result = 0; - b = digit_values[p[0]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[1]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[2]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[3]]; - if (b < 0) return UNI_REPLACEMENT_CHAR; - result = (result << 4) | (unsigned char)b; - return result; -} - -static int convert_UTF32_to_UTF8(char 
*buf, UTF32 ch) -{ - int len = 1; - if (ch <= 0x7F) { - buf[0] = (char) ch; - } else if (ch <= 0x07FF) { - buf[0] = (char) ((ch >> 6) | 0xC0); - buf[1] = (char) ((ch & 0x3F) | 0x80); - len++; - } else if (ch <= 0xFFFF) { - buf[0] = (char) ((ch >> 12) | 0xE0); - buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); - buf[2] = (char) ((ch & 0x3F) | 0x80); - len += 2; - } else if (ch <= 0x1fffff) { - buf[0] =(char) ((ch >> 18) | 0xF0); - buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); - buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); - buf[3] =(char) ((ch & 0x3F) | 0x80); - len += 3; - } else { - buf[0] = '?'; - } - return len; -} - -static VALUE mJSON, mExt, cParser, eParserError, eNestingError; -static VALUE CNaN, CInfinity, CMinusInfinity; -static VALUE cBigDecimal = Qundef; - -static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, - i_object_class, i_array_class, i_decimal_class, i_key_p, - i_deep_const_get, i_match, i_match_string, i_aset, i_aref, - i_leftshift, i_new, i_BigDecimal; - -%%{ - machine JSON_common; - - cr = '\n'; - cr_neg = [^\n]; - ws = [ \t\r\n]; - c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/'; - cpp_comment = '//' cr_neg* cr; - comment = c_comment | cpp_comment; - ignore = ws | comment; - name_separator = ':'; - value_separator = ','; - Vnull = 'null'; - Vfalse = 'false'; - Vtrue = 'true'; - VNaN = 'NaN'; - VInfinity = 'Infinity'; - VMinusInfinity = '-Infinity'; - begin_value = [nft\"\-\[\{NI] | digit; - begin_object = '{'; - end_object = '}'; - begin_array = '['; - end_array = ']'; - begin_string = '"'; - begin_name = begin_string; - begin_number = digit | '-'; -}%% - -%%{ - machine JSON_object; - include JSON_common; - - write data; - - action parse_value { - VALUE v = Qnil; - char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting); - if (np == NULL) { - fhold; fbreak; - } else { - if (NIL_P(json->object_class)) { - rb_hash_aset(*result, last_name, v); - } else { - 
rb_funcall(*result, i_aset, 2, last_name, v); - } - fexec np; - } - } - - action parse_name { - char *np; - json->parsing_name = 1; - np = JSON_parse_string(json, fpc, pe, &last_name); - json->parsing_name = 0; - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action exit { fhold; fbreak; } - - pair = ignore* begin_name >parse_name ignore* name_separator ignore* begin_value >parse_value; - next_pair = ignore* value_separator pair; - - main := ( - begin_object - (pair (next_pair)*)? ignore* - end_object - ) @exit; -}%% - -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - VALUE last_name = Qnil; - VALUE object_class = json->object_class; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - - *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); - - %% write init; - %% write exec; - - if (cs >= JSON_object_first_final) { - if (json->create_additions) { - VALUE klassname; - if (NIL_P(json->object_class)) { - klassname = rb_hash_aref(*result, json->create_id); - } else { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); - } - if (!NIL_P(klassname)) { - VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - } - return p + 1; - } else { - return NULL; - } -} - - -%%{ - machine JSON_value; - include JSON_common; - - write data; - - action parse_null { - *result = Qnil; - } - action parse_false { - *result = Qfalse; - } - action parse_true { - *result = Qtrue; - } - action parse_nan { - if (json->allow_nan) { - *result = CNaN; - } else { - rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2); - } - } - action parse_infinity { - if (json->allow_nan) { - *result = CInfinity; - } 
else { - rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8); - } - } - action parse_string { - char *np = JSON_parse_string(json, fpc, pe, result); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action parse_number { - char *np; - if(pe > fpc + 8 && !strncmp(MinusInfinity, fpc, 9)) { - if (json->allow_nan) { - *result = CMinusInfinity; - fexec p + 10; - fhold; fbreak; - } else { - rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); - } - } - np = JSON_parse_float(json, fpc, pe, result); - if (np != NULL) fexec np; - np = JSON_parse_integer(json, fpc, pe, result); - if (np != NULL) fexec np; - fhold; fbreak; - } - - action parse_array { - char *np; - np = JSON_parse_array(json, fpc, pe, result, current_nesting + 1); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action parse_object { - char *np; - np = JSON_parse_object(json, fpc, pe, result, current_nesting + 1); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action exit { fhold; fbreak; } - -main := ignore* ( - Vnull @parse_null | - Vfalse @parse_false | - Vtrue @parse_true | - VNaN @parse_nan | - VInfinity @parse_infinity | - begin_number >parse_number | - begin_string >parse_string | - begin_array >parse_array | - begin_object >parse_object - ) ignore* %*exit; -}%% - -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - %% write init; - %% write exec; - - if (cs >= JSON_value_first_final) { - return p; - } else { - return NULL; - } -} - -%%{ - machine JSON_integer; - - write data; - - action exit { fhold; fbreak; } - - main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? 
@exit); -}%% - -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - - %% write init; - json->memo = p; - %% write exec; - - if (cs >= JSON_integer_first_final) { - long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10); - return p + 1; - } else { - return NULL; - } -} - -%%{ - machine JSON_float; - include JSON_common; - - write data; - - action exit { fhold; fbreak; } - - main := '-'? ( - (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) - | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+)) - ) (^[0-9Ee.\-]? @exit ); -}%% - -static int is_bigdecimal_class(VALUE obj) -{ - if (cBigDecimal == Qundef) { - if (rb_const_defined(rb_cObject, i_BigDecimal)) { - cBigDecimal = rb_const_get_at(rb_cObject, i_BigDecimal); - } - else { - return 0; - } - } - return obj == cBigDecimal; -} - -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - - %% write init; - json->memo = p; - %% write exec; - - if (cs >= JSON_float_first_final) { - long len = p - json->memo; - fbuffer_clear(json->fbuffer); - fbuffer_append(json->fbuffer, json->memo, len); - fbuffer_append_char(json->fbuffer, '\0'); - if (NIL_P(json->decimal_class)) { - *result = rb_float_new(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1)); - } else { - VALUE text; - text = rb_str_new2(FBUFFER_PTR(json->fbuffer)); - if (is_bigdecimal_class(json->decimal_class)) { - *result = rb_funcall(Qnil, i_BigDecimal, 1, text); - } else { - *result = rb_funcall(json->decimal_class, i_new, 1, text); - } - } - return p + 1; - } else { - return NULL; - } -} - - -%%{ - machine JSON_array; - include JSON_common; - - write data; - - action parse_value { - VALUE v = Qnil; - char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting); - if (np == NULL) { - fhold; fbreak; - } else { - if 
(NIL_P(json->array_class)) { - rb_ary_push(*result, v); - } else { - rb_funcall(*result, i_leftshift, 1, v); - } - fexec np; - } - } - - action exit { fhold; fbreak; } - - next_element = value_separator ignore* begin_value >parse_value; - - main := begin_array ignore* - ((begin_value >parse_value ignore*) - (ignore* next_element ignore*)*)? - end_array @exit; -}%% - -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - VALUE array_class = json->array_class; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); - - %% write init; - %% write exec; - - if(cs >= JSON_array_first_final) { - return p + 1; - } else { - rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); - return NULL; - } -} - -static VALUE json_string_unescape(VALUE result, char *string, char *stringEnd) -{ - char *p = string, *pe = string, *unescape; - int unescape_len; - char buf[4]; - - while (pe < stringEnd) { - if (*pe == '\\') { - unescape = (char *) "?"; - unescape_len = 1; - if (pe > p) rb_str_buf_cat(result, p, pe - p); - switch (*++pe) { - case 'n': - unescape = (char *) "\n"; - break; - case 'r': - unescape = (char *) "\r"; - break; - case 't': - unescape = (char *) "\t"; - break; - case '"': - unescape = (char *) "\""; - break; - case '\\': - unescape = (char *) "\\"; - break; - case 'b': - unescape = (char *) "\b"; - break; - case 'f': - unescape = (char *) "\f"; - break; - case 'u': - if (pe > stringEnd - 4) { - rb_enc_raise( - EXC_ENCODING eParserError, - "%u: incomplete unicode character escape sequence at '%s'", __LINE__, p - ); - } else { - UTF32 ch = unescape_unicode((unsigned char *) ++pe); - pe += 3; - if (UNI_SUR_HIGH_START == (ch & 0xFC00)) { - pe++; - if (pe > stringEnd - 6) { - rb_enc_raise( - 
EXC_ENCODING eParserError, - "%u: incomplete surrogate pair at '%s'", __LINE__, p - ); - } - if (pe[0] == '\\' && pe[1] == 'u') { - UTF32 sur = unescape_unicode((unsigned char *) pe + 2); - ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) - | (sur & 0x3FF)); - pe += 5; - } else { - unescape = (char *) "?"; - break; - } - } - unescape_len = convert_UTF32_to_UTF8(buf, ch); - unescape = buf; - } - break; - default: - p = pe; - continue; - } - rb_str_buf_cat(result, unescape, unescape_len); - p = ++pe; - } else { - pe++; - } - } - rb_str_buf_cat(result, p, pe - p); - return result; -} - -%%{ - machine JSON_string; - include JSON_common; - - write data; - - action parse_string { - *result = json_string_unescape(*result, json->memo + 1, p); - if (NIL_P(*result)) { - fhold; - fbreak; - } else { - FORCE_UTF8(*result); - fexec p + 1; - } - } - - action exit { fhold; fbreak; } - - main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit; -}%% - -static int -match_i(VALUE regexp, VALUE klass, VALUE memo) -{ - if (regexp == Qundef) return ST_STOP; - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) && - RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) { - rb_ary_push(memo, klass); - return ST_STOP; - } - return ST_CONTINUE; -} - -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - VALUE match_string; - - *result = rb_str_buf_new(0); - %% write init; - json->memo = p; - %% write exec; - - if (json->create_additions && RTEST(match_string = json->match_string)) { - VALUE klass; - VALUE memo = rb_ary_new2(2); - rb_ary_push(memo, *result); - rb_hash_foreach(match_string, match_i, memo); - klass = rb_ary_entry(memo, 1); - if (RTEST(klass)) { - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - - if (json->symbolize_names && json->parsing_name) { - *result = rb_str_intern(*result); - } else { - if 
(RB_TYPE_P(*result, T_STRING)) { - rb_str_resize(*result, RSTRING_LEN(*result)); - } - } - if (cs >= JSON_string_first_final) { - return p + 1; - } else { - return NULL; - } -} - -/* - * Document-class: JSON::Ext::Parser - * - * This is the JSON parser implemented as a C extension. It can be configured - * to be used by setting - * - * JSON.parser = JSON::Ext::Parser - * - * with the method parser= in JSON. - * - */ - -static VALUE convert_encoding(VALUE source) -{ -#ifdef HAVE_RUBY_ENCODING_H - rb_encoding *enc = rb_enc_get(source); - if (enc == rb_ascii8bit_encoding()) { - if (OBJ_FROZEN(source)) { - source = rb_str_dup(source); - } - FORCE_UTF8(source); - } else { - source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding()); - } -#endif - return source; -} - -/* - * call-seq: new(source, opts => {}) - * - * Creates a new JSON::Ext::Parser instance for the string _source_. - * - * Creates a new JSON::Ext::Parser instance for the string _source_. - * - * It will be configured by the _opts_ hash. _opts_ can have the following - * keys: - * - * _opts_ can have the following keys: - * * *max_nesting*: The maximum depth of nesting allowed in the parsed data - * structures. Disable depth checking with :max_nesting => false|nil|0, it - * defaults to 100. - * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in - * defiance of RFC 4627 to be parsed by the Parser. This option defaults to - * false. - * * *symbolize_names*: If set to true, returns symbols for the names - * (keys) in a JSON object. Otherwise strings are returned, which is - * also the default. It's not possible to use this option in - * conjunction with the *create_additions* option. - * * *create_additions*: If set to false, the Parser doesn't create - * additions even if a matching class and create_id was found. This option - * defaults to false. 
- * * *object_class*: Defaults to Hash - * * *array_class*: Defaults to Array - */ -static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) -{ - VALUE source, opts; - GET_PARSER_INIT; - - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } -#ifdef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH - rb_scan_args(argc, argv, "1:", &source, &opts); -#else - rb_scan_args(argc, argv, "11", &source, &opts); -#endif - if (!NIL_P(opts)) { -#ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH - opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash"); - if (NIL_P(opts)) { - rb_raise(rb_eArgError, "opts needs to be like a hash"); - } else { -#endif - VALUE tmp = ID2SYM(i_max_nesting); - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - json->max_nesting = FIX2INT(max_nesting); - } else { - json->max_nesting = 0; - } - } else { - json->max_nesting = 100; - } - tmp = ID2SYM(i_allow_nan); - if (option_given_p(opts, tmp)) { - json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->allow_nan = 0; - } - tmp = ID2SYM(i_symbolize_names); - if (option_given_p(opts, tmp)) { - json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 
1 : 0; - } else { - json->symbolize_names = 0; - } - tmp = ID2SYM(i_create_additions); - if (option_given_p(opts, tmp)) { - json->create_additions = RTEST(rb_hash_aref(opts, tmp)); - } else { - json->create_additions = 0; - } - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - tmp = ID2SYM(i_create_id); - if (option_given_p(opts, tmp)) { - json->create_id = rb_hash_aref(opts, tmp); - } else { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - tmp = ID2SYM(i_object_class); - if (option_given_p(opts, tmp)) { - json->object_class = rb_hash_aref(opts, tmp); - } else { - json->object_class = Qnil; - } - tmp = ID2SYM(i_array_class); - if (option_given_p(opts, tmp)) { - json->array_class = rb_hash_aref(opts, tmp); - } else { - json->array_class = Qnil; - } - tmp = ID2SYM(i_decimal_class); - if (option_given_p(opts, tmp)) { - json->decimal_class = rb_hash_aref(opts, tmp); - } else { - json->decimal_class = Qnil; - } - tmp = ID2SYM(i_match_string); - if (option_given_p(opts, tmp)) { - VALUE match_string = rb_hash_aref(opts, tmp); - json->match_string = RTEST(match_string) ? 
match_string : Qnil; - } else { - json->match_string = Qnil; - } -#ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH - } -#endif - } else { - json->max_nesting = 100; - json->allow_nan = 0; - json->create_additions = 1; - json->create_id = rb_funcall(mJSON, i_create_id, 0); - json->object_class = Qnil; - json->array_class = Qnil; - json->decimal_class = Qnil; - } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source);; - json->Vsource = source; - return self; -} - -%%{ - machine JSON; - - write data; - - include JSON_common; - - action parse_value { - char *np = JSON_parse_value(json, fpc, pe, &result, 0); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - main := ignore* ( - begin_value >parse_value - ) ignore*; -}%% - -/* - * call-seq: parse() - * - * Parses the current JSON text _source_ and returns the complete data - * structure as a result. - */ -static VALUE cParser_parse(VALUE self) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; - - %% write init; - p = json->source; - pe = p + json->len; - %% write exec; - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); - return Qnil; - } -} - -static void JSON_mark(void *ptr) -{ - JSON_Parser *json = ptr; - rb_gc_mark_maybe(json->Vsource); - rb_gc_mark_maybe(json->create_id); - rb_gc_mark_maybe(json->object_class); - rb_gc_mark_maybe(json->array_class); - rb_gc_mark_maybe(json->decimal_class); - rb_gc_mark_maybe(json->match_string); -} - -static void JSON_free(void *ptr) -{ - JSON_Parser *json = ptr; - fbuffer_free(json->fbuffer); - ruby_xfree(json); -} - -static size_t JSON_memsize(const void *ptr) -{ - const JSON_Parser *json = ptr; - return sizeof(*json) + FBUFFER_CAPA(json->fbuffer); -} - -#ifdef NEW_TYPEDDATA_WRAPPER -static const rb_data_type_t JSON_Parser_type = { - "JSON/Parser", - {JSON_mark, 
JSON_free, JSON_memsize,}, -#ifdef RUBY_TYPED_FREE_IMMEDIATELY - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -#endif -}; -#endif - -static VALUE cJSON_parser_s_allocate(VALUE klass) -{ - JSON_Parser *json; - VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - json->fbuffer = fbuffer_alloc(0); - return obj; -} - -/* - * call-seq: source() - * - * Returns a copy of the current _source_ string, that was used to construct - * this Parser. - */ -static VALUE cParser_source(VALUE self) -{ - GET_PARSER; - return rb_str_dup(json->Vsource); -} - -void Init_parser(void) -{ -#undef rb_intern - rb_require("json/common"); - mJSON = rb_define_module("JSON"); - mExt = rb_define_module_under(mJSON, "Ext"); - cParser = rb_define_class_under(mExt, "Parser", rb_cObject); - eParserError = rb_path2class("JSON::ParserError"); - eNestingError = rb_path2class("JSON::NestingError"); - rb_define_alloc_func(cParser, cJSON_parser_s_allocate); - rb_define_method(cParser, "initialize", cParser_initialize, -1); - rb_define_method(cParser, "parse", cParser_parse, 0); - rb_define_method(cParser, "source", cParser_source, 0); - - CNaN = rb_const_get(mJSON, rb_intern("NaN")); - CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); - CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); - - i_json_creatable_p = rb_intern("json_creatable?"); - i_json_create = rb_intern("json_create"); - i_create_id = rb_intern("create_id"); - i_create_additions = rb_intern("create_additions"); - i_chr = rb_intern("chr"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = rb_intern("allow_nan"); - i_symbolize_names = rb_intern("symbolize_names"); - i_object_class = rb_intern("object_class"); - i_array_class = rb_intern("array_class"); - i_decimal_class = rb_intern("decimal_class"); - i_match = rb_intern("match"); - i_match_string = rb_intern("match_string"); - i_key_p = rb_intern("key?"); - i_deep_const_get = rb_intern("deep_const_get"); - i_aset = rb_intern("[]="); - 
i_aref = rb_intern("[]"); - i_leftshift = rb_intern("<<"); - i_new = rb_intern("new"); - i_BigDecimal = rb_intern("BigDecimal"); -} - -/* - * Local variables: - * mode: c - * c-file-style: ruby - * indent-tabs-mode: nil - * End: - */ diff --git a/ext/json/parser/prereq.mk b/ext/json/parser/prereq.mk deleted file mode 100644 index be7bcb4319..0000000000 --- a/ext/json/parser/prereq.mk +++ /dev/null @@ -1,10 +0,0 @@ -RAGEL = ragel - -.SUFFIXES: .rl - -.rl.c: - $(RAGEL) -G2 $< - $(BASERUBY) -pli -e '$$_.sub!(/[ \t]+$$/, "")' \ - -e '$$_.sub!(/^static const int (JSON_.*=.*);$$/, "enum {\\1};")' $@ - -parser.c: diff --git a/ext/json/simd/conf.rb b/ext/json/simd/conf.rb new file mode 100644 index 0000000000..76f774bc97 --- /dev/null +++ b/ext/json/simd/conf.rb @@ -0,0 +1,24 @@ +case RbConfig::CONFIG['host_cpu'] +when /^(arm|aarch64)/ + # Try to compile a small program using NEON instructions + header, type, init, extra = 'arm_neon.h', 'uint8x16_t', 'vdupq_n_u8(32)', nil +when /^(x86_64|x64)/ + header, type, init, extra = 'x86intrin.h', '__m128i', '_mm_set1_epi8(32)', 'if (__builtin_cpu_supports("sse2")) { printf("OK"); }' +end +if header + if have_header(header) && try_compile(<<~SRC, '-Werror=implicit-function-declaration') + #{cpp_include(header)} + int main(int argc, char **argv) { + #{type} test = #{init}; + #{extra} + if (argc > 100000) printf("%p", &test); + return 0; + } + SRC + $defs.push("-DJSON_ENABLE_SIMD") + else + puts "Disable SIMD" + end +end + +have_header('cpuid.h') diff --git a/ext/json/simd/simd.h b/ext/json/simd/simd.h new file mode 100644 index 0000000000..3bb86acdec --- /dev/null +++ b/ext/json/simd/simd.h @@ -0,0 +1,218 @@ +#include "../json.h" + +typedef enum { + SIMD_NONE, + SIMD_NEON, + SIMD_SSE2 +} SIMD_Implementation; + +#ifndef __has_builtin // Optional of course. + #define __has_builtin(x) 0 // Compatibility with non-clang compilers. 
+#endif + +#ifdef __clang__ +# if __has_builtin(__builtin_ctzll) +# define HAVE_BUILTIN_CTZLL 1 +# else +# define HAVE_BUILTIN_CTZLL 0 +# endif +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +# define HAVE_BUILTIN_CTZLL 1 +#else +# define HAVE_BUILTIN_CTZLL 0 +#endif + +static inline uint32_t trailing_zeros64(uint64_t input) +{ + JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior + +#if HAVE_BUILTIN_CTZLL + return __builtin_ctzll(input); +#else + uint32_t trailing_zeros = 0; + uint64_t temp = input; + while ((temp & 1) == 0 && temp > 0) { + trailing_zeros++; + temp >>= 1; + } + return trailing_zeros; +#endif +} + +static inline int trailing_zeros(int input) +{ + JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior + +#if HAVE_BUILTIN_CTZLL + return __builtin_ctz(input); +#else + int trailing_zeros = 0; + int temp = input; + while ((temp & 1) == 0 && temp > 0) { + trailing_zeros++; + temp >>= 1; + } + return trailing_zeros; +#endif +} + +#ifdef JSON_ENABLE_SIMD + +#define SIMD_MINIMUM_THRESHOLD 4 + +ALWAYS_INLINE(static) void json_fast_memcpy16(char *dst, const char *src, size_t len) +{ + RBIMPL_ASSERT_OR_ASSUME(len < 16); + RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); // 4 +#if defined(__has_builtin) && __has_builtin(__builtin_memcpy) + // If __builtin_memcpy is available, use it to copy between SIMD_MINIMUM_THRESHOLD (4) and vec_len-1 (15) bytes. + // These copies overlap. The first copy will copy the first 8 (or 4) bytes. The second copy will copy + // the last 8 (or 4) bytes but overlap with the first copy. The overlapping bytes will be in the correct + // position in both copies. + + // Please do not attempt to replace __builtin_memcpy with memcpy without profiling and/or looking at the + // generated assembly. 
On clang-specifically (tested on Apple clang version 17.0.0 (clang-1700.0.13.3)), + // when using memcpy, the compiler will notice the only difference is a 4 or 8 and generate a conditional + // select instruction instead of direct loads and stores with a branch. This ends up slower than the branch + // plus two loads and stores generated when using __builtin_memcpy. + if (len >= 8) { + __builtin_memcpy(dst, src, 8); + __builtin_memcpy(dst + len - 8, src + len - 8, 8); + } else { + __builtin_memcpy(dst, src, 4); + __builtin_memcpy(dst + len - 4, src + len - 4, 4); + } +#else + MEMCPY(dst, src, char, len); +#endif +} + +#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64) +#include <arm_neon.h> + +#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 +static inline SIMD_Implementation find_simd_implementation(void) +{ + return SIMD_NEON; +} + +#define HAVE_SIMD 1 +#define HAVE_SIMD_NEON 1 + +// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon +ALWAYS_INLINE(static) uint64_t neon_match_mask(uint8x16_t matches) +{ + const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4); + const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0); + return mask & 0x8888888888888888ull; +} + +ALWAYS_INLINE(static) uint64_t compute_chunk_mask_neon(const char *ptr) +{ + uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr); + + // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 + // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/ + const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33)); + + uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\')); + uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash); + return neon_match_mask(needs_escape); +} + +ALWAYS_INLINE(static) int string_scan_simd_neon(const char **ptr, 
const char *end, uint64_t *mask) +{ + while (*ptr + sizeof(uint8x16_t) <= end) { + uint64_t chunk_mask = compute_chunk_mask_neon(*ptr); + if (chunk_mask) { + *mask = chunk_mask; + return 1; + } + *ptr += sizeof(uint8x16_t); + } + return 0; +} + +static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table) +{ + uint8x16x4_t tab; + tab.val[0] = vld1q_u8(table); + tab.val[1] = vld1q_u8(table+16); + tab.val[2] = vld1q_u8(table+32); + tab.val[3] = vld1q_u8(table+48); + return tab; +} + +#endif /* ARM Neon Support.*/ + +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) + +#ifdef HAVE_X86INTRIN_H +#include <x86intrin.h> + +#define HAVE_SIMD 1 +#define HAVE_SIMD_SSE2 1 + +#ifdef HAVE_CPUID_H +#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 + +#if defined(__clang__) || defined(__GNUC__) +#define TARGET_SSE2 __attribute__((target("sse2"))) +#else +#define TARGET_SSE2 +#endif + +#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a) +#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a) +#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1)) +#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a) + +ALWAYS_INLINE(static) TARGET_SSE2 int compute_chunk_mask_sse2(const char *ptr) +{ + __m128i chunk = _mm_loadu_si128((__m128i const*)ptr); + // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 + // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/ + __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33)); + __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\')); + __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash); + return _mm_movemask_epi8(needs_escape); +} + +ALWAYS_INLINE(static) TARGET_SSE2 int string_scan_simd_sse2(const char **ptr, const char *end, int *mask) +{ + while (*ptr + sizeof(__m128i) <= end) { + int chunk_mask 
= compute_chunk_mask_sse2(*ptr); + if (chunk_mask) { + *mask = chunk_mask; + return 1; + } + *ptr += sizeof(__m128i); + } + + return 0; +} + +#include <cpuid.h> +#endif /* HAVE_CPUID_H */ + +static inline SIMD_Implementation find_simd_implementation(void) +{ + // TODO Revisit. I think the SSE version now only uses SSE2 instructions. + if (__builtin_cpu_supports("sse2")) { + return SIMD_SSE2; + } + + return SIMD_NONE; +} + +#endif /* HAVE_X86INTRIN_H */ +#endif /* X86_64 Support */ + +#endif /* JSON_ENABLE_SIMD */ + +#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED +static inline SIMD_Implementation find_simd_implementation(void) +{ + return SIMD_NONE; +} +#endif diff --git a/ext/json/vendor/fpconv.c b/ext/json/vendor/fpconv.c new file mode 100644 index 0000000000..6c9bc2c103 --- /dev/null +++ b/ext/json/vendor/fpconv.c @@ -0,0 +1,480 @@ +// Boost Software License - Version 1.0 - August 17th, 2003 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +// The contents of this file is extracted from https://github.com/night-shift/fpconv +// It was slightly modified to append ".0" to plain floats, for use with the https://github.com/ruby/json package. + +#include <stdbool.h> +#include <string.h> +#include <stdint.h> + +#if JSON_DEBUG +#include <assert.h> +#endif + +#define npowers 87 +#define steppowers 8 +#define firstpower -348 /* 10 ^ -348 */ + +#define expmax -32 +#define expmin -60 + +typedef struct Fp { + uint64_t frac; + int exp; +} Fp; + +static const Fp powers_ten[] = { + { 18054884314459144840U, -1220 }, { 13451937075301367670U, -1193 }, + { 10022474136428063862U, -1166 }, { 14934650266808366570U, -1140 }, + { 11127181549972568877U, -1113 }, { 16580792590934885855U, -1087 }, + { 12353653155963782858U, -1060 }, { 18408377700990114895U, -1034 }, + { 13715310171984221708U, -1007 }, { 10218702384817765436U, -980 }, + { 15227053142812498563U, -954 }, { 11345038669416679861U, -927 }, + { 16905424996341287883U, -901 }, { 12595523146049147757U, -874 }, + { 9384396036005875287U, -847 }, { 13983839803942852151U, -821 }, + { 10418772551374772303U, -794 }, { 15525180923007089351U, -768 }, + { 11567161174868858868U, -741 }, { 17236413322193710309U, -715 }, + { 12842128665889583758U, -688 }, { 9568131466127621947U, -661 }, + { 14257626930069360058U, -635 }, { 10622759856335341974U, -608 }, + { 15829145694278690180U, -582 }, { 11793632577567316726U, -555 }, + { 17573882009934360870U, -529 }, { 13093562431584567480U, -502 }, + { 9755464219737475723U, -475 }, { 14536774485912137811U, -449 }, + { 10830740992659433045U, -422 }, { 16139061738043178685U, -396 }, + { 12024538023802026127U, -369 }, { 17917957937422433684U, -343 }, + { 
13349918974505688015U, -316 }, { 9946464728195732843U, -289 }, + { 14821387422376473014U, -263 }, { 11042794154864902060U, -236 }, + { 16455045573212060422U, -210 }, { 12259964326927110867U, -183 }, + { 18268770466636286478U, -157 }, { 13611294676837538539U, -130 }, + { 10141204801825835212U, -103 }, { 15111572745182864684U, -77 }, + { 11258999068426240000U, -50 }, { 16777216000000000000U, -24 }, + { 12500000000000000000U, 3 }, { 9313225746154785156U, 30 }, + { 13877787807814456755U, 56 }, { 10339757656912845936U, 83 }, + { 15407439555097886824U, 109 }, { 11479437019748901445U, 136 }, + { 17105694144590052135U, 162 }, { 12744735289059618216U, 189 }, + { 9495567745759798747U, 216 }, { 14149498560666738074U, 242 }, + { 10542197943230523224U, 269 }, { 15709099088952724970U, 295 }, + { 11704190886730495818U, 322 }, { 17440603504673385349U, 348 }, + { 12994262207056124023U, 375 }, { 9681479787123295682U, 402 }, + { 14426529090290212157U, 428 }, { 10748601772107342003U, 455 }, + { 16016664761464807395U, 481 }, { 11933345169920330789U, 508 }, + { 17782069995880619868U, 534 }, { 13248674568444952270U, 561 }, + { 9871031767461413346U, 588 }, { 14708983551653345445U, 614 }, + { 10959046745042015199U, 641 }, { 16330252207878254650U, 667 }, + { 12166986024289022870U, 694 }, { 18130221999122236476U, 720 }, + { 13508068024458167312U, 747 }, { 10064294952495520794U, 774 }, + { 14996968138956309548U, 800 }, { 11173611982879273257U, 827 }, + { 16649979327439178909U, 853 }, { 12405201291620119593U, 880 }, + { 9242595204427927429U, 907 }, { 13772540099066387757U, 933 }, + { 10261342003245940623U, 960 }, { 15290591125556738113U, 986 }, + { 11392378155556871081U, 1013 }, { 16975966327722178521U, 1039 }, + { 12648080533535911531U, 1066 } +}; + +static Fp find_cachedpow10(int exp, int* k) +{ + const double one_log_ten = 0.30102999566398114; + + int approx = (int)(-(exp + npowers) * one_log_ten); + int idx = (approx - firstpower) / steppowers; + + while(1) { + int current = exp + 
powers_ten[idx].exp + 64; + + if(current < expmin) { + idx++; + continue; + } + + if(current > expmax) { + idx--; + continue; + } + + *k = (firstpower + idx * steppowers); + + return powers_ten[idx]; + } +} + +#define fracmask 0x000FFFFFFFFFFFFFU +#define expmask 0x7FF0000000000000U +#define hiddenbit 0x0010000000000000U +#define signmask 0x8000000000000000U +#define expbias (1023 + 52) + +#define absv(n) ((n) < 0 ? -(n) : (n)) +#define minv(a, b) ((a) < (b) ? (a) : (b)) + +static const uint64_t tens[] = { + 10000000000000000000U, 1000000000000000000U, 100000000000000000U, + 10000000000000000U, 1000000000000000U, 100000000000000U, + 10000000000000U, 1000000000000U, 100000000000U, + 10000000000U, 1000000000U, 100000000U, + 10000000U, 1000000U, 100000U, + 10000U, 1000U, 100U, + 10U, 1U +}; + +static inline uint64_t get_dbits(double d) +{ + union { + double dbl; + uint64_t i; + } dbl_bits = { d }; + + return dbl_bits.i; +} + +static Fp build_fp(double d) +{ + uint64_t bits = get_dbits(d); + + Fp fp; + fp.frac = bits & fracmask; + fp.exp = (bits & expmask) >> 52; + + if(fp.exp) { + fp.frac += hiddenbit; + fp.exp -= expbias; + + } else { + fp.exp = -expbias + 1; + } + + return fp; +} + +static void normalize(Fp* fp) +{ + while ((fp->frac & hiddenbit) == 0) { + fp->frac <<= 1; + fp->exp--; + } + + int shift = 64 - 52 - 1; + fp->frac <<= shift; + fp->exp -= shift; +} + +static void get_normalized_boundaries(Fp* fp, Fp* lower, Fp* upper) +{ + upper->frac = (fp->frac << 1) + 1; + upper->exp = fp->exp - 1; + + while ((upper->frac & (hiddenbit << 1)) == 0) { + upper->frac <<= 1; + upper->exp--; + } + + int u_shift = 64 - 52 - 2; + + upper->frac <<= u_shift; + upper->exp = upper->exp - u_shift; + + + int l_shift = fp->frac == hiddenbit ? 
2 : 1; + + lower->frac = (fp->frac << l_shift) - 1; + lower->exp = fp->exp - l_shift; + + + lower->frac <<= lower->exp - upper->exp; + lower->exp = upper->exp; +} + +static Fp multiply(Fp* a, Fp* b) +{ + const uint64_t lomask = 0x00000000FFFFFFFF; + + uint64_t ah_bl = (a->frac >> 32) * (b->frac & lomask); + uint64_t al_bh = (a->frac & lomask) * (b->frac >> 32); + uint64_t al_bl = (a->frac & lomask) * (b->frac & lomask); + uint64_t ah_bh = (a->frac >> 32) * (b->frac >> 32); + + uint64_t tmp = (ah_bl & lomask) + (al_bh & lomask) + (al_bl >> 32); + /* round up */ + tmp += 1U << 31; + + Fp fp = { + ah_bh + (ah_bl >> 32) + (al_bh >> 32) + (tmp >> 32), + a->exp + b->exp + 64 + }; + + return fp; +} + +static void round_digit(char* digits, int ndigits, uint64_t delta, uint64_t rem, uint64_t kappa, uint64_t frac) +{ + while (rem < frac && delta - rem >= kappa && + (rem + kappa < frac || frac - rem > rem + kappa - frac)) { + + digits[ndigits - 1]--; + rem += kappa; + } +} + +static int generate_digits(Fp* fp, Fp* upper, Fp* lower, char* digits, int* K) +{ + uint64_t wfrac = upper->frac - fp->frac; + uint64_t delta = upper->frac - lower->frac; + + Fp one; + one.frac = 1ULL << -upper->exp; + one.exp = upper->exp; + + uint64_t part1 = upper->frac >> -one.exp; + uint64_t part2 = upper->frac & (one.frac - 1); + + int idx = 0, kappa = 10; + const uint64_t* divp; + /* 1000000000 */ + for(divp = tens + 10; kappa > 0; divp++) { + + uint64_t div = *divp; + unsigned digit = (unsigned) (part1 / div); + + if (digit || idx) { + digits[idx++] = digit + '0'; + } + + part1 -= digit * div; + kappa--; + + uint64_t tmp = (part1 <<-one.exp) + part2; + if (tmp <= delta) { + *K += kappa; + round_digit(digits, idx, delta, tmp, div << -one.exp, wfrac); + + return idx; + } + } + + /* 10 */ + const uint64_t* unit = tens + 18; + + while(true) { + part2 *= 10; + delta *= 10; + kappa--; + + unsigned digit = (unsigned) (part2 >> -one.exp); + if (digit || idx) { + digits[idx++] = digit + '0'; + } + + part2 
&= one.frac - 1; + if (part2 < delta) { + *K += kappa; + round_digit(digits, idx, delta, part2, one.frac, wfrac * *unit); + + return idx; + } + + unit--; + } +} + +static int grisu2(double d, char* digits, int* K) +{ + Fp w = build_fp(d); + + Fp lower, upper; + get_normalized_boundaries(&w, &lower, &upper); + + normalize(&w); + + int k; + Fp cp = find_cachedpow10(upper.exp, &k); + + w = multiply(&w, &cp); + upper = multiply(&upper, &cp); + lower = multiply(&lower, &cp); + + lower.frac++; + upper.frac--; + + *K = -k; + + return generate_digits(&w, &upper, &lower, digits, K); +} + +static int emit_digits(char* digits, int ndigits, char* dest, int K, bool neg) +{ + int exp = absv(K + ndigits - 1); + + if(K >= 0 && exp < 15) { + memcpy(dest, digits, ndigits); + memset(dest + ndigits, '0', K); + + /* add a .0 to mark this as a float. */ + dest[ndigits + K] = '.'; + dest[ndigits + K + 1] = '0'; + + return ndigits + K + 2; + } + + /* write decimal w/o scientific notation */ + if(K < 0 && (K > -7 || exp < 10)) { + int offset = ndigits - absv(K); + /* fp < 1.0 -> write leading zero */ + if(offset <= 0) { + offset = -offset; + dest[0] = '0'; + dest[1] = '.'; + memset(dest + 2, '0', offset); + memcpy(dest + offset + 2, digits, ndigits); + + return ndigits + 2 + offset; + + /* fp > 1.0 */ + } else { + memcpy(dest, digits, offset); + dest[offset] = '.'; + memcpy(dest + offset + 1, digits + offset, ndigits - offset); + + return ndigits + 1; + } + } + + /* write decimal w/ scientific notation */ + ndigits = minv(ndigits, 18 - neg); + + int idx = 0; + dest[idx++] = digits[0]; + + if(ndigits > 1) { + dest[idx++] = '.'; + memcpy(dest + idx, digits + 1, ndigits - 1); + idx += ndigits - 1; + } + + dest[idx++] = 'e'; + + char sign = K + ndigits - 1 < 0 ? 
'-' : '+'; + dest[idx++] = sign; + + int cent = 0; + + if(exp > 99) { + cent = exp / 100; + dest[idx++] = cent + '0'; + exp -= cent * 100; + } + if(exp > 9) { + int dec = exp / 10; + dest[idx++] = dec + '0'; + exp -= dec * 10; + + } else if(cent) { + dest[idx++] = '0'; + } + + dest[idx++] = exp % 10 + '0'; + + return idx; +} + +static int filter_special(double fp, char* dest) +{ + if(fp == 0.0) { + dest[0] = '0'; + dest[1] = '.'; + dest[2] = '0'; + return 3; + } + + uint64_t bits = get_dbits(fp); + + bool nan = (bits & expmask) == expmask; + + if(!nan) { + return 0; + } + + if(bits & fracmask) { + dest[0] = 'n'; dest[1] = 'a'; dest[2] = 'n'; + + } else { + dest[0] = 'i'; dest[1] = 'n'; dest[2] = 'f'; + } + + return 3; +} + +/* Fast and accurate double to string conversion based on Florian Loitsch's + * Grisu-algorithm[1]. + * + * Input: + * fp -> the double to convert, dest -> destination buffer. + * The generated string will never be longer than 32 characters. + * Make sure to pass a pointer to at least 32 bytes of memory. + * The emitted string will not be null terminated. + * + * + * + * Output: + * The number of written characters. 
+ * + * Exemplary usage: + * + * void print(double d) + * { + * char buf[32 + 1]; // plus null terminator + * int str_len = fpconv_dtoa(d, buf); + * + * buf[str_len] = '\0'; + * printf("%s", buf); + * } + * + */ +static int fpconv_dtoa(double d, char dest[32]) +{ + char digits[18]; + + int str_len = 0; + bool neg = false; + + if(get_dbits(d) & signmask) { + dest[0] = '-'; + str_len++; + neg = true; + } + + int spec = filter_special(d, dest + str_len); + + if(spec) { + return str_len + spec; + } + + int K = 0; + int ndigits = grisu2(d, digits, &K); + + str_len += emit_digits(digits, ndigits, dest + str_len, K, neg); +#if JSON_DEBUG + assert(str_len <= 32); +#endif + + return str_len; +} diff --git a/ext/json/vendor/jeaiii-ltoa.h b/ext/json/vendor/jeaiii-ltoa.h new file mode 100644 index 0000000000..ba4f497fc8 --- /dev/null +++ b/ext/json/vendor/jeaiii-ltoa.h @@ -0,0 +1,267 @@ +/* + +This file is released under the terms of the MIT License. It is based on the +work of James Edward Anhalt III, with the original license listed below. + +MIT License + +Copyright (c) 2024,2025 Enrico Thierbach - https://github.com/radiospiel +Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/* IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/

#ifndef JEAIII_TO_TEXT_H_
#define JEAIII_TO_TEXT_H_

#include <stdint.h>
#include <string.h> // memcpy, used by the COPY macro below

typedef uint_fast32_t u32_t;
typedef uint_fast64_t u64_t;

#define u32(x) ((u32_t)(x))
#define u64(x) ((u64_t)(x))

// Two adjacent characters, copied as one unit by COPY.
struct digit_pair
{
    char dd[2];
};

// Entry i holds the two-character, zero-padded decimal text of i (0..99).
// Used for every digit pair after the leading one.
static const struct digit_pair *const digits_dd = (const struct digit_pair *)(
    "00" "01" "02" "03" "04" "05" "06" "07" "08" "09"
    "10" "11" "12" "13" "14" "15" "16" "17" "18" "19"
    "20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
    "30" "31" "32" "33" "34" "35" "36" "37" "38" "39"
    "40" "41" "42" "43" "44" "45" "46" "47" "48" "49"
    "50" "51" "52" "53" "54" "55" "56" "57" "58" "59"
    "60" "61" "62" "63" "64" "65" "66" "67" "68" "69"
    "70" "71" "72" "73" "74" "75" "76" "77" "78" "79"
    "80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
    "90" "91" "92" "93" "94" "95" "96" "97" "98" "99"
);

// Same as digits_dd, except that entries 0..9 are the single digit followed
// by a '_' filler instead of a leading zero. Used for the leading group so
// that no leading zero is emitted.
static const struct digit_pair *const digits_fd = (const struct digit_pair *)(
    "0_" "1_" "2_" "3_" "4_" "5_" "6_" "7_" "8_" "9_"
    "10" "11" "12" "13" "14" "15" "16" "17" "18" "19"
    "20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
    "30" "31" "32" "33" "34" "35" "36" "37" "38" "39"
    "40" "41" "42" "43" "44" "45" "46" "47" "48" "49"
    "50" "51" "52" "53" "54" "55" "56" "57" "58" "59"
    "60" "61" "62" "63" "64" "65" "66" "67" "68" "69"
    "70" "71" "72" "73" "74" "75" "76" "77" "78" "79"
    "80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
    "90" "91" "92" "93" "94" "95" "96" "97" "98" "99"
);

// Masks selecting the fractional part of the 24-, 32- and 57-bit fixed-point
// values used below.
static const u64_t mask24 = (u64(1) << 24) - 1;
static const u64_t mask32 = (u64(1) << 32) - 1;
static const u64_t mask57 = (u64(1) << 57) - 1;

// Stores one digit pair (always two bytes) at `buffer`.
#define COPY(buffer, digits) memcpy(buffer, &(digits), sizeof(struct digit_pair))

// Writes the decimal digits of `n` starting at `b` and returns a pointer just
// past the last digit. No NUL terminator is written.
//
// Digits are always stored two at a time. Each branch scales the value into a
// fixed-point number whose integer part (the bits above bit 24, 32 or 57) is
// the leading one or two digits; multiplying the remaining fraction by 100
// then yields each following pair.
//
// When the leading group is a single digit, digits_fd supplies "d_" and `b` is
// stepped back by one so that the next pair overwrites the '_' filler. For
// n < 10 there is no next pair, so the filler is left at b[1], one byte past
// the returned end: the destination needs at least two writable bytes, and up
// to 20 for a 64-bit value.
static char *
jeaiii_ultoa(char *b, u64_t n)
{
    if (n < u32(1e2)) {
        COPY(b, digits_fd[n]);
        return n < 10 ? b + 1 : b + 2;
    }

    if (n < u32(1e6)) {
        if (n < u32(1e4)) {
            u32_t f0 = u32((10 * (1 << 24) / 1e3 + 1) * n);
            COPY(b, digits_fd[f0 >> 24]);

            b -= n < u32(1e3);
            u32_t f2 = (f0 & mask24) * 100;
            COPY(b + 2, digits_dd[f2 >> 24]);

            return b + 4;
        }

        u64_t f0 = u64(10 * (1ull << 32ull)/ 1e5 + 1) * n;
        COPY(b, digits_fd[f0 >> 32]);

        b -= n < u32(1e5);
        u64_t f2 = (f0 & mask32) * 100;
        COPY(b + 2, digits_dd[f2 >> 32]);

        u64_t f4 = (f2 & mask32) * 100;
        COPY(b + 4, digits_dd[f4 >> 32]);
        return b + 6;
    }

    if (n < u64(1ull << 32ull)) {
        if (n < u32(1e8)) {
            u64_t f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16;
            COPY(b, digits_fd[f0 >> 32]);

            b -= n < u32(1e7);
            u64_t f2 = (f0 & mask32) * 100;
            COPY(b + 2, digits_dd[f2 >> 32]);

            u64_t f4 = (f2 & mask32) * 100;
            COPY(b + 4, digits_dd[f4 >> 32]);

            u64_t f6 = (f4 & mask32) * 100;
            COPY(b + 6, digits_dd[f6 >> 32]);

            return b + 8;
        }

        u64_t f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * n;
        COPY(b, digits_fd[f0 >> 57]);

        b -= n < u32(1e9);
        u64_t f2 = (f0 & mask57) * 100;
        COPY(b + 2, digits_dd[f2 >> 57]);

        u64_t f4 = (f2 & mask57) * 100;
        COPY(b + 4, digits_dd[f4 >> 57]);

        u64_t f6 = (f4 & mask57) * 100;
        COPY(b + 6, digits_dd[f6 >> 57]);

        u64_t f8 = (f6 & mask57) * 100;
        COPY(b + 8, digits_dd[f8 >> 57]);

        return b + 10;
    }

    // n does not fit in 32 bits here (smaller values returned above): split
    // off the low 8 decimal digits (z) and format the high part (u) first.
    u32_t z = n % u32(1e8);
    u64_t u = n / u32(1e8);

    if (u < u32(1e2)) {
        // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number)
        COPY(b, digits_dd[u]);
        b += 2;
    }
    else if (u < u32(1e6)) {
        if (u < u32(1e4)) {
            u32_t f0 = u32((10 * (1 << 24) / 1e3 + 1) * u);
            COPY(b, digits_fd[f0 >> 24]);

            b -= u < u32(1e3);
            u32_t f2 = (f0 & mask24) * 100;
            COPY(b + 2, digits_dd[f2 >> 24]);
            b += 4;
        }
        else {
            u64_t f0 = u64(10 * (1ull << 32ull) / 1e5 + 1) * u;
            COPY(b, digits_fd[f0 >> 32]);

            b -= u < u32(1e5);
            u64_t f2 = (f0 & mask32) * 100;
            COPY(b + 2, digits_dd[f2 >> 32]);

            u64_t f4 = (f2 & mask32) * 100;
            COPY(b + 4, digits_dd[f4 >> 32]);
            b += 6;
        }
    }
    else if (u < u32(1e8)) {
        u64_t f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16;
        COPY(b, digits_fd[f0 >> 32]);

        b -= u < u32(1e7);
        u64_t f2 = (f0 & mask32) * 100;
        COPY(b + 2, digits_dd[f2 >> 32]);

        u64_t f4 = (f2 & mask32) * 100;
        COPY(b + 4, digits_dd[f4 >> 32]);

        u64_t f6 = (f4 & mask32) * 100;
        COPY(b + 6, digits_dd[f6 >> 32]);

        b += 8;
    }
    else if (u < u64(1ull << 32ull)) {
        u64_t f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * u;
        COPY(b, digits_fd[f0 >> 57]);

        b -= u < u32(1e9);
        u64_t f2 = (f0 & mask57) * 100;
        COPY(b + 2, digits_dd[f2 >> 57]);

        u64_t f4 = (f2 & mask57) * 100;
        COPY(b + 4, digits_dd[f4 >> 57]);

        u64_t f6 = (f4 & mask57) * 100;
        COPY(b + 6, digits_dd[f6 >> 57]);

        u64_t f8 = (f6 & mask57) * 100;
        COPY(b + 8, digits_dd[f8 >> 57]);
        b += 10;
    }
    else {
        // The high part itself exceeds 32 bits: peel off another 8 digits (y).
        u32_t y = u % u32(1e8);
        u /= u32(1e8);

        // u is 2, 3, or 4 digits (if u < 10 it would have been handled above)
        if (u < u32(1e2)) {
            COPY(b, digits_dd[u]);
            b += 2;
        }
        else {
            u32_t f0 = u32((10 * (1 << 24) / 1e3 + 1) * u);
            COPY(b, digits_fd[f0 >> 24]);

            b -= u < u32(1e3);
            u32_t f2 = (f0 & mask24) * 100;
            COPY(b + 2, digits_dd[f2 >> 24]);

            b += 4;
        }
        // do 8 digits (y, zero-padded because it is not the leading group)
        u64_t f0 = (u64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1;
        COPY(b, digits_dd[f0 >> 32]);

        u64_t f2 = (f0 & mask32) * 100;
        COPY(b + 2, digits_dd[f2 >> 32]);

        u64_t f4 = (f2 & mask32) * 100;
        COPY(b + 4, digits_dd[f4 >> 32]);

        u64_t f6 = (f4 & mask32) * 100;
        COPY(b + 6, digits_dd[f6 >> 32]);
        b += 8;
    }

    // do 8 digits (z, the low 8 decimal digits of n, zero-padded)
    u64_t f0 = (u64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1;
    COPY(b, digits_dd[f0 >> 32]);

    u64_t f2 = (f0 & mask32) * 100;
    COPY(b + 2, digits_dd[f2 >> 32]);

    u64_t f4 = (f2 & mask32) * 100;
    COPY(b + 4, digits_dd[f4 >> 32]);

    u64_t f6 = (f4 & mask32) * 100;
    COPY(b + 6, digits_dd[f6 >> 32]);

    return b + 8;
}

#undef u32
#undef u64
#undef COPY

#endif // JEAIII_TO_TEXT_H_ diff --git a/ext/json/vendor/ryu.h b/ext/json/vendor/ryu.h new file mode 100644 index 0000000000..f06ec814b4 --- /dev/null +++ b/ext/json/vendor/ryu.h @@ -0,0 +1,819 @@ +// Copyright 2018 Ulf Adams +// +// The contents of this file may be used under the terms of the Apache License, +// Version 2.0. +// +// Alternatively, the contents of this file may be used under the terms of +// the Boost Software License, Version 1.0. +// +// Unless required by applicable law or agreed to in writing, this software +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. +// +// --- +// +// Apache License +// Version 2.0, January 2004 +// http://www.apache.org/licenses/ +// +// TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +// +// 1. Definitions. +// +// "License" shall mean the terms and conditions for use, reproduction, +// and distribution as defined by Sections 1 through 9 of this document. +// +// "Licensor" shall mean the copyright owner or entity authorized by +// the copyright owner that is granting the License. +// +// "Legal Entity" shall mean the union of the acting entity and all +// other entities that control, are controlled by, or are under common +// control with that entity. For the purposes of this definition, +// "control" means (i) the power, direct or indirect, to cause the +// direction or management of such entity, whether by contract or +// otherwise, or (ii) ownership of fifty percent (50%) or more of the +// outstanding shares, or (iii) beneficial ownership of such entity. +// +// "You" (or "Your") shall mean an individual or Legal Entity +// exercising permissions granted by this License.
+// +// "Source" form shall mean the preferred form for making modifications, +// including but not limited to software source code, documentation +// source, and configuration files. +// +// "Object" form shall mean any form resulting from mechanical +// transformation or translation of a Source form, including but +// not limited to compiled object code, generated documentation, +// and conversions to other media types. +// +// "Work" shall mean the work of authorship, whether in Source or +// Object form, made available under the License, as indicated by a +// copyright notice that is included in or attached to the work +// (an example is provided in the Appendix below). +// +// "Derivative Works" shall mean any work, whether in Source or Object +// form, that is based on (or derived from) the Work and for which the +// editorial revisions, annotations, elaborations, or other modifications +// represent, as a whole, an original work of authorship. For the purposes +// of this License, Derivative Works shall not include works that remain +// separable from, or merely link (or bind by name) to the interfaces of, +// the Work and Derivative Works thereof. +// +// "Contribution" shall mean any work of authorship, including +// the original version of the Work and any modifications or additions +// to that Work or Derivative Works thereof, that is intentionally +// submitted to Licensor for inclusion in the Work by the copyright owner +// or by an individual or Legal Entity authorized to submit on behalf of +// the copyright owner. 
For the purposes of this definition, "submitted" +// means any form of electronic, verbal, or written communication sent +// to the Licensor or its representatives, including but not limited to +// communication on electronic mailing lists, source code control systems, +// and issue tracking systems that are managed by, or on behalf of, the +// Licensor for the purpose of discussing and improving the Work, but +// excluding communication that is conspicuously marked or otherwise +// designated in writing by the copyright owner as "Not a Contribution." +// +// "Contributor" shall mean Licensor and any individual or Legal Entity +// on behalf of whom a Contribution has been received by Licensor and +// subsequently incorporated within the Work. +// +// 2. Grant of Copyright License. Subject to the terms and conditions of +// this License, each Contributor hereby grants to You a perpetual, +// worldwide, non-exclusive, no-charge, royalty-free, irrevocable +// copyright license to reproduce, prepare Derivative Works of, +// publicly display, publicly perform, sublicense, and distribute the +// Work and such Derivative Works in Source or Object form. +// +// 3. Grant of Patent License. Subject to the terms and conditions of +// this License, each Contributor hereby grants to You a perpetual, +// worldwide, non-exclusive, no-charge, royalty-free, irrevocable +// (except as stated in this section) patent license to make, have made, +// use, offer to sell, sell, import, and otherwise transfer the Work, +// where such license applies only to those patent claims licensable +// by such Contributor that are necessarily infringed by their +// Contribution(s) alone or by combination of their Contribution(s) +// with the Work to which such Contribution(s) was submitted. 
If You +// institute patent litigation against any entity (including a +// cross-claim or counterclaim in a lawsuit) alleging that the Work +// or a Contribution incorporated within the Work constitutes direct +// or contributory patent infringement, then any patent licenses +// granted to You under this License for that Work shall terminate +// as of the date such litigation is filed. +// +// 4. Redistribution. You may reproduce and distribute copies of the +// Work or Derivative Works thereof in any medium, with or without +// modifications, and in Source or Object form, provided that You +// meet the following conditions: +// +// (a) You must give any other recipients of the Work or +// Derivative Works a copy of this License; and +// +// (b) You must cause any modified files to carry prominent notices +// stating that You changed the files; and +// +// (c) You must retain, in the Source form of any Derivative Works +// that You distribute, all copyright, patent, trademark, and +// attribution notices from the Source form of the Work, +// excluding those notices that do not pertain to any part of +// the Derivative Works; and +// +// (d) If the Work includes a "NOTICE" text file as part of its +// distribution, then any Derivative Works that You distribute must +// include a readable copy of the attribution notices contained +// within such NOTICE file, excluding those notices that do not +// pertain to any part of the Derivative Works, in at least one +// of the following places: within a NOTICE text file distributed +// as part of the Derivative Works; within the Source form or +// documentation, if provided along with the Derivative Works; or, +// within a display generated by the Derivative Works, if and +// wherever such third-party notices normally appear. The contents +// of the NOTICE file are for informational purposes only and +// do not modify the License. 
You may add Your own attribution +// notices within Derivative Works that You distribute, alongside +// or as an addendum to the NOTICE text from the Work, provided +// that such additional attribution notices cannot be construed +// as modifying the License. +// +// You may add Your own copyright statement to Your modifications and +// may provide additional or different license terms and conditions +// for use, reproduction, or distribution of Your modifications, or +// for any such Derivative Works as a whole, provided Your use, +// reproduction, and distribution of the Work otherwise complies with +// the conditions stated in this License. +// +// 5. Submission of Contributions. Unless You explicitly state otherwise, +// any Contribution intentionally submitted for inclusion in the Work +// by You to the Licensor shall be under the terms and conditions of +// this License, without any additional terms or conditions. +// Notwithstanding the above, nothing herein shall supersede or modify +// the terms of any separate license agreement you may have executed +// with Licensor regarding such Contributions. +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor, +// except as required for reasonable and customary use in describing the +// origin of the Work and reproducing the content of the NOTICE file. +// +// 7. Disclaimer of Warranty. Unless required by applicable law or +// agreed to in writing, Licensor provides the Work (and each +// Contributor provides its Contributions) on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied, including, without limitation, any warranties or conditions +// of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +// PARTICULAR PURPOSE. 
You are solely responsible for determining the +// appropriateness of using or redistributing the Work and assume any +// risks associated with Your exercise of permissions under this License. +// +// 8. Limitation of Liability. In no event and under no legal theory, +// whether in tort (including negligence), contract, or otherwise, +// unless required by applicable law (such as deliberate and grossly +// negligent acts) or agreed to in writing, shall any Contributor be +// liable to You for damages, including any direct, indirect, special, +// incidental, or consequential damages of any character arising as a +// result of this License or out of the use or inability to use the +// Work (including but not limited to damages for loss of goodwill, +// work stoppage, computer failure or malfunction, or any and all +// other commercial damages or losses), even if such Contributor +// has been advised of the possibility of such damages. +// +// 9. Accepting Warranty or Additional Liability. While redistributing +// the Work or Derivative Works thereof, You may choose to offer, +// and charge a fee for, acceptance of support, warranty, indemnity, +// or other liability obligations and/or rights consistent with this +// License. However, in accepting such obligations, You may act only +// on Your own behalf and on Your sole responsibility, not on behalf +// of any other Contributor, and only if You agree to indemnify, +// defend, and hold each Contributor harmless for any liability +// incurred by, or claims asserted against, such Contributor by reason +// of your accepting any such warranty or additional liability. +// +// END OF TERMS AND CONDITIONS +// +// APPENDIX: How to apply the Apache License to your work. +// +// To apply the Apache License to your work, attach the following +// boilerplate notice, with the fields enclosed by brackets "[]" +// replaced with your own identifying information. (Don't include +// the brackets!) 
The text should be enclosed in the appropriate +// comment syntax for the file format. We also recommend that a +// file or class name and description of purpose be included on the +// same "printed page" as the copyright notice for easier +// identification within third-party archives. +// +// Copyright [yyyy] [name of copyright owner] +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// --- +// +// Boost Software License - Version 1.0 - August 17th, 2003 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// --- +// Minimal Ryu implementation adapted for Ruby JSON gem by Josef Šimánek +// Optimized for pre-extracted mantissa/exponent from JSON parsing +// This is a stripped-down version containing only what's needed for +// converting decimal mantissa+exponent to IEEE 754 double precision. + +#ifndef RYU_H +#define RYU_H + +#include <stdint.h> +#include <stdbool.h> +#include <string.h> + +// Detect __builtin_clzll availability (for floor_log2) +// Note: MSVC doesn't have __builtin_clzll, so we provide a fallback +#ifdef __clang__ + #if __has_builtin(__builtin_clzll) + #define RYU_HAVE_BUILTIN_CLZLL 1 + #else + #define RYU_HAVE_BUILTIN_CLZLL 0 + #endif +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define RYU_HAVE_BUILTIN_CLZLL 1 +#else + #define RYU_HAVE_BUILTIN_CLZLL 0 +#endif + +// Count leading zeros (for floor_log2) +static inline uint32_t ryu_leading_zeros64(uint64_t input) +{ +#if RYU_HAVE_BUILTIN_CLZLL + return __builtin_clzll(input); +#else + // Fallback: binary search for the highest set bit + // This works on MSVC and other compilers without __builtin_clzll + if (input == 0) return 64; + uint32_t n = 0; + if (input <= 0x00000000FFFFFFFFULL) { n += 32; input <<= 32; } + if (input <= 0x0000FFFFFFFFFFFFULL) { n += 16; input <<= 16; } + if (input <= 0x00FFFFFFFFFFFFFFULL) { n += 8; input <<= 8; } + if (input <= 0x0FFFFFFFFFFFFFFFULL) { n += 4; input <<= 4; } + if (input <= 0x3FFFFFFFFFFFFFFFULL) { n += 2; input <<= 2; 
} + if (input <= 0x7FFFFFFFFFFFFFFFULL) { n += 1; } + return n; +#endif +} + +// These tables are generated by PrintDoubleLookupTable. +#define DOUBLE_POW5_INV_BITCOUNT 125 +#define DOUBLE_POW5_BITCOUNT 125 + +#define DOUBLE_POW5_INV_TABLE_SIZE 342 +#define DOUBLE_POW5_TABLE_SIZE 326 + +static const uint64_t DOUBLE_POW5_INV_SPLIT[DOUBLE_POW5_INV_TABLE_SIZE][2] = { + { 1u, 2305843009213693952u }, { 11068046444225730970u, 1844674407370955161u }, + { 5165088340638674453u, 1475739525896764129u }, { 7821419487252849886u, 1180591620717411303u }, + { 8824922364862649494u, 1888946593147858085u }, { 7059937891890119595u, 1511157274518286468u }, + { 13026647942995916322u, 1208925819614629174u }, { 9774590264567735146u, 1934281311383406679u }, + { 11509021026396098440u, 1547425049106725343u }, { 16585914450600699399u, 1237940039285380274u }, + { 15469416676735388068u, 1980704062856608439u }, { 16064882156130220778u, 1584563250285286751u }, + { 9162556910162266299u, 1267650600228229401u }, { 7281393426775805432u, 2028240960365167042u }, + { 16893161185646375315u, 1622592768292133633u }, { 2446482504291369283u, 1298074214633706907u }, + { 7603720821608101175u, 2076918743413931051u }, { 2393627842544570617u, 1661534994731144841u }, + { 16672297533003297786u, 1329227995784915872u }, { 11918280793837635165u, 2126764793255865396u }, + { 5845275820328197809u, 1701411834604692317u }, { 15744267100488289217u, 1361129467683753853u }, + { 3054734472329800808u, 2177807148294006166u }, { 17201182836831481939u, 1742245718635204932u }, + { 6382248639981364905u, 1393796574908163946u }, { 2832900194486363201u, 2230074519853062314u }, + { 5955668970331000884u, 1784059615882449851u }, { 1075186361522890384u, 1427247692705959881u }, + { 12788344622662355584u, 2283596308329535809u }, { 13920024512871794791u, 1826877046663628647u }, + { 3757321980813615186u, 1461501637330902918u }, { 10384555214134712795u, 1169201309864722334u }, + { 5547241898389809503u, 1870722095783555735u }, { 
4437793518711847602u, 1496577676626844588u }, + { 10928932444453298728u, 1197262141301475670u }, { 17486291911125277965u, 1915619426082361072u }, + { 6610335899416401726u, 1532495540865888858u }, { 12666966349016942027u, 1225996432692711086u }, + { 12888448528943286597u, 1961594292308337738u }, { 17689456452638449924u, 1569275433846670190u }, + { 14151565162110759939u, 1255420347077336152u }, { 7885109000409574610u, 2008672555323737844u }, + { 9997436015069570011u, 1606938044258990275u }, { 7997948812055656009u, 1285550435407192220u }, + { 12796718099289049614u, 2056880696651507552u }, { 2858676849947419045u, 1645504557321206042u }, + { 13354987924183666206u, 1316403645856964833u }, { 17678631863951955605u, 2106245833371143733u }, + { 3074859046935833515u, 1684996666696914987u }, { 13527933681774397782u, 1347997333357531989u }, + { 10576647446613305481u, 2156795733372051183u }, { 15840015586774465031u, 1725436586697640946u }, + { 8982663654677661702u, 1380349269358112757u }, { 18061610662226169046u, 2208558830972980411u }, + { 10759939715039024913u, 1766847064778384329u }, { 12297300586773130254u, 1413477651822707463u }, + { 15986332124095098083u, 2261564242916331941u }, { 9099716884534168143u, 1809251394333065553u }, + { 14658471137111155161u, 1447401115466452442u }, { 4348079280205103483u, 1157920892373161954u }, + { 14335624477811986218u, 1852673427797059126u }, { 7779150767507678651u, 1482138742237647301u }, + { 2533971799264232598u, 1185710993790117841u }, { 15122401323048503126u, 1897137590064188545u }, + { 12097921058438802501u, 1517710072051350836u }, { 5988988032009131678u, 1214168057641080669u }, + { 16961078480698431330u, 1942668892225729070u }, { 13568862784558745064u, 1554135113780583256u }, + { 7165741412905085728u, 1243308091024466605u }, { 11465186260648137165u, 1989292945639146568u }, + { 16550846638002330379u, 1591434356511317254u }, { 16930026125143774626u, 1273147485209053803u }, + { 4951948911778577463u, 2037035976334486086u }, { 
272210314680951647u, 1629628781067588869u }, + { 3907117066486671641u, 1303703024854071095u }, { 6251387306378674625u, 2085924839766513752u }, + { 16069156289328670670u, 1668739871813211001u }, { 9165976216721026213u, 1334991897450568801u }, + { 7286864317269821294u, 2135987035920910082u }, { 16897537898041588005u, 1708789628736728065u }, + { 13518030318433270404u, 1367031702989382452u }, { 6871453250525591353u, 2187250724783011924u }, + { 9186511415162383406u, 1749800579826409539u }, { 11038557946871817048u, 1399840463861127631u }, + { 10282995085511086630u, 2239744742177804210u }, { 8226396068408869304u, 1791795793742243368u }, + { 13959814484210916090u, 1433436634993794694u }, { 11267656730511734774u, 2293498615990071511u }, + { 5324776569667477496u, 1834798892792057209u }, { 7949170070475892320u, 1467839114233645767u }, + { 17427382500606444826u, 1174271291386916613u }, { 5747719112518849781u, 1878834066219066582u }, + { 15666221734240810795u, 1503067252975253265u }, { 12532977387392648636u, 1202453802380202612u }, + { 5295368560860596524u, 1923926083808324180u }, { 4236294848688477220u, 1539140867046659344u }, + { 7078384693692692099u, 1231312693637327475u }, { 11325415509908307358u, 1970100309819723960u }, + { 9060332407926645887u, 1576080247855779168u }, { 14626963555825137356u, 1260864198284623334u }, + { 12335095245094488799u, 2017382717255397335u }, { 9868076196075591040u, 1613906173804317868u }, + { 15273158586344293478u, 1291124939043454294u }, { 13369007293925138595u, 2065799902469526871u }, + { 7005857020398200553u, 1652639921975621497u }, { 16672732060544291412u, 1322111937580497197u }, + { 11918976037903224966u, 2115379100128795516u }, { 5845832015580669650u, 1692303280103036413u }, + { 12055363241948356366u, 1353842624082429130u }, { 841837113407818570u, 2166148198531886609u }, + { 4362818505468165179u, 1732918558825509287u }, { 14558301248600263113u, 1386334847060407429u }, + { 12225235553534690011u, 2218135755296651887u }, { 2401490813343931363u, 
1774508604237321510u }, + { 1921192650675145090u, 1419606883389857208u }, { 17831303500047873437u, 2271371013423771532u }, + { 6886345170554478103u, 1817096810739017226u }, { 1819727321701672159u, 1453677448591213781u }, + { 16213177116328979020u, 1162941958872971024u }, { 14873036941900635463u, 1860707134196753639u }, + { 15587778368262418694u, 1488565707357402911u }, { 8780873879868024632u, 1190852565885922329u }, + { 2981351763563108441u, 1905364105417475727u }, { 13453127855076217722u, 1524291284333980581u }, + { 7073153469319063855u, 1219433027467184465u }, { 11317045550910502167u, 1951092843947495144u }, + { 12742985255470312057u, 1560874275157996115u }, { 10194388204376249646u, 1248699420126396892u }, + { 1553625868034358140u, 1997919072202235028u }, { 8621598323911307159u, 1598335257761788022u }, + { 17965325103354776697u, 1278668206209430417u }, { 13987124906400001422u, 2045869129935088668u }, + { 121653480894270168u, 1636695303948070935u }, { 97322784715416134u, 1309356243158456748u }, + { 14913111714512307107u, 2094969989053530796u }, { 8241140556867935363u, 1675975991242824637u }, + { 17660958889720079260u, 1340780792994259709u }, { 17189487779326395846u, 2145249268790815535u }, + { 13751590223461116677u, 1716199415032652428u }, { 18379969808252713988u, 1372959532026121942u }, + { 14650556434236701088u, 2196735251241795108u }, { 652398703163629901u, 1757388200993436087u }, + { 11589965406756634890u, 1405910560794748869u }, { 7475898206584884855u, 2249456897271598191u }, + { 2291369750525997561u, 1799565517817278553u }, { 9211793429904618695u, 1439652414253822842u }, + { 18428218302589300235u, 2303443862806116547u }, { 7363877012587619542u, 1842755090244893238u }, + { 13269799239553916280u, 1474204072195914590u }, { 10615839391643133024u, 1179363257756731672u }, + { 2227947767661371545u, 1886981212410770676u }, { 16539753473096738529u, 1509584969928616540u }, + { 13231802778477390823u, 1207667975942893232u }, { 6413489186596184024u, 1932268761508629172u 
}, + { 16198837793502678189u, 1545815009206903337u }, { 5580372605318321905u, 1236652007365522670u }, + { 8928596168509315048u, 1978643211784836272u }, { 18210923379033183008u, 1582914569427869017u }, + { 7190041073742725760u, 1266331655542295214u }, { 436019273762630246u, 2026130648867672343u }, + { 7727513048493924843u, 1620904519094137874u }, { 9871359253537050198u, 1296723615275310299u }, + { 4726128361433549347u, 2074757784440496479u }, { 7470251503888749801u, 1659806227552397183u }, + { 13354898832594820487u, 1327844982041917746u }, { 13989140502667892133u, 2124551971267068394u }, + { 14880661216876224029u, 1699641577013654715u }, { 11904528973500979224u, 1359713261610923772u }, + { 4289851098633925465u, 2175541218577478036u }, { 18189276137874781665u, 1740432974861982428u }, + { 3483374466074094362u, 1392346379889585943u }, { 1884050330976640656u, 2227754207823337509u }, + { 5196589079523222848u, 1782203366258670007u }, { 15225317707844309248u, 1425762693006936005u }, + { 5913764258841343181u, 2281220308811097609u }, { 8420360221814984868u, 1824976247048878087u }, + { 17804334621677718864u, 1459980997639102469u }, { 17932816512084085415u, 1167984798111281975u }, + { 10245762345624985047u, 1868775676978051161u }, { 4507261061758077715u, 1495020541582440929u }, + { 7295157664148372495u, 1196016433265952743u }, { 7982903447895485668u, 1913626293225524389u }, + { 10075671573058298858u, 1530901034580419511u }, { 4371188443704728763u, 1224720827664335609u }, + { 14372599139411386667u, 1959553324262936974u }, { 15187428126271019657u, 1567642659410349579u }, + { 15839291315758726049u, 1254114127528279663u }, { 3206773216762499739u, 2006582604045247462u }, + { 13633465017635730761u, 1605266083236197969u }, { 14596120828850494932u, 1284212866588958375u }, + { 4907049252451240275u, 2054740586542333401u }, { 236290587219081897u, 1643792469233866721u }, + { 14946427728742906810u, 1315033975387093376u }, { 16535586736504830250u, 2104054360619349402u }, + { 
5849771759720043554u, 1683243488495479522u }, { 15747863852001765813u, 1346594790796383617u }, + { 10439186904235184007u, 2154551665274213788u }, { 15730047152871967852u, 1723641332219371030u }, + { 12584037722297574282u, 1378913065775496824u }, { 9066413911450387881u, 2206260905240794919u }, + { 10942479943902220628u, 1765008724192635935u }, { 8753983955121776503u, 1412006979354108748u }, + { 10317025513452932081u, 2259211166966573997u }, { 874922781278525018u, 1807368933573259198u }, + { 8078635854506640661u, 1445895146858607358u }, { 13841606313089133175u, 1156716117486885886u }, + { 14767872471458792434u, 1850745787979017418u }, { 746251532941302978u, 1480596630383213935u }, + { 597001226353042382u, 1184477304306571148u }, { 15712597221132509104u, 1895163686890513836u }, + { 8880728962164096960u, 1516130949512411069u }, { 10793931984473187891u, 1212904759609928855u }, + { 17270291175157100626u, 1940647615375886168u }, { 2748186495899949531u, 1552518092300708935u }, + { 2198549196719959625u, 1242014473840567148u }, { 18275073973719576693u, 1987223158144907436u }, + { 10930710364233751031u, 1589778526515925949u }, { 12433917106128911148u, 1271822821212740759u }, + { 8826220925580526867u, 2034916513940385215u }, { 7060976740464421494u, 1627933211152308172u }, + { 16716827836597268165u, 1302346568921846537u }, { 11989529279587987770u, 2083754510274954460u }, + { 9591623423670390216u, 1667003608219963568u }, { 15051996368420132820u, 1333602886575970854u }, + { 13015147745246481542u, 2133764618521553367u }, { 3033420566713364587u, 1707011694817242694u }, + { 6116085268112601993u, 1365609355853794155u }, { 9785736428980163188u, 2184974969366070648u }, + { 15207286772667951197u, 1747979975492856518u }, { 1097782973908629988u, 1398383980394285215u }, + { 1756452758253807981u, 2237414368630856344u }, { 5094511021344956708u, 1789931494904685075u }, + { 4075608817075965366u, 1431945195923748060u }, { 6520974107321544586u, 2291112313477996896u }, + { 1527430471115325346u, 
1832889850782397517u }, { 12289990821117991246u, 1466311880625918013u }, + { 17210690286378213644u, 1173049504500734410u }, { 9090360384495590213u, 1876879207201175057u }, + { 18340334751822203140u, 1501503365760940045u }, { 14672267801457762512u, 1201202692608752036u }, + { 16096930852848599373u, 1921924308174003258u }, { 1809498238053148529u, 1537539446539202607u }, + { 12515645034668249793u, 1230031557231362085u }, { 1578287981759648052u, 1968050491570179337u }, + { 12330676829633449412u, 1574440393256143469u }, { 13553890278448669853u, 1259552314604914775u }, + { 3239480371808320148u, 2015283703367863641u }, { 17348979556414297411u, 1612226962694290912u }, + { 6500486015647617283u, 1289781570155432730u }, { 10400777625036187652u, 2063650512248692368u }, + { 15699319729512770768u, 1650920409798953894u }, { 16248804598352126938u, 1320736327839163115u }, + { 7551343283653851484u, 2113178124542660985u }, { 6041074626923081187u, 1690542499634128788u }, + { 12211557331022285596u, 1352433999707303030u }, { 1091747655926105338u, 2163894399531684849u }, + { 4562746939482794594u, 1731115519625347879u }, { 7339546366328145998u, 1384892415700278303u }, + { 8053925371383123274u, 2215827865120445285u }, { 6443140297106498619u, 1772662292096356228u }, + { 12533209867169019542u, 1418129833677084982u }, { 5295740528502789974u, 2269007733883335972u }, + { 15304638867027962949u, 1815206187106668777u }, { 4865013464138549713u, 1452164949685335022u }, + { 14960057215536570740u, 1161731959748268017u }, { 9178696285890871890u, 1858771135597228828u }, + { 14721654658196518159u, 1487016908477783062u }, { 4398626097073393881u, 1189613526782226450u }, + { 7037801755317430209u, 1903381642851562320u }, { 5630241404253944167u, 1522705314281249856u }, + { 814844308661245011u, 1218164251424999885u }, { 1303750893857992017u, 1949062802279999816u }, + { 15800395974054034906u, 1559250241823999852u }, { 5261619149759407279u, 1247400193459199882u }, + { 12107939454356961969u, 1995840309534719811u 
}, { 5997002748743659252u, 1596672247627775849u }, + { 8486951013736837725u, 1277337798102220679u }, { 2511075177753209390u, 2043740476963553087u }, + { 13076906586428298482u, 1634992381570842469u }, { 14150874083884549109u, 1307993905256673975u }, + { 4194654460505726958u, 2092790248410678361u }, { 18113118827372222859u, 1674232198728542688u }, + { 3422448617672047318u, 1339385758982834151u }, { 16543964232501006678u, 2143017214372534641u }, + { 9545822571258895019u, 1714413771498027713u }, { 15015355686490936662u, 1371531017198422170u }, + { 5577825024675947042u, 2194449627517475473u }, { 11840957649224578280u, 1755559702013980378u }, + { 16851463748863483271u, 1404447761611184302u }, { 12204946739213931940u, 2247116418577894884u }, + { 13453306206113055875u, 1797693134862315907u }, { 3383947335406624054u, 1438154507889852726u }, + { 16482362180876329456u, 2301047212623764361u }, { 9496540929959153242u, 1840837770099011489u }, + { 11286581558709232917u, 1472670216079209191u }, { 5339916432225476010u, 1178136172863367353u }, + { 4854517476818851293u, 1885017876581387765u }, { 3883613981455081034u, 1508014301265110212u }, + { 14174937629389795797u, 1206411441012088169u }, { 11611853762797942306u, 1930258305619341071u }, + { 5600134195496443521u, 1544206644495472857u }, { 15548153800622885787u, 1235365315596378285u }, + { 6430302007287065643u, 1976584504954205257u }, { 16212288050055383484u, 1581267603963364205u }, + { 12969830440044306787u, 1265014083170691364u }, { 9683682259845159889u, 2024022533073106183u }, + { 15125643437359948558u, 1619218026458484946u }, { 8411165935146048523u, 1295374421166787957u }, + { 17147214310975587960u, 2072599073866860731u }, { 10028422634038560045u, 1658079259093488585u }, + { 8022738107230848036u, 1326463407274790868u }, { 9147032156827446534u, 2122341451639665389u }, + { 11006974540203867551u, 1697873161311732311u }, { 5116230817421183718u, 1358298529049385849u }, + { 15564666937357714594u, 2173277646479017358u }, { 
1383687105660440706u, 1738622117183213887u }, + { 12174996128754083534u, 1390897693746571109u }, { 8411947361780802685u, 2225436309994513775u }, + { 6729557889424642148u, 1780349047995611020u }, { 5383646311539713719u, 1424279238396488816u }, + { 1235136468979721303u, 2278846781434382106u }, { 15745504434151418335u, 1823077425147505684u }, + { 16285752362063044992u, 1458461940118004547u }, { 5649904260166615347u, 1166769552094403638u }, + { 5350498001524674232u, 1866831283351045821u }, { 591049586477829062u, 1493465026680836657u }, + { 11540886113407994219u, 1194772021344669325u }, { 18673707743239135u, 1911635234151470921u }, + { 14772334225162232601u, 1529308187321176736u }, { 8128518565387875758u, 1223446549856941389u }, + { 1937583260394870242u, 1957514479771106223u }, { 8928764237799716840u, 1566011583816884978u }, + { 14521709019723594119u, 1252809267053507982u }, { 8477339172590109297u, 2004494827285612772u }, + { 17849917782297818407u, 1603595861828490217u }, { 6901236596354434079u, 1282876689462792174u }, + { 18420676183650915173u, 2052602703140467478u }, { 3668494502695001169u, 1642082162512373983u }, + { 10313493231639821582u, 1313665730009899186u }, { 9122891541139893884u, 2101865168015838698u }, + { 14677010862395735754u, 1681492134412670958u }, { 673562245690857633u, 1345193707530136767u } +}; + +static const uint64_t DOUBLE_POW5_SPLIT[DOUBLE_POW5_TABLE_SIZE][2] = { + { 0u, 1152921504606846976u }, { 0u, 1441151880758558720u }, + { 0u, 1801439850948198400u }, { 0u, 2251799813685248000u }, + { 0u, 1407374883553280000u }, { 0u, 1759218604441600000u }, + { 0u, 2199023255552000000u }, { 0u, 1374389534720000000u }, + { 0u, 1717986918400000000u }, { 0u, 2147483648000000000u }, + { 0u, 1342177280000000000u }, { 0u, 1677721600000000000u }, + { 0u, 2097152000000000000u }, { 0u, 1310720000000000000u }, + { 0u, 1638400000000000000u }, { 0u, 2048000000000000000u }, + { 0u, 1280000000000000000u }, { 0u, 1600000000000000000u }, + { 0u, 2000000000000000000u }, { 0u, 
1250000000000000000u }, + { 0u, 1562500000000000000u }, { 0u, 1953125000000000000u }, + { 0u, 1220703125000000000u }, { 0u, 1525878906250000000u }, + { 0u, 1907348632812500000u }, { 0u, 1192092895507812500u }, + { 0u, 1490116119384765625u }, { 4611686018427387904u, 1862645149230957031u }, + { 9799832789158199296u, 1164153218269348144u }, { 12249790986447749120u, 1455191522836685180u }, + { 15312238733059686400u, 1818989403545856475u }, { 14528612397897220096u, 2273736754432320594u }, + { 13692068767113150464u, 1421085471520200371u }, { 12503399940464050176u, 1776356839400250464u }, + { 15629249925580062720u, 2220446049250313080u }, { 9768281203487539200u, 1387778780781445675u }, + { 7598665485932036096u, 1734723475976807094u }, { 274959820560269312u, 2168404344971008868u }, + { 9395221924704944128u, 1355252715606880542u }, { 2520655369026404352u, 1694065894508600678u }, + { 12374191248137781248u, 2117582368135750847u }, { 14651398557727195136u, 1323488980084844279u }, + { 13702562178731606016u, 1654361225106055349u }, { 3293144668132343808u, 2067951531382569187u }, + { 18199116482078572544u, 1292469707114105741u }, { 8913837547316051968u, 1615587133892632177u }, + { 15753982952572452864u, 2019483917365790221u }, { 12152082354571476992u, 1262177448353618888u }, + { 15190102943214346240u, 1577721810442023610u }, { 9764256642163156992u, 1972152263052529513u }, + { 17631875447420442880u, 1232595164407830945u }, { 8204786253993389888u, 1540743955509788682u }, + { 1032610780636961552u, 1925929944387235853u }, { 2951224747111794922u, 1203706215242022408u }, + { 3689030933889743652u, 1504632769052528010u }, { 13834660704216955373u, 1880790961315660012u }, + { 17870034976990372916u, 1175494350822287507u }, { 17725857702810578241u, 1469367938527859384u }, + { 3710578054803671186u, 1836709923159824231u }, { 26536550077201078u, 2295887403949780289u }, + { 11545800389866720434u, 1434929627468612680u }, { 14432250487333400542u, 1793662034335765850u }, + { 8816941072311974870u, 
2242077542919707313u }, { 17039803216263454053u, 1401298464324817070u }, + { 12076381983474541759u, 1751623080406021338u }, { 5872105442488401391u, 2189528850507526673u }, + { 15199280947623720629u, 1368455531567204170u }, { 9775729147674874978u, 1710569414459005213u }, + { 16831347453020981627u, 2138211768073756516u }, { 1296220121283337709u, 1336382355046097823u }, + { 15455333206886335848u, 1670477943807622278u }, { 10095794471753144002u, 2088097429759527848u }, + { 6309871544845715001u, 1305060893599704905u }, { 12499025449484531656u, 1631326116999631131u }, + { 11012095793428276666u, 2039157646249538914u }, { 11494245889320060820u, 1274473528905961821u }, + { 532749306367912313u, 1593091911132452277u }, { 5277622651387278295u, 1991364888915565346u }, + { 7910200175544436838u, 1244603055572228341u }, { 14499436237857933952u, 1555753819465285426u }, + { 8900923260467641632u, 1944692274331606783u }, { 12480606065433357876u, 1215432671457254239u }, + { 10989071563364309441u, 1519290839321567799u }, { 9124653435777998898u, 1899113549151959749u }, + { 8008751406574943263u, 1186945968219974843u }, { 5399253239791291175u, 1483682460274968554u }, + { 15972438586593889776u, 1854603075343710692u }, { 759402079766405302u, 1159126922089819183u }, + { 14784310654990170340u, 1448908652612273978u }, { 9257016281882937117u, 1811135815765342473u }, + { 16182956370781059300u, 2263919769706678091u }, { 7808504722524468110u, 1414949856066673807u }, + { 5148944884728197234u, 1768687320083342259u }, { 1824495087482858639u, 2210859150104177824u }, + { 1140309429676786649u, 1381786968815111140u }, { 1425386787095983311u, 1727233711018888925u }, + { 6393419502297367043u, 2159042138773611156u }, { 13219259225790630210u, 1349401336733506972u }, + { 16524074032238287762u, 1686751670916883715u }, { 16043406521870471799u, 2108439588646104644u }, + { 803757039314269066u, 1317774742903815403u }, { 14839754354425000045u, 1647218428629769253u }, + { 4714634887749086344u, 2059023035787211567u }, 
{ 9864175832484260821u, 1286889397367007229u }, + { 16941905809032713930u, 1608611746708759036u }, { 2730638187581340797u, 2010764683385948796u }, + { 10930020904093113806u, 1256727927116217997u }, { 18274212148543780162u, 1570909908895272496u }, + { 4396021111970173586u, 1963637386119090621u }, { 5053356204195052443u, 1227273366324431638u }, + { 15540067292098591362u, 1534091707905539547u }, { 14813398096695851299u, 1917614634881924434u }, + { 13870059828862294966u, 1198509146801202771u }, { 12725888767650480803u, 1498136433501503464u }, + { 15907360959563101004u, 1872670541876879330u }, { 14553786618154326031u, 1170419088673049581u }, + { 4357175217410743827u, 1463023860841311977u }, { 10058155040190817688u, 1828779826051639971u }, + { 7961007781811134206u, 2285974782564549964u }, { 14199001900486734687u, 1428734239102843727u }, + { 13137066357181030455u, 1785917798878554659u }, { 11809646928048900164u, 2232397248598193324u }, + { 16604401366885338411u, 1395248280373870827u }, { 16143815690179285109u, 1744060350467338534u }, + { 10956397575869330579u, 2180075438084173168u }, { 6847748484918331612u, 1362547148802608230u }, + { 17783057643002690323u, 1703183936003260287u }, { 17617136035325974999u, 2128979920004075359u }, + { 17928239049719816230u, 1330612450002547099u }, { 17798612793722382384u, 1663265562503183874u }, + { 13024893955298202172u, 2079081953128979843u }, { 5834715712847682405u, 1299426220705612402u }, + { 16516766677914378815u, 1624282775882015502u }, { 11422586310538197711u, 2030353469852519378u }, + { 11750802462513761473u, 1268970918657824611u }, { 10076817059714813937u, 1586213648322280764u }, + { 12596021324643517422u, 1982767060402850955u }, { 5566670318688504437u, 1239229412751781847u }, + { 2346651879933242642u, 1549036765939727309u }, { 7545000868343941206u, 1936295957424659136u }, + { 4715625542714963254u, 1210184973390411960u }, { 5894531928393704067u, 1512731216738014950u }, + { 16591536947346905892u, 1890914020922518687u }, { 
17287239619732898039u, 1181821263076574179u }, + { 16997363506238734644u, 1477276578845717724u }, { 2799960309088866689u, 1846595723557147156u }, + { 10973347230035317489u, 1154122327223216972u }, { 13716684037544146861u, 1442652909029021215u }, + { 12534169028502795672u, 1803316136286276519u }, { 11056025267201106687u, 2254145170357845649u }, + { 18439230838069161439u, 1408840731473653530u }, { 13825666510731675991u, 1761050914342066913u }, + { 3447025083132431277u, 2201313642927583642u }, { 6766076695385157452u, 1375821026829739776u }, + { 8457595869231446815u, 1719776283537174720u }, { 10571994836539308519u, 2149720354421468400u }, + { 6607496772837067824u, 1343575221513417750u }, { 17482743002901110588u, 1679469026891772187u }, + { 17241742735199000331u, 2099336283614715234u }, { 15387775227926763111u, 1312085177259197021u }, + { 5399660979626290177u, 1640106471573996277u }, { 11361262242960250625u, 2050133089467495346u }, + { 11712474920277544544u, 1281333180917184591u }, { 10028907631919542777u, 1601666476146480739u }, + { 7924448521472040567u, 2002083095183100924u }, { 14176152362774801162u, 1251301934489438077u }, + { 3885132398186337741u, 1564127418111797597u }, { 9468101516160310080u, 1955159272639746996u }, + { 15140935484454969608u, 1221974545399841872u }, { 479425281859160394u, 1527468181749802341u }, + { 5210967620751338397u, 1909335227187252926u }, { 17091912818251750210u, 1193334516992033078u }, + { 12141518985959911954u, 1491668146240041348u }, { 15176898732449889943u, 1864585182800051685u }, + { 11791404716994875166u, 1165365739250032303u }, { 10127569877816206054u, 1456707174062540379u }, + { 8047776328842869663u, 1820883967578175474u }, { 836348374198811271u, 2276104959472719343u }, + { 7440246761515338900u, 1422565599670449589u }, { 13911994470321561530u, 1778206999588061986u }, + { 8166621051047176104u, 2222758749485077483u }, { 2798295147690791113u, 1389224218428173427u }, + { 17332926989895652603u, 1736530273035216783u }, { 
17054472718942177850u, 2170662841294020979u }, + { 8353202440125167204u, 1356664275808763112u }, { 10441503050156459005u, 1695830344760953890u }, + { 3828506775840797949u, 2119787930951192363u }, { 86973725686804766u, 1324867456844495227u }, + { 13943775212390669669u, 1656084321055619033u }, { 3594660960206173375u, 2070105401319523792u }, + { 2246663100128858359u, 1293815875824702370u }, { 12031700912015848757u, 1617269844780877962u }, + { 5816254103165035138u, 2021587305976097453u }, { 5941001823691840913u, 1263492066235060908u }, + { 7426252279614801142u, 1579365082793826135u }, { 4671129331091113523u, 1974206353492282669u }, + { 5225298841145639904u, 1233878970932676668u }, { 6531623551432049880u, 1542348713665845835u }, + { 3552843420862674446u, 1927935892082307294u }, { 16055585193321335241u, 1204959932551442058u }, + { 10846109454796893243u, 1506199915689302573u }, { 18169322836923504458u, 1882749894611628216u }, + { 11355826773077190286u, 1176718684132267635u }, { 9583097447919099954u, 1470898355165334544u }, + { 11978871809898874942u, 1838622943956668180u }, { 14973589762373593678u, 2298278679945835225u }, + { 2440964573842414192u, 1436424174966147016u }, { 3051205717303017741u, 1795530218707683770u }, + { 13037379183483547984u, 2244412773384604712u }, { 8148361989677217490u, 1402757983365377945u }, + { 14797138505523909766u, 1753447479206722431u }, { 13884737113477499304u, 2191809349008403039u }, + { 15595489723564518921u, 1369880843130251899u }, { 14882676136028260747u, 1712351053912814874u }, + { 9379973133180550126u, 2140438817391018593u }, { 17391698254306313589u, 1337774260869386620u }, + { 3292878744173340370u, 1672217826086733276u }, { 4116098430216675462u, 2090272282608416595u }, + { 266718509671728212u, 1306420176630260372u }, { 333398137089660265u, 1633025220787825465u }, + { 5028433689789463235u, 2041281525984781831u }, { 10060300083759496378u, 1275800953740488644u }, + { 12575375104699370472u, 1594751192175610805u }, { 1884160825592049379u, 
1993438990219513507u }, + { 17318501580490888525u, 1245899368887195941u }, { 7813068920331446945u, 1557374211108994927u }, + { 5154650131986920777u, 1946717763886243659u }, { 915813323278131534u, 1216698602428902287u }, + { 14979824709379828129u, 1520873253036127858u }, { 9501408849870009354u, 1901091566295159823u }, + { 12855909558809837702u, 1188182228934474889u }, { 2234828893230133415u, 1485227786168093612u }, + { 2793536116537666769u, 1856534732710117015u }, { 8663489100477123587u, 1160334207943823134u }, + { 1605989338741628675u, 1450417759929778918u }, { 11230858710281811652u, 1813022199912223647u }, + { 9426887369424876662u, 2266277749890279559u }, { 12809333633531629769u, 1416423593681424724u }, + { 16011667041914537212u, 1770529492101780905u }, { 6179525747111007803u, 2213161865127226132u }, + { 13085575628799155685u, 1383226165704516332u }, { 16356969535998944606u, 1729032707130645415u }, + { 15834525901571292854u, 2161290883913306769u }, { 2979049660840976177u, 1350806802445816731u }, + { 17558870131333383934u, 1688508503057270913u }, { 8113529608884566205u, 2110635628821588642u }, + { 9682642023980241782u, 1319147268013492901u }, { 16714988548402690132u, 1648934085016866126u }, + { 11670363648648586857u, 2061167606271082658u }, { 11905663298832754689u, 1288229753919426661u }, + { 1047021068258779650u, 1610287192399283327u }, { 15143834390605638274u, 2012858990499104158u }, + { 4853210475701136017u, 1258036869061940099u }, { 1454827076199032118u, 1572546086327425124u }, + { 1818533845248790147u, 1965682607909281405u }, { 3442426662494187794u, 1228551629943300878u }, + { 13526405364972510550u, 1535689537429126097u }, { 3072948650933474476u, 1919611921786407622u }, + { 15755650962115585259u, 1199757451116504763u }, { 15082877684217093670u, 1499696813895630954u }, + { 9630225068416591280u, 1874621017369538693u }, { 8324733676974063502u, 1171638135855961683u }, + { 5794231077790191473u, 1464547669819952104u }, { 7242788847237739342u, 1830684587274940130u }, 
+ { 18276858095901949986u, 2288355734093675162u }, { 16034722328366106645u, 1430222333808546976u }, + { 1596658836748081690u, 1787777917260683721u }, { 6607509564362490017u, 2234722396575854651u }, + { 1823850468512862308u, 1396701497859909157u }, { 6891499104068465790u, 1745876872324886446u }, + { 17837745916940358045u, 2182346090406108057u }, { 4231062170446641922u, 1363966306503817536u }, + { 5288827713058302403u, 1704957883129771920u }, { 6611034641322878003u, 2131197353912214900u }, + { 13355268687681574560u, 1331998346195134312u }, { 16694085859601968200u, 1664997932743917890u }, + { 11644235287647684442u, 2081247415929897363u }, { 4971804045566108824u, 1300779634956185852u }, + { 6214755056957636030u, 1625974543695232315u }, { 3156757802769657134u, 2032468179619040394u }, + { 6584659645158423613u, 1270292612261900246u }, { 17454196593302805324u, 1587865765327375307u }, + { 17206059723201118751u, 1984832206659219134u }, { 6142101308573311315u, 1240520129162011959u }, + { 3065940617289251240u, 1550650161452514949u }, { 8444111790038951954u, 1938312701815643686u }, + { 665883850346957067u, 1211445438634777304u }, { 832354812933696334u, 1514306798293471630u }, + { 10263815553021896226u, 1892883497866839537u }, { 17944099766707154901u, 1183052186166774710u }, + { 13206752671529167818u, 1478815232708468388u }, { 16508440839411459773u, 1848519040885585485u }, + { 12623618533845856310u, 1155324400553490928u }, { 15779523167307320387u, 1444155500691863660u }, + { 1277659885424598868u, 1805194375864829576u }, { 1597074856780748586u, 2256492969831036970u }, + { 5609857803915355770u, 1410308106144398106u }, { 16235694291748970521u, 1762885132680497632u }, + { 1847873790976661535u, 2203606415850622041u }, { 12684136165428883219u, 1377254009906638775u }, + { 11243484188358716120u, 1721567512383298469u }, { 219297180166231438u, 2151959390479123087u }, + { 7054589765244976505u, 1344974619049451929u }, { 13429923224983608535u, 1681218273811814911u }, + { 
12175718012802122765u, 2101522842264768639u }, { 14527352785642408584u, 1313451776415480399u }, + { 13547504963625622826u, 1641814720519350499u }, { 12322695186104640628u, 2052268400649188124u }, + { 16925056528170176201u, 1282667750405742577u }, { 7321262604930556539u, 1603334688007178222u }, + { 18374950293017971482u, 2004168360008972777u }, { 4566814905495150320u, 1252605225005607986u }, + { 14931890668723713708u, 1565756531257009982u }, { 9441491299049866327u, 1957195664071262478u }, + { 1289246043478778550u, 1223247290044539049u }, { 6223243572775861092u, 1529059112555673811u }, + { 3167368447542438461u, 1911323890694592264u }, { 1979605279714024038u, 1194577431684120165u }, + { 7086192618069917952u, 1493221789605150206u }, { 18081112809442173248u, 1866527237006437757u }, + { 13606538515115052232u, 1166579523129023598u }, { 7784801107039039482u, 1458224403911279498u }, + { 507629346944023544u, 1822780504889099373u }, { 5246222702107417334u, 2278475631111374216u }, + { 3278889188817135834u, 1424047269444608885u }, { 8710297504448807696u, 1780059086805761106u } +};

// IEEE 754 double precision constants
#define DOUBLE_MANTISSA_BITS 52
#define DOUBLE_EXPONENT_BITS 11
#define DOUBLE_EXPONENT_BIAS 1023

// Helper: floor(log2(value)), computed as 63 minus the leading-zero count.
// NOTE(review): value is presumably required to be non-zero; confirm against
// the definition of ryu_leading_zeros64.
static inline uint32_t floor_log2(const uint64_t value) {
    return 63 - ryu_leading_zeros64(value);
}

// Helper: log2(5^e) approximation.
// Fixed-point multiply: 1217359 / 2^19 ~= 2.321928 ~= log2(5).
// The cast to uint32_t means e must be non-negative for a meaningful result.
static inline int32_t log2pow5(const int32_t e) {
    return (int32_t) ((((uint32_t) e) * 1217359) >> 19);
}

// Helper: ceil(log2(5^e)) for e > 0.
// Note that for e == 0 this returns 1 (log2pow5(0) + 1), not 0.
static inline int32_t ceil_log2pow5(const int32_t e) {
    return log2pow5(e) + 1;
}

// Helper: max of two int32
static inline int32_t max32(int32_t a, int32_t b) {
    return a < b ? b : a;
}

// Helper: reinterpret the bits of a uint64 as a double.
// memcpy is used so the type pun does not violate strict aliasing.
static inline double int64Bits2Double(uint64_t bits) {
    double f;
    memcpy(&f, &bits, sizeof(double));
    return f;
}

// Check if value is a multiple of 2^p (i.e. its low p bits are all zero).
// Precondition: p < 64; a shift by 64 or more is undefined behaviour.
static inline bool multipleOfPowerOf2(const uint64_t value, const uint32_t p) {
    return (value & ((1ull << p) - 1)) == 0;
}

// Count how many times value is divisible by 5.
// Uses the modular inverse of 5 to avoid expensive division: if value is a
// multiple of 5, value * m_inv_5 (mod 2^64) equals value / 5 and is therefore
// <= 2^64 / 5; otherwise the product lands above that bound.
// Precondition: value != 0. Zero stays zero under the multiplication and
// never exceeds n_div_5, so the loop would not terminate.
static inline uint32_t pow5Factor(uint64_t value) {
    const uint64_t m_inv_5 = 14757395258967641293u; // 5 * m_inv_5 = 1 (mod 2^64)
    const uint64_t n_div_5 = 3689348814741910323u;  // 2^64 / 5
    uint32_t count = 0;
    for (;;) {
        value *= m_inv_5;
        if (value > n_div_5) {
            break;
        }
        ++count;
    }
    return count;
}

// Check if value is a multiple of 5^p.
// Optimized: uses modular inverse instead of division.
// Precondition: value != 0 (see pow5Factor).
static inline bool multipleOfPowerOf5(const uint64_t value, const uint32_t p) {
    return pow5Factor(value) >= p;
}

// 128-bit multiplication with shift.
// This is the core operation for converting decimal to binary.
// mul points at a 128-bit factor stored as { low 64 bits, high 64 bits }.
// Both variants return the low 64 bits of (m * factor) >> j and shift by
// (j - 64), so callers must pass j >= 64.
#if defined(__SIZEOF_INT128__)
// Use native 128-bit integers if available (GCC/Clang)
static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) {
    const unsigned __int128 b0 = ((unsigned __int128) m) * mul[0];
    const unsigned __int128 b2 = ((unsigned __int128) m) * mul[1];
    // (b0 >> 64) + b2 is the full product shifted right by 64.
    return (uint64_t) (((b0 >> 64) + b2) >> (j - 64));
}
#else
// Fallback for systems without 128-bit integers.
// Full 64x64 -> 128 bit multiply built from four 32x32 -> 64 bit products.
// Returns the low 64 bits and stores the high 64 bits in *productHi.
static inline uint64_t umul128(const uint64_t a, const uint64_t b, uint64_t* const productHi) {
    const uint32_t aLo = (uint32_t)a;
    const uint32_t aHi = (uint32_t)(a >> 32);
    const uint32_t bLo = (uint32_t)b;
    const uint32_t bHi = (uint32_t)(b >> 32);

    const uint64_t b00 = (uint64_t)aLo * bLo;
    const uint64_t b01 = (uint64_t)aLo * bHi;
    const uint64_t b10 = (uint64_t)aHi * bLo;
    const uint64_t b11 = (uint64_t)aHi * bHi;

    const uint32_t b00Lo = (uint32_t)b00;
    const uint32_t b00Hi = (uint32_t)(b00 >> 32);

    // Accumulate the two middle partial products one at a time so that
    // neither 64-bit addition can overflow.
    const uint64_t mid1 = b10 + b00Hi;
    const uint32_t mid1Lo = (uint32_t)(mid1);
    const uint32_t mid1Hi = (uint32_t)(mid1 >> 32);

    const uint64_t mid2 = b01 + mid1Lo;
    const uint32_t mid2Lo = (uint32_t)(mid2);
    const uint32_t mid2Hi = (uint32_t)(mid2 >> 32);

    const uint64_t pHi = b11 + mid1Hi + mid2Hi;
    const uint64_t pLo = ((uint64_t)mid2Lo << 32) | b00Lo;

    *productHi = pHi;
    return pLo;
}

// Low 64 bits of the 128-bit value (hi:lo) shifted right by dist.
// Precondition: 0 < dist < 64; dist == 0 would shift hi by 64, which is
// undefined behaviour.
static inline uint64_t shiftright128(const uint64_t lo, const uint64_t hi, const uint32_t dist) {
    return (hi << (64 - dist)) | (lo >> dist);
}

static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) {
    uint64_t high1;
    const uint64_t low1 = umul128(m, mul[1], &high1);
    uint64_t high0;
    umul128(m, mul[0], &high0);
    const uint64_t sum = high0 + low1;
    if (sum < high0) {
        ++high1; // carry out of the middle 64 bits
    }
    return shiftright128(sum, high1, j - 64);
}
#endif

// Main conversion function: decimal mantissa+exponent to IEEE 754 double.
// Optimized for JSON parsing with fast paths for edge cases.
//
//   m10       decimal mantissa (all significant digits as an integer)
//   m10digits number of decimal digits in m10
//   e10       decimal exponent, so the value is m10 * 10^e10
//   signedM   true for a negative number
//
// Returns the double for (-1)^signedM * m10 * 10^e10, using round-half-to-even
// on the bits shifted out, with +/-0 on underflow and +/-infinity on overflow.
//
// NOTE(review): e10 and -e10 are used directly as indices into
// DOUBLE_POW5_SPLIT / DOUBLE_POW5_INV_SPLIT. The range checks below bound them
// only relative to m10digits, so the caller presumably limits m10digits;
// confirm against the table sizes and the call site.
static inline double ryu_s2d_from_parts(uint64_t m10, int m10digits, int32_t e10, bool signedM) {
    // Fast path: handle zero explicitly (e.g., "0.0", "0e0")
    if (m10 == 0) {
        return int64Bits2Double(((uint64_t) signedM) << 63);
    }

    // Sum in 64 bits: e10 is caller-supplied, and m10digits + e10 evaluated in
    // int could overflow (undefined behaviour) for an extreme exponent,
    // turning what should be an overflow to infinity into a zero result.
    const int64_t magnitude = (int64_t) m10digits + (int64_t) e10;

    // Fast path: handle overflow/underflow early
    if (magnitude <= -324) {
        // Underflow to zero
        return int64Bits2Double(((uint64_t) signedM) << 63);
    }

    if (magnitude >= 310) {
        // Overflow to infinity
        return int64Bits2Double((((uint64_t) signedM) << 63) | 0x7ff0000000000000ULL);
    }

    // Convert decimal to binary: m10 * 10^e10 = m2 * 2^e2
    int32_t e2;
    uint64_t m2;
    bool trailingZeros;

    if (e10 >= 0) {
        // Positive exponent: multiply by 5^e10 and adjust binary exponent
        e2 = floor_log2(m10) + e10 + log2pow5(e10) - (DOUBLE_MANTISSA_BITS + 1);
        int j = e2 - e10 - ceil_log2pow5(e10) + DOUBLE_POW5_BITCOUNT;
        m2 = mulShift64(m10, DOUBLE_POW5_SPLIT[e10], j);
        // The e2 - e10 < 64 guard keeps the shift inside multipleOfPowerOf2 defined.
        trailingZeros = e2 < e10 || (e2 - e10 < 64 && multipleOfPowerOf2(m10, e2 - e10));
    } else {
        // Negative exponent: divide by 5^(-e10)
        e2 = floor_log2(m10) + e10 - ceil_log2pow5(-e10) - (DOUBLE_MANTISSA_BITS + 1);
        int j = e2 - e10 + ceil_log2pow5(-e10) - 1 + DOUBLE_POW5_INV_BITCOUNT;
        m2 = mulShift64(m10, DOUBLE_POW5_INV_SPLIT[-e10], j);
        trailingZeros = multipleOfPowerOf5(m10, -e10);
    }

    // Compute IEEE 754 exponent (clamped at 0 for subnormals)
    uint32_t ieee_e2 = (uint32_t) max32(0, e2 + DOUBLE_EXPONENT_BIAS + floor_log2(m2));

    if (ieee_e2 > 0x7fe) {
        // Overflow to infinity
        return int64Bits2Double((((uint64_t) signedM) << 63) | 0x7ff0000000000000ULL);
    }

    // Compute shift amount for rounding; subnormals use the exponent of 1.
    int32_t shift = (ieee_e2 == 0 ? 1 : ieee_e2) - e2 - DOUBLE_EXPONENT_BIAS - DOUBLE_MANTISSA_BITS;

    // IEEE 754 round-to-even (banker's rounding):
    // round up when the last removed bit is set and either some lower removed
    // bit is non-zero or the kept value is odd.
    trailingZeros &= (m2 & ((1ull << (shift - 1)) - 1)) == 0;
    uint64_t lastRemovedBit = (m2 >> (shift - 1)) & 1;
    bool roundUp = (lastRemovedBit != 0) && (!trailingZeros || (((m2 >> shift) & 1) != 0));

    uint64_t ieee_m2 = (m2 >> shift) + roundUp;
    ieee_m2 &= (1ull << DOUBLE_MANTISSA_BITS) - 1;

    if (ieee_m2 == 0 && roundUp) {
        // Rounding carried out of the mantissa field: bump the exponent.
        ieee_e2++;
    }

    // Pack sign, exponent, and mantissa into IEEE 754 format
    // Match original Ryu: group sign+exponent, then shift and add mantissa
    uint64_t ieee = (((((uint64_t) signedM) << DOUBLE_EXPONENT_BITS) | (uint64_t)ieee_e2) << DOUBLE_MANTISSA_BITS) | ieee_m2;
    return int64Bits2Double(ieee);
}

#endif // RYU_H
