diff options
Diffstat (limited to 'ext/json')
| -rw-r--r-- | ext/json/fbuffer/fbuffer.h | 204 | ||||
| -rw-r--r-- | ext/json/generator/depend | 5 | ||||
| -rw-r--r-- | ext/json/generator/extconf.rb | 9 | ||||
| -rw-r--r-- | ext/json/generator/generator.c | 1800 | ||||
| -rw-r--r-- | ext/json/json.gemspec | 10 | ||||
| -rw-r--r-- | ext/json/json.h | 134 | ||||
| -rw-r--r-- | ext/json/lib/json.rb | 92 | ||||
| -rw-r--r-- | ext/json/lib/json/add/core.rb | 1 | ||||
| -rw-r--r-- | ext/json/lib/json/add/string.rb | 35 | ||||
| -rw-r--r-- | ext/json/lib/json/add/symbol.rb | 9 | ||||
| -rw-r--r-- | ext/json/lib/json/common.rb | 654 | ||||
| -rw-r--r-- | ext/json/lib/json/ext.rb | 4 | ||||
| -rw-r--r-- | ext/json/lib/json/ext/generator/state.rb | 26 | ||||
| -rw-r--r-- | ext/json/lib/json/generic_object.rb | 8 | ||||
| -rw-r--r-- | ext/json/lib/json/version.rb | 2 | ||||
| -rw-r--r-- | ext/json/parser/depend | 5 | ||||
| -rw-r--r-- | ext/json/parser/extconf.rb | 15 | ||||
| -rw-r--r-- | ext/json/parser/parser.c | 4088 | ||||
| -rw-r--r-- | ext/json/parser/parser.rl | 1434 | ||||
| -rw-r--r-- | ext/json/parser/prereq.mk | 13 | ||||
| -rw-r--r-- | ext/json/simd/conf.rb | 24 | ||||
| -rw-r--r-- | ext/json/simd/simd.h | 208 | ||||
| -rw-r--r-- | ext/json/vendor/fpconv.c | 480 | ||||
| -rw-r--r-- | ext/json/vendor/jeaiii-ltoa.h | 267 | ||||
| -rw-r--r-- | ext/json/vendor/ryu.h | 819 |
25 files changed, 5314 insertions, 5032 deletions
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h index 0774c7e464..b4f5266ca5 100644 --- a/ext/json/fbuffer/fbuffer.h +++ b/ext/json/fbuffer/fbuffer.h @@ -1,39 +1,8 @@ #ifndef _FBUFFER_H_ #define _FBUFFER_H_ -#include "ruby.h" -#include "ruby/encoding.h" - -/* shims */ -/* This is the fallback definition from Ruby 3.4 */ - -#ifndef RBIMPL_STDBOOL_H -#if defined(__cplusplus) -# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) -# include <cstdbool> -# endif -#elif defined(HAVE_STDBOOL_H) -# include <stdbool.h> -#elif !defined(HAVE__BOOL) -typedef unsigned char _Bool; -# define bool _Bool -# define true ((_Bool)+1) -# define false ((_Bool)+0) -# define __bool_true_false_are_defined -#endif -#endif - -#ifndef RB_UNLIKELY -#define RB_UNLIKELY(expr) expr -#endif - -#ifndef RB_LIKELY -#define RB_LIKELY(expr) expr -#endif - -#ifndef MAYBE_UNUSED -# define MAYBE_UNUSED(x) x -#endif +#include "../json.h" +#include "../vendor/jeaiii-ltoa.h" enum fbuffer_type { FBUFFER_HEAP_ALLOCATED = 0, @@ -42,9 +11,12 @@ enum fbuffer_type { typedef struct FBufferStruct { enum fbuffer_type type; - unsigned long initial_length; - unsigned long len; - unsigned long capa; + size_t initial_length; + size_t len; + size_t capa; +#if JSON_DEBUG + size_t requested; +#endif char *ptr; VALUE io; } FBuffer; @@ -59,19 +31,13 @@ typedef struct FBufferStruct { #define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb) static void fbuffer_free(FBuffer *fb); -#ifndef JSON_GENERATOR static void fbuffer_clear(FBuffer *fb); -#endif -static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len); -#ifdef JSON_GENERATOR +static void fbuffer_append(FBuffer *fb, const char *newstr, size_t len); static void fbuffer_append_long(FBuffer *fb, long number); -#endif static inline void fbuffer_append_char(FBuffer *fb, char newchr); -#ifdef JSON_GENERATOR static VALUE fbuffer_finalize(FBuffer *fb); -#endif -static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, 
char *stack_buffer, long stack_buffer_size) +static void fbuffer_stack_init(FBuffer *fb, size_t initial_length, char *stack_buffer, size_t stack_buffer_size) { fb->initial_length = (initial_length > 0) ? initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT; if (stack_buffer) { @@ -79,12 +45,26 @@ static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char * fb->ptr = stack_buffer; fb->capa = stack_buffer_size; } +#if JSON_DEBUG + fb->requested = 0; +#endif +} + +static inline void fbuffer_consumed(FBuffer *fb, size_t consumed) +{ +#if JSON_DEBUG + if (consumed > fb->requested) { + rb_bug("fbuffer: Out of bound write"); + } + fb->requested = 0; +#endif + fb->len += consumed; } static void fbuffer_free(FBuffer *fb) { if (fb->ptr && fb->type == FBUFFER_HEAP_ALLOCATED) { - ruby_xfree(fb->ptr); + JSON_SIZED_FREE_N(fb->ptr, fb->capa); } } @@ -99,7 +79,7 @@ static void fbuffer_flush(FBuffer *fb) fbuffer_clear(fb); } -static void fbuffer_realloc(FBuffer *fb, unsigned long required) +static void fbuffer_realloc(FBuffer *fb, size_t required) { if (required > fb->capa) { if (fb->type == FBUFFER_STACK_ALLOCATED) { @@ -108,13 +88,13 @@ static void fbuffer_realloc(FBuffer *fb, unsigned long required) fb->type = FBUFFER_HEAP_ALLOCATED; MEMCPY(fb->ptr, old_buffer, char, fb->len); } else { - REALLOC_N(fb->ptr, char, required); + JSON_SIZED_REALLOC_N(fb->ptr, char, required, fb->capa); } fb->capa = required; } } -static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) +static void fbuffer_do_inc_capa(FBuffer *fb, size_t requested) { if (RB_UNLIKELY(fb->io)) { if (fb->capa < FBUFFER_IO_BUFFER_SIZE) { @@ -128,7 +108,7 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) } } - unsigned long required; + size_t required; if (RB_UNLIKELY(!fb->ptr)) { fb->ptr = ALLOC_N(char, fb->initial_length); @@ -140,75 +120,141 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) fbuffer_realloc(fb, required); } -static inline void 
fbuffer_inc_capa(FBuffer *fb, unsigned long requested) +static inline void fbuffer_inc_capa(FBuffer *fb, size_t requested) { +#if JSON_DEBUG + fb->requested = requested; +#endif + if (RB_UNLIKELY(requested > fb->capa - fb->len)) { fbuffer_do_inc_capa(fb, requested); } } -static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len) +static inline size_t fbuffer_size_mul_or_raise(size_t a, size_t b) +{ + size_t result = a * b; + if (RB_UNLIKELY(a != 0 && (result / a) != b)) { + rb_raise(rb_eArgError, "Buffer overflow, the resulting document is too large to be generated"); + } + return result; +} + +static inline void fbuffer_append_reserved(FBuffer *fb, const char *newstr, size_t len) +{ + MEMCPY(fb->ptr + fb->len, newstr, char, len); + fbuffer_consumed(fb, len); +} + +static inline void fbuffer_append(FBuffer *fb, const char *newstr, size_t len) { if (len > 0) { fbuffer_inc_capa(fb, len); - MEMCPY(fb->ptr + fb->len, newstr, char, len); - fb->len += len; + fbuffer_append_reserved(fb, newstr, len); + } +} + +/* Appends a character into a buffer. The buffer needs to have sufficient capacity, via fbuffer_inc_capa(...). 
*/ +static inline void fbuffer_append_reserved_char(FBuffer *fb, char chr) +{ +#if JSON_DEBUG + if (fb->requested < 1) { + rb_bug("fbuffer: unreserved write"); } + fb->requested--; +#endif + + fb->ptr[fb->len] = chr; + fb->len++; } -#ifdef JSON_GENERATOR static void fbuffer_append_str(FBuffer *fb, VALUE str) { - const char *newstr = StringValuePtr(str); - unsigned long len = RSTRING_LEN(str); + const char *ptr; + size_t len; + RSTRING_GETMEM(str, ptr, len); + fbuffer_append(fb, ptr, len); RB_GC_GUARD(str); - - fbuffer_append(fb, newstr, len); } + +static void fbuffer_append_str_repeat(FBuffer *fb, VALUE str, size_t repeat) +{ + const char *ptr; + size_t len; + RSTRING_GETMEM(str, ptr, len); + + fbuffer_inc_capa(fb, fbuffer_size_mul_or_raise(repeat, len)); + while (repeat) { +#if JSON_DEBUG + fb->requested = len; #endif + fbuffer_append_reserved(fb, ptr, len); + repeat--; + } + RB_GC_GUARD(str); +} static inline void fbuffer_append_char(FBuffer *fb, char newchr) { fbuffer_inc_capa(fb, 1); *(fb->ptr + fb->len) = newchr; - fb->len++; + fbuffer_consumed(fb, 1); } -#ifdef JSON_GENERATOR -static long fltoa(long number, char *buf) +static inline char *fbuffer_cursor(FBuffer *fb) { - static const char digits[] = "0123456789"; - long sign = number; - char* tmp = buf; + return fb->ptr + fb->len; +} - if (sign < 0) number = -number; - do *tmp-- = digits[number % 10]; while (number /= 10); - if (sign < 0) *tmp-- = '-'; - return buf - tmp; +static inline void fbuffer_advance_to(FBuffer *fb, char *end) +{ + fbuffer_consumed(fb, (end - fb->ptr) - fb->len); } -#define LONG_BUFFER_SIZE 20 +/* + * Appends the decimal string representation of \a number into the buffer. 
+ */ static void fbuffer_append_long(FBuffer *fb, long number) { - char buf[LONG_BUFFER_SIZE]; - char *buffer_end = buf + LONG_BUFFER_SIZE; - long len = fltoa(number, buffer_end - 1); - fbuffer_append(fb, buffer_end - len, len); + /* + * The jeaiii_ultoa() function produces digits left-to-right, + * allowing us to write directly into the buffer, but we don't know + * the number of resulting characters. + * + * We do know, however, that the `number` argument is always in the + * range 0xc000000000000000 to 0x3fffffffffffffff, or, in decimal, + * -4611686018427387904 to 4611686018427387903. The max number of chars + * generated is therefore 20 (including a potential sign character). + */ + + static const int MAX_CHARS_FOR_LONG = 20; + + fbuffer_inc_capa(fb, MAX_CHARS_FOR_LONG); + + if (number < 0) { + fbuffer_append_reserved_char(fb, '-'); + + /* + * Since number is always > LONG_MIN, `-number` will not overflow + * and is always the positive abs() value. + */ + number = -number; + } + + char *end = jeaiii_ultoa(fbuffer_cursor(fb), number); + fbuffer_advance_to(fb, end); } static VALUE fbuffer_finalize(FBuffer *fb) { if (fb->io) { fbuffer_flush(fb); - fbuffer_free(fb); rb_io_flush(fb->io); return fb->io; } else { - VALUE result = rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb)); - fbuffer_free(fb); - return result; + return rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb)); } } -#endif -#endif + +#endif // _FBUFFER_H_ diff --git a/ext/json/generator/depend b/ext/json/generator/depend index 65be7b8665..3ba4acfdd2 100644 --- a/ext/json/generator/depend +++ b/ext/json/generator/depend @@ -142,6 +142,7 @@ generator.o: $(hdrdir)/ruby/internal/intern/re.h generator.o: $(hdrdir)/ruby/internal/intern/ruby.h generator.o: $(hdrdir)/ruby/internal/intern/select.h generator.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +generator.o: $(hdrdir)/ruby/internal/intern/set.h generator.o: $(hdrdir)/ruby/internal/intern/signal.h generator.o: 
$(hdrdir)/ruby/internal/intern/sprintf.h generator.o: $(hdrdir)/ruby/internal/intern/string.h @@ -177,5 +178,9 @@ generator.o: $(hdrdir)/ruby/ruby.h generator.o: $(hdrdir)/ruby/st.h generator.o: $(hdrdir)/ruby/subst.h generator.o: $(srcdir)/../fbuffer/fbuffer.h +generator.o: $(srcdir)/../json.h +generator.o: $(srcdir)/../simd/simd.h +generator.o: $(srcdir)/../vendor/fpconv.c +generator.o: $(srcdir)/../vendor/jeaiii-ltoa.h generator.o: generator.c # AUTOGENERATED DEPENDENCIES END diff --git a/ext/json/generator/extconf.rb b/ext/json/generator/extconf.rb index 078068cf63..33af03ea30 100644 --- a/ext/json/generator/extconf.rb +++ b/ext/json/generator/extconf.rb @@ -5,6 +5,15 @@ if RUBY_ENGINE == 'truffleruby' File.write('Makefile', dummy_makefile("").join) else append_cflags("-std=c99") + have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3 + have_func("ruby_xfree_sized", "ruby.h") # RUBY_VERSION >= 4.1 + $defs << "-DJSON_GENERATOR" + $defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0" + + if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"]) + load __dir__ + "/../simd/conf.rb" + end + create_makefile 'json/ext/generator' end diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c index 5006b7853e..82853633ba 100644 --- a/ext/json/generator/generator.c +++ b/ext/json/generator/generator.c @@ -1,37 +1,46 @@ -#include "ruby.h" +#include "../json.h" #include "../fbuffer/fbuffer.h" +#include "../vendor/fpconv.c" #include <math.h> #include <ctype.h> +#include "../simd/simd.h" + /* ruby api and some helpers */ +enum duplicate_key_action { + JSON_DEPRECATED = 0, + JSON_IGNORE, + JSON_RAISE, +}; + typedef struct JSON_Generator_StateStruct { VALUE indent; VALUE space; VALUE space_before; VALUE object_nl; VALUE array_nl; + VALUE as_json; long max_nesting; long depth; long buffer_initial_length; + enum duplicate_key_action on_duplicate_key; + + bool as_json_single_arg; bool allow_nan; bool ascii_only; bool 
script_safe; bool strict; } JSON_Generator_State; -#ifndef RB_UNLIKELY -#define RB_UNLIKELY(cond) (cond) -#endif - -static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8; +static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8; -static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode; -static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, - sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict; +static ID i_to_s, i_to_json, i_new, i_encode; +static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key, + sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json; #define GET_STATE_TO(self, state) \ @@ -43,7 +52,7 @@ static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, struct generate_json_data; -typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj); struct generate_json_data { FBuffer *buffer; @@ -51,43 +60,39 @@ struct generate_json_data { JSON_Generator_State *state; VALUE obj; generator_func func; + long depth; }; +static SIMD_Implementation simd_impl; + static VALUE cState_from_state_s(VALUE self, VALUE opts); static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io); -static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE 
obj); -static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -#ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -#endif -static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static 
void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj); static int usascii_encindex, utf8_encindex, binary_encindex; -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_generator_error_str(VALUE invalid_object, VALUE str) +NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str) { + rb_enc_associate_index(str, utf8_encindex); VALUE exc = rb_exc_new_str(eGeneratorError, str); rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object); rb_exc_raise(exc); } -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif #ifdef RBIMPL_ATTR_FORMAT RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) #endif -static void raise_generator_error(VALUE invalid_object, const char *fmt, ...) +NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...) { va_list args; va_start(args, fmt); @@ -99,21 +104,44 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...) // 0 - single byte char that don't need to be escaped. // (x | 8) - char that needs to be escaped. 
static const unsigned char CHAR_LENGTH_MASK = 7; +static const unsigned char ESCAPE_MASK = 8; -static const unsigned char escape_table[256] = { - // ASCII Control Characters - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - // ASCII Characters - 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; +typedef struct _search_state { + const char *ptr; + const char *end; + const char *cursor; + FBuffer *buffer; -static const unsigned char ascii_only_escape_table[256] = { +#ifdef HAVE_SIMD + const char *chunk_base; + const char *chunk_end; + bool has_matches; + +#if defined(HAVE_SIMD_NEON) + uint64_t matches_mask; +#elif defined(HAVE_SIMD_SSE2) + int matches_mask; +#else +#error "Unknown SIMD Implementation." +#endif /* HAVE_SIMD_NEON */ +#endif /* HAVE_SIMD */ +} search_state; + +ALWAYS_INLINE(static) void search_flush(search_state *search) +{ + // Do not remove this conditional without profiling, specifically escape-heavy text. + // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush). + // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method + // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the + // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if + // nothing needs to be flushed, we can save a few memory references with this conditional. 
+ if (search->ptr > search->cursor) { + fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor); + search->cursor = search->ptr; + } +} + +static const unsigned char escape_table_basic[256] = { // ASCII Control Characters 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, @@ -124,44 +152,46 @@ static const unsigned char ascii_only_escape_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // Continuation byte - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - // First byte of a 2-byte code point - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - // First byte of a 3-byte code point - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - //First byte of a 4+ byte code point - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, }; -static const unsigned char script_safe_escape_table[256] = { - // ASCII Control Characters - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - // ASCII Characters - 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/' - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // Continuation byte - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - // First byte of a 2-byte code point - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, - // First byte of a 3-byte code point - 3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029 - //First byte of a 4+ byte code point - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, -}; +static inline unsigned char search_escape_basic(search_state *search) +{ + while (search->ptr < search->end) { + if (RB_UNLIKELY(escape_table_basic[(const unsigned char)*search->ptr])) { + search_flush(search); + return 1; + } else { + search->ptr++; + } + } + search_flush(search); + return 0; +} + +ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search) +{ + const unsigned char ch = (unsigned char)*search->ptr; + switch (ch) { + case '"': fbuffer_append(search->buffer, "\\\"", 2); break; + case '\\': fbuffer_append(search->buffer, "\\\\", 2); break; + case '/': fbuffer_append(search->buffer, "\\/", 2); break; + case '\b': fbuffer_append(search->buffer, "\\b", 2); break; + case '\f': fbuffer_append(search->buffer, "\\f", 2); break; + case '\n': fbuffer_append(search->buffer, "\\n", 2); break; + case '\r': fbuffer_append(search->buffer, "\\r", 2); break; + case '\t': fbuffer_append(search->buffer, "\\t", 2); break; + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + fbuffer_append(search->buffer, scratch, 6); + break; + } + } + search->ptr++; + search->cursor = search->ptr; +} /* Converts in_string to a JSON string (without the wrapping '"' * characters) in FBuffer out_buffer. @@ -179,464 +209,495 @@ static const unsigned char script_safe_escape_table[256] = { * Everything else (should be UTF-8) is just passed through and * appended to the result. 
*/ -static inline void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const unsigned char escape_table[256]) -{ - const char *hexdig = "0123456789abcdef"; - char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' }; - - const char *ptr = RSTRING_PTR(str); - unsigned long len = RSTRING_LEN(str); - unsigned long beg = 0, pos = 0; -#define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos; +#if defined(HAVE_SIMD_NEON) +static inline unsigned char search_escape_basic_neon(search_state *search); +#elif defined(HAVE_SIMD_SSE2) +static inline unsigned char search_escape_basic_sse2(search_state *search); +#endif - while (pos < len) { - unsigned char ch = ptr[pos]; - unsigned char ch_len = escape_table[ch]; - /* JSON encoding */ +static inline unsigned char search_escape_basic(search_state *search); - if (RB_UNLIKELY(ch_len)) { - switch (ch_len) { - case 9: { - FLUSH_POS(1); - switch (ch) { - case '"': fbuffer_append(out_buffer, "\\\"", 2); break; - case '\\': fbuffer_append(out_buffer, "\\\\", 2); break; - case '/': fbuffer_append(out_buffer, "\\/", 2); break; - case '\b': fbuffer_append(out_buffer, "\\b", 2); break; - case '\f': fbuffer_append(out_buffer, "\\f", 2); break; - case '\n': fbuffer_append(out_buffer, "\\n", 2); break; - case '\r': fbuffer_append(out_buffer, "\\r", 2); break; - case '\t': fbuffer_append(out_buffer, "\\t", 2); break; - default: { - scratch[2] = '0'; - scratch[3] = '0'; - scratch[4] = hexdig[(ch >> 4) & 0xf]; - scratch[5] = hexdig[ch & 0xf]; - fbuffer_append(out_buffer, scratch, 6); - break; - } - } +static inline void convert_UTF8_to_JSON(search_state *search) +{ +#ifdef HAVE_SIMD +#if defined(HAVE_SIMD_NEON) + while (search_escape_basic_neon(search)) { + escape_UTF8_char_basic(search); + } +#elif defined(HAVE_SIMD_SSE2) + if (simd_impl == SIMD_SSE2) { + while (search_escape_basic_sse2(search)) { + escape_UTF8_char_basic(search); + } + return; + } + while 
(search_escape_basic(search)) { + escape_UTF8_char_basic(search); + } +#endif +#else + while (search_escape_basic(search)) { + escape_UTF8_char_basic(search); + } +#endif /* HAVE_SIMD */ +} + +static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) +{ + const unsigned char ch = (unsigned char)*search->ptr; + switch (ch_len) { + case 1: { + switch (ch) { + case '"': fbuffer_append(search->buffer, "\\\"", 2); break; + case '\\': fbuffer_append(search->buffer, "\\\\", 2); break; + case '/': fbuffer_append(search->buffer, "\\/", 2); break; + case '\b': fbuffer_append(search->buffer, "\\b", 2); break; + case '\f': fbuffer_append(search->buffer, "\\f", 2); break; + case '\n': fbuffer_append(search->buffer, "\\n", 2); break; + case '\r': fbuffer_append(search->buffer, "\\r", 2); break; + case '\t': fbuffer_append(search->buffer, "\\t", 2); break; + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + fbuffer_append(search->buffer, scratch, 6); break; } - case 11: { - unsigned char b2 = ptr[pos + 1]; - if (RB_UNLIKELY(b2 == 0x80)) { - unsigned char b3 = ptr[pos + 2]; - if (b3 == 0xA8) { - FLUSH_POS(3); - fbuffer_append(out_buffer, "\\u2028", 6); - break; - } else if (b3 == 0xA9) { - FLUSH_POS(3); - fbuffer_append(out_buffer, "\\u2029", 6); - break; - } - } - ch_len = 3; - // fallthrough - } - default: - pos += ch_len; - break; } - } else { - pos++; + break; + } + case 3: { + if (search->ptr[2] & 1) { + fbuffer_append(search->buffer, "\\u2029", 6); + } else { + fbuffer_append(search->buffer, "\\u2028", 6); + } + break; } } -#undef FLUSH_POS - - if (beg < len) { - fbuffer_append(out_buffer, &ptr[beg], len - beg); - } - - RB_GC_GUARD(str); + search->cursor = (search->ptr += ch_len); } -static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, const unsigned char escape_table[256]) +#ifdef HAVE_SIMD + 
+ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len) { - const char *hexdig = "0123456789abcdef"; - char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' }; + RBIMPL_ASSERT_OR_ASSUME(len < vec_len); - const char *ptr = RSTRING_PTR(str); - unsigned long len = RSTRING_LEN(str); + // Flush the buffer so everything up until the last 'len' characters are unflushed. + search_flush(search); - unsigned long beg = 0, pos = 0; + FBuffer *buf = search->buffer; + fbuffer_inc_capa(buf, vec_len); -#define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos; + char *s = (buf->ptr + buf->len); - while (pos < len) { - unsigned char ch = ptr[pos]; - unsigned char ch_len = escape_table[ch]; + // Pad the buffer with dummy characters that won't need escaping. + // This seem wasteful at first sight, but memset of vector length is very fast. + // This is a space as it can be directly represented as an immediate on AArch64. 
+ memset(s, ' ', vec_len); - if (RB_UNLIKELY(ch_len)) { - switch (ch_len) { - case 9: { - FLUSH_POS(1); - switch (ch) { - case '"': fbuffer_append(out_buffer, "\\\"", 2); break; - case '\\': fbuffer_append(out_buffer, "\\\\", 2); break; - case '/': fbuffer_append(out_buffer, "\\/", 2); break; - case '\b': fbuffer_append(out_buffer, "\\b", 2); break; - case '\f': fbuffer_append(out_buffer, "\\f", 2); break; - case '\n': fbuffer_append(out_buffer, "\\n", 2); break; - case '\r': fbuffer_append(out_buffer, "\\r", 2); break; - case '\t': fbuffer_append(out_buffer, "\\t", 2); break; - default: { - scratch[2] = '0'; - scratch[3] = '0'; - scratch[4] = hexdig[(ch >> 4) & 0xf]; - scratch[5] = hexdig[ch & 0xf]; - fbuffer_append(out_buffer, scratch, 6); - break; - } - } - break; - } - default: { - uint32_t wchar = 0; - ch_len = ch_len & CHAR_LENGTH_MASK; - - switch(ch_len) { - case 2: - wchar = ptr[pos] & 0x1F; - break; - case 3: - wchar = ptr[pos] & 0x0F; - break; - case 4: - wchar = ptr[pos] & 0x07; - break; - } + // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters + // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage. 
+ if (vec_len == 16) { + RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); + json_fast_memcpy16(s, search->ptr, len); + } else { + MEMCPY(s, search->ptr, char, len); + } - for (short i = 1; i < ch_len; i++) { - wchar = (wchar << 6) | (ptr[pos+i] & 0x3F); - } + return s; +} - FLUSH_POS(ch_len); +#ifdef HAVE_SIMD_NEON - if (wchar <= 0xFFFF) { - scratch[2] = hexdig[wchar >> 12]; - scratch[3] = hexdig[(wchar >> 8) & 0xf]; - scratch[4] = hexdig[(wchar >> 4) & 0xf]; - scratch[5] = hexdig[wchar & 0xf]; - fbuffer_append(out_buffer, scratch, 6); - } else { - uint16_t hi, lo; - wchar -= 0x10000; - hi = 0xD800 + (uint16_t)(wchar >> 10); - lo = 0xDC00 + (uint16_t)(wchar & 0x3FF); - - scratch[2] = hexdig[hi >> 12]; - scratch[3] = hexdig[(hi >> 8) & 0xf]; - scratch[4] = hexdig[(hi >> 4) & 0xf]; - scratch[5] = hexdig[hi & 0xf]; - - scratch[8] = hexdig[lo >> 12]; - scratch[9] = hexdig[(lo >> 8) & 0xf]; - scratch[10] = hexdig[(lo >> 4) & 0xf]; - scratch[11] = hexdig[lo & 0xf]; - - fbuffer_append(out_buffer, scratch, 12); - } +ALWAYS_INLINE(static) unsigned char neon_next_match(search_state *search) +{ + uint64_t mask = search->matches_mask; + uint32_t index = trailing_zeros64(mask) >> 2; - break; - } - } + // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character. + // If we want to use a similar approach for full escaping we'll need to ensure: + // search->chunk_base + index >= search->ptr + // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match + // is one byte after the previous match then: + // search->chunk_base + index == search->ptr + search->ptr = search->chunk_base + index; + mask &= mask - 1; + search->matches_mask = mask; + search_flush(search); + return 1; +} + +static inline unsigned char search_escape_basic_neon(search_state *search) +{ + if (RB_UNLIKELY(search->has_matches)) { + // There are more matches if search->matches_mask > 0. 
+ if (search->matches_mask > 0) { + return neon_next_match(search); } else { - pos++; + // neon_next_match will only advance search->ptr up to the last matching character. + // Skip over any characters in the last chunk that occur after the last match. + search->has_matches = false; + search->ptr = search->chunk_end; } } -#undef FLUSH_POS - if (beg < len) { - fbuffer_append(out_buffer, &ptr[beg], len - beg); + /* + * The code below implements an SIMD-based algorithm to determine if N bytes at a time + * need to be escaped. + * + * Assume the ptr = "Te\sting!" (the double quotes are included in the string) + * + * The explanation will be limited to the first 8 bytes of the string for simplicity. However + * the vector insructions may work on larger vectors. + * + * First, we load three constants 'lower_bound', 'backslash' and 'dblquote" in vector registers. + * + * lower_bound: [20 20 20 20 20 20 20 20] + * backslash: [5C 5C 5C 5C 5C 5C 5C 5C] + * dblquote: [22 22 22 22 22 22 22 22] + * + * Next we load the first chunk of the ptr: + * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n) + * + * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector + * as no bytes are less than 32 (0x20): + * [0 0 0 0 0 0 0 0] + * + * Next, we check if any byte in chunk is equal to a backslash: + * [0 0 0 FF 0 0 0 0] + * + * Finally we check if any byte in chunk is equal to a double quote: + * [FF 0 0 0 0 0 0 0] + * + * Now we have three vectors where each byte indicates if the corresponding byte in chunk + * needs to be escaped. We combine these vectors with a series of logical OR instructions. + * This is the needs_escape vector and it is equal to: + * [FF 0 0 FF 0 0 0 0] + * + * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of + * the values in the vector. This computes how many bytes need to be escaped within this chunk. + * + * Finally we compute a mask that indicates which bytes need to be escaped. 
If the mask is 0 then, + * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we + * have at least one byte that needs to be escaped. + */ + + if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) { + search->has_matches = true; + search->chunk_base = search->ptr; + search->chunk_end = search->ptr + sizeof(uint8x16_t); + return neon_next_match(search); } - RB_GC_GUARD(str); -} + // There are fewer than 16 bytes left. + unsigned long remaining = (search->end - search->ptr); + if (remaining >= SIMD_MINIMUM_THRESHOLD) { + char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining); -/* - * Document-module: JSON::Ext::Generator - * - * This is the JSON generator implemented as a C extension. It can be - * configured to be used by setting - * - * JSON.generator = JSON::Ext::Generator - * - * with the method generator= in JSON. - * - */ + uint64_t mask = compute_chunk_mask_neon(s); -/* Explanation of the following: that's the only way to not pollute - * standard library's docs with GeneratorMethods::<ClassName> which - * are uninformative and take a large place in a list of classes - */ + if (!mask) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. 
+ fbuffer_consumed(search->buffer, remaining); + search->ptr = search->end; + search->cursor = search->end; + return 0; + } -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods - * :nodoc: - */ + search->matches_mask = mask; + search->has_matches = true; + search->chunk_end = search->end; + search->chunk_base = search->ptr; + return neon_next_match(search); + } -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Array - * :nodoc: - */ + if (search->ptr < search->end) { + return search_escape_basic(search); + } -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Bignum - * :nodoc: - */ + search_flush(search); + return 0; +} +#endif /* HAVE_SIMD_NEON */ -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::FalseClass - * :nodoc: - */ +#ifdef HAVE_SIMD_SSE2 -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Fixnum - * :nodoc: - */ +ALWAYS_INLINE(static) unsigned char sse2_next_match(search_state *search) +{ + int mask = search->matches_mask; + int index = trailing_zeros(mask); -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Float - * :nodoc: - */ + // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character. 
+ // If we want to use a similar approach for full escaping we'll need to ensure: + // search->chunk_base + index >= search->ptr + // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match + // is one byte after the previous match then: + // search->chunk_base + index == search->ptr + search->ptr = search->chunk_base + index; + mask &= mask - 1; + search->matches_mask = mask; + search_flush(search); + return 1; +} -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Hash - * :nodoc: - */ +#if defined(__clang__) || defined(__GNUC__) +#define TARGET_SSE2 __attribute__((target("sse2"))) +#else +#define TARGET_SSE2 +#endif -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Integer - * :nodoc: - */ +ALWAYS_INLINE(static) TARGET_SSE2 unsigned char search_escape_basic_sse2(search_state *search) +{ + if (RB_UNLIKELY(search->has_matches)) { + // There are more matches if search->matches_mask > 0. + if (search->matches_mask > 0) { + return sse2_next_match(search); + } else { + // sse2_next_match will only advance search->ptr up to the last matching character. + // Skip over any characters in the last chunk that occur after the last match. + search->has_matches = false; + if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) { + search->ptr = search->end; + } else { + search->ptr = search->chunk_base + sizeof(__m128i); + } + } + } -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::NilClass - * :nodoc: - */ + if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) { + search->has_matches = true; + search->chunk_base = search->ptr; + search->chunk_end = search->ptr + sizeof(__m128i); + return sse2_next_match(search); + } -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Object - * :nodoc: - */ + // There are fewer than 16 bytes left. 
+ unsigned long remaining = (search->end - search->ptr); + if (remaining >= SIMD_MINIMUM_THRESHOLD) { + char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining); -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::String - * :nodoc: - */ + int needs_escape_mask = compute_chunk_mask_sse2(s); -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::String::Extend - * :nodoc: - */ + if (needs_escape_mask == 0) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. + fbuffer_consumed(search->buffer, remaining); + search->ptr = search->end; + search->cursor = search->end; + return 0; + } -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::TrueClass - * :nodoc: - */ + search->has_matches = true; + search->matches_mask = needs_escape_mask; + search->chunk_base = search->ptr; + return sse2_next_match(search); + } -/* - * call-seq: to_json(state = nil) - * - * Returns a JSON string containing a JSON object, that is generated from - * this Hash instance. - * _state_ is a JSON::State object, that can also be used to configure the - * produced JSON string output further. - */ -static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_object, Qfalse); -} + if (search->ptr < search->end) { + return search_escape_basic(search); + } -/* - * call-seq: to_json(state = nil) - * - * Returns a JSON string containing a JSON array, that is generated from - * this Array instance. - * _state_ is a JSON::State object, that can also be used to configure the - * produced JSON string output further. - */ -static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? 
argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_array, Qfalse); + search_flush(search); + return 0; } -#ifdef RUBY_INTEGER_UNIFICATION -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_integer, Qfalse); -} +#endif /* HAVE_SIMD_SSE2 */ -#else -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_fixnum, Qfalse); -} +#endif /* HAVE_SIMD */ -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? 
argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_bignum, Qfalse); -} -#endif +static const unsigned char script_safe_escape_table[256] = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Continuation byte + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // First byte of a 2-byte code point + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // First byte of a 3-byte code point + 3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029 + //First byte of a 4+ byte code point + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, +}; -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Float number. - */ -static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) +static inline unsigned char search_script_safe_escape(search_state *search) { - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_float, Qfalse); -} + while (search->ptr < search->end) { + unsigned char ch = (unsigned char)*search->ptr; + unsigned char ch_len = script_safe_escape_table[ch]; -/* - * call-seq: String.included(modul) - * - * Extends _modul_ with the String::Extend module. 
- */ -static VALUE mString_included_s(VALUE self, VALUE modul) { - VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend); - rb_call_super(1, &modul); - return result; + if (RB_UNLIKELY(ch_len)) { + if (ch_len & ESCAPE_MASK) { + if (RB_UNLIKELY(ch_len == 11)) { + const unsigned char *uptr = (const unsigned char *)search->ptr; + if (!(uptr[1] == 0x80 && (uptr[2] >> 1) == 0x54)) { + search->ptr += 3; + continue; + } + } + search_flush(search); + return ch_len & CHAR_LENGTH_MASK; + } else { + search->ptr += ch_len; + } + } else { + search->ptr++; + } + } + search_flush(search); + return 0; } -/* - * call-seq: to_json(*) - * - * This string should be encoded with UTF-8 A call to this method - * returns a JSON string encoded with UTF16 big endian characters as - * \u????. - */ -static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) +static void convert_UTF8_to_script_safe_JSON(search_state *search) { - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_string, Qfalse); + unsigned char ch_len; + while ((ch_len = search_script_safe_escape(search))) { + escape_UTF8_char(search, ch_len); + } } -/* - * call-seq: to_json_raw_object() - * - * This method creates a raw object hash, that can be nested into - * other data structures and will be generated as a raw string. This - * method should be used, if you want to convert raw strings to JSON - * instead of UTF-8 strings, e. g. binary data. 
- */ -static VALUE mString_to_json_raw_object(VALUE self) -{ - VALUE ary; - VALUE result = rb_hash_new(); - rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self))); - ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*")); - rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary); - return result; -} +static const unsigned char ascii_only_escape_table[256] = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Continuation byte + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // First byte of a 2-byte code point + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // First byte of a 3-byte code point + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + //First byte of a 4+ byte code point + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, +}; -/* - * call-seq: to_json_raw(*args) - * - * This method creates a JSON text from the result of a call to - * to_json_raw_object of this String. 
- */ -static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) +static inline unsigned char search_ascii_only_escape(search_state *search, const unsigned char escape_table[256]) { - VALUE obj = mString_to_json_raw_object(self); - Check_Type(obj, T_HASH); - return mHash_to_json(argc, argv, obj); -} + while (search->ptr < search->end) { + unsigned char ch = (unsigned char)*search->ptr; + unsigned char ch_len = escape_table[ch]; -/* - * call-seq: json_create(o) - * - * Raw Strings are JSON Objects (the raw bytes are stored in an array for the - * key "raw"). The Ruby String can be created by this module method. - */ -static VALUE mString_Extend_json_create(VALUE self, VALUE o) -{ - VALUE ary; - Check_Type(o, T_HASH); - ary = rb_hash_aref(o, rb_str_new2("raw")); - return rb_funcall(ary, i_pack, 1, rb_str_new2("C*")); + if (RB_UNLIKELY(ch_len)) { + search_flush(search); + return ch_len & CHAR_LENGTH_MASK; + } else { + search->ptr++; + } + } + search_flush(search); + return 0; } -/* - * call-seq: to_json(*) - * - * Returns a JSON string for true: 'true'. 
- */ -static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - return rb_utf8_str_new("true", 4); -} +static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) +{ + const unsigned char ch = (unsigned char)*search->ptr; + switch (ch_len) { + case 1: { + switch (ch) { + case '"': fbuffer_append(search->buffer, "\\\"", 2); break; + case '\\': fbuffer_append(search->buffer, "\\\\", 2); break; + case '/': fbuffer_append(search->buffer, "\\/", 2); break; + case '\b': fbuffer_append(search->buffer, "\\b", 2); break; + case '\f': fbuffer_append(search->buffer, "\\f", 2); break; + case '\n': fbuffer_append(search->buffer, "\\n", 2); break; + case '\r': fbuffer_append(search->buffer, "\\r", 2); break; + case '\t': fbuffer_append(search->buffer, "\\t", 2); break; + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + fbuffer_append(search->buffer, scratch, 6); + break; + } + } + break; + } + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' }; -/* - * call-seq: to_json(*) - * - * Returns a JSON string for false: 'false'. - */ -static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - return rb_utf8_str_new("false", 5); -} + uint32_t wchar = 0; -/* - * call-seq: to_json(*) - * - * Returns a JSON string for nil: 'null'. 
- */ -static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - return rb_utf8_str_new("null", 4); + switch (ch_len) { + case 2: + wchar = ch & 0x1F; + break; + case 3: + wchar = ch & 0x0F; + break; + case 4: + wchar = ch & 0x07; + break; + } + + for (short i = 1; i < ch_len; i++) { + wchar = (wchar << 6) | (search->ptr[i] & 0x3F); + } + + if (wchar <= 0xFFFF) { + scratch[2] = hexdig[wchar >> 12]; + scratch[3] = hexdig[(wchar >> 8) & 0xf]; + scratch[4] = hexdig[(wchar >> 4) & 0xf]; + scratch[5] = hexdig[wchar & 0xf]; + fbuffer_append(search->buffer, scratch, 6); + } else { + uint16_t hi, lo; + wchar -= 0x10000; + hi = 0xD800 + (uint16_t)(wchar >> 10); + lo = 0xDC00 + (uint16_t)(wchar & 0x3FF); + + scratch[2] = hexdig[hi >> 12]; + scratch[3] = hexdig[(hi >> 8) & 0xf]; + scratch[4] = hexdig[(hi >> 4) & 0xf]; + scratch[5] = hexdig[hi & 0xf]; + + scratch[8] = hexdig[lo >> 12]; + scratch[9] = hexdig[(lo >> 8) & 0xf]; + scratch[10] = hexdig[(lo >> 4) & 0xf]; + scratch[11] = hexdig[lo & 0xf]; + + fbuffer_append(search->buffer, scratch, 12); + } + + break; + } + } + search->cursor = (search->ptr += ch_len); } -/* - * call-seq: to_json(*) - * - * Converts this object to a string (calling #to_s), converts - * it to a JSON string, and returns the result. This is a fallback, if no - * special method #to_json was defined for some object. 
- */ -static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) +static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned char escape_table[256]) { - VALUE state; - VALUE string = rb_funcall(self, i_to_s, 0); - rb_scan_args(argc, argv, "01", &state); - Check_Type(string, T_STRING); - state = cState_from_state_s(cState, state); - return cState_partial_generate(state, string, generate_json_string, Qfalse); + unsigned char ch_len; + while ((ch_len = search_ascii_only_escape(search, escape_table))) { + full_escape_UTF8_char(search, ch_len); + } } static void State_mark(void *ptr) @@ -647,6 +708,7 @@ static void State_mark(void *ptr) rb_gc_mark_movable(state->space_before); rb_gc_mark_movable(state->object_nl); rb_gc_mark_movable(state->array_nl); + rb_gc_mark_movable(state->as_json); } static void State_compact(void *ptr) @@ -657,34 +719,27 @@ static void State_compact(void *ptr) state->space_before = rb_gc_location(state->space_before); state->object_nl = rb_gc_location(state->object_nl); state->array_nl = rb_gc_location(state->array_nl); -} - -static void State_free(void *ptr) -{ - JSON_Generator_State *state = ptr; - ruby_xfree(state); + state->as_json = rb_gc_location(state->as_json); } static size_t State_memsize(const void *ptr) { +#ifdef HAVE_RUBY_TYPED_EMBEDDABLE + return 0; +#else return sizeof(JSON_Generator_State); -} - -#ifndef HAVE_RB_EXT_RACTOR_SAFE -# undef RUBY_TYPED_FROZEN_SHAREABLE -# define RUBY_TYPED_FROZEN_SHAREABLE 0 #endif +} static const rb_data_type_t JSON_Generator_State_type = { - "JSON/Generator/State", - { + .wrap_struct_name = "JSON/Generator/State", + .function = { .dmark = State_mark, - .dfree = State_free, + .dfree = RUBY_DEFAULT_FREE, .dsize = State_memsize, .dcompact = State_compact, }, - 0, 0, - RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE, + .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE, }; 
static void state_init(JSON_Generator_State *state) @@ -713,21 +768,196 @@ static void vstate_spill(struct generate_json_data *data) RB_OBJ_WRITTEN(vstate, Qundef, state->space_before); RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl); RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl); + RB_OBJ_WRITTEN(vstate, Qundef, state->as_json); } -static inline VALUE vstate_get(struct generate_json_data *data) +static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj) { if (RB_UNLIKELY(!data->vstate)) { vstate_spill(data); } - return data->vstate; + GET_STATE(data->vstate); + state->depth = data->depth; + VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate); + // no need to restore state->depth, vstate is just a temporary State + return tmp; +} + +static VALUE +json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key) +{ + VALUE proc_args[2] = {object, is_key}; + return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil); +} + +static VALUE +convert_string_subclass(VALUE key) +{ + VALUE key_to_s = rb_funcall(key, i_to_s, 0); + + if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) { + VALUE cname = rb_obj_class(key); + rb_raise(rb_eTypeError, + "can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")", + cname, "String", cname, "to_s", rb_obj_class(key_to_s)); + } + + return key_to_s; +} + +static bool enc_utf8_compatible_p(int enc_idx) +{ + if (enc_idx == usascii_encindex) return true; + if (enc_idx == utf8_encindex) return true; + return false; +} + +static VALUE encode_json_string_try(VALUE str) +{ + return rb_funcall(str, i_encode, 1, Encoding_UTF_8); +} + +static VALUE encode_json_string_rescue(VALUE str, VALUE exception) +{ + raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0)); + return Qundef; +} + +static inline int json_str_coderange(VALUE str) { + int coderange = RB_ENC_CODERANGE(str); + if (coderange == RUBY_ENC_CODERANGE_UNKNOWN) { + coderange = rb_enc_str_coderange(str); + } + 
return coderange; +} + +static inline bool valid_json_string_p(VALUE str) +{ + int coderange = json_str_coderange(str); + + if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) { + return true; + } + + if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) { + return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str)); + } + + return false; +} + +NOINLINE(static) VALUE convert_invalid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key) +{ + if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) { + VALUE coerced_str = json_call_as_json(data->state, str, Qfalse); + if (coerced_str != str) { + if (RB_TYPE_P(coerced_str, T_STRING)) { + if (!valid_json_string_p(coerced_str)) { + raise_generator_error(str, "source sequence is illegal/malformed utf-8"); + } + } else { + // as_json could return another type than T_STRING + if (is_key) { + raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str)); + } + } + + return coerced_str; + } + } + + if (RB_ENCODING_GET_INLINED(str) == binary_encindex) { + VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); + switch (rb_enc_str_coderange(utf8_string)) { + case ENC_CODERANGE_7BIT: + return utf8_string; + case ENC_CODERANGE_VALID: + // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. 
+ // TODO: Raise in 3.0.0 + rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); + return utf8_string; + break; + } + } + + return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str); +} + +ALWAYS_INLINE(static) VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key) +{ + if (RB_LIKELY(valid_json_string_p(str))) { + return str; + } + else { + return convert_invalid_encoding(data, str, as_json_called, is_key); + } +} + +static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + fbuffer_append_char(buffer, '"'); + + long len; + search_state search; + search.buffer = buffer; + RSTRING_GETMEM(obj, search.ptr, len); + search.cursor = search.ptr; + search.end = search.ptr + len; + +#ifdef HAVE_SIMD + search.matches_mask = 0; + search.has_matches = false; + search.chunk_base = NULL; + search.chunk_end = NULL; +#endif /* HAVE_SIMD */ + + switch (json_str_coderange(obj)) { + case ENC_CODERANGE_7BIT: + case ENC_CODERANGE_VALID: + if (RB_UNLIKELY(data->state->ascii_only)) { + convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? 
script_safe_escape_table : ascii_only_escape_table); + } else if (RB_UNLIKELY(data->state->script_safe)) { + convert_UTF8_to_script_safe_JSON(&search); + } else { + convert_UTF8_to_JSON(&search); + } + break; + default: + raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); + break; + } + fbuffer_append_char(buffer, '"'); +} + +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + obj = ensure_valid_encoding(data, obj, false, false); + raw_generate_json_string(buffer, data, obj); } struct hash_foreach_arg { + VALUE hash; struct generate_json_data *data; - int iter; + int first_key_type; + bool first; + bool mixed_keys_encountered; }; +NOINLINE(static) void +json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg) +{ + if (arg->mixed_keys_encountered) { + return; + } + arg->mixed_keys_encountered = true; + + JSON_Generator_State *state = arg->data->state; + if (state->on_duplicate_key != JSON_IGNORE) { + VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? 
Qtrue : Qfalse; + rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise); + } +} + static int json_object_i(VALUE key, VALUE val, VALUE _arg) { @@ -737,254 +967,257 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) FBuffer *buffer = data->buffer; JSON_Generator_State *state = data->state; - long depth = state->depth; - int j; + long depth = data->depth; + int key_type = rb_type(key); - if (arg->iter > 0) fbuffer_append_char(buffer, ','); - if (RB_UNLIKELY(state->object_nl)) { - fbuffer_append_str(buffer, state->object_nl); + if (arg->first) { + arg->first = false; + arg->first_key_type = key_type; } - if (RB_UNLIKELY(state->indent)) { - for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, state->indent); - } + else { + fbuffer_append_char(buffer, ','); + } + + if (RB_UNLIKELY(data->state->object_nl)) { + fbuffer_append_str(buffer, data->state->object_nl); + } + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } VALUE key_to_s; - switch(rb_type(key)) { + bool as_json_called = false; + + start: + switch (key_type) { case T_STRING: + if (RB_UNLIKELY(arg->first_key_type != T_STRING)) { + json_inspect_hash_with_mixed_keys(arg); + } + if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) { key_to_s = key; } else { - key_to_s = rb_funcall(key, i_to_s, 0); + key_to_s = convert_string_subclass(key); } break; case T_SYMBOL: + if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) { + json_inspect_hash_with_mixed_keys(arg); + } + key_to_s = rb_sym2str(key); break; default: + if (data->state->strict) { + if (RTEST(data->state->as_json) && !as_json_called) { + key = json_call_as_json(data->state, key, Qtrue); + key_type = rb_type(key); + as_json_called = true; + goto start; + } else { + raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key)); + } + } key_to_s = rb_convert_type(key, T_STRING, "String", "to_s"); break; } + key_to_s = ensure_valid_encoding(data, key_to_s, 
as_json_called, true); + if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) { - generate_json_string(buffer, data, state, key_to_s); + raw_generate_json_string(buffer, data, key_to_s); } else { - generate_json(buffer, data, state, key_to_s); + generate_json(buffer, data, key_to_s); } - if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before); + if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before); fbuffer_append_char(buffer, ':'); - if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space); - generate_json(buffer, data, state, val); + if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space); + generate_json(buffer, data, val); - arg->iter++; return ST_CONTINUE; } -static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static inline long increase_depth(struct generate_json_data *data) { - long max_nesting = state->max_nesting; - long depth = ++state->depth; - int j; - - if (max_nesting != 0 && depth > max_nesting) { - rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); + JSON_Generator_State *state = data->state; + long depth = ++data->depth; + if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) { + rb_raise(eNestingError, "nesting of %ld is too deep. 
Did you try to serialize objects with circular references?", --data->depth); } + return depth; +} + +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + long depth = increase_depth(data); if (RHASH_SIZE(obj) == 0) { fbuffer_append(buffer, "{}", 2); - --state->depth; + --data->depth; return; } fbuffer_append_char(buffer, '{'); struct hash_foreach_arg arg = { + .hash = obj, .data = data, - .iter = 0, + .first = true, }; rb_hash_foreach(obj, json_object_i, (VALUE)&arg); - depth = --state->depth; - if (RB_UNLIKELY(state->object_nl)) { - fbuffer_append_str(buffer, state->object_nl); - if (RB_UNLIKELY(state->indent)) { - for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, state->indent); - } + depth = --data->depth; + if (RB_UNLIKELY(data->state->object_nl)) { + fbuffer_append_str(buffer, data->state->object_nl); + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } } fbuffer_append_char(buffer, '}'); } -static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - long max_nesting = state->max_nesting; - long depth = ++state->depth; - int i, j; - if (max_nesting != 0 && depth > max_nesting) { - rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); - } + long depth = increase_depth(data); if (RARRAY_LEN(obj) == 0) { fbuffer_append(buffer, "[]", 2); - --state->depth; + --data->depth; return; } fbuffer_append_char(buffer, '['); - if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl); - for(i = 0; i < RARRAY_LEN(obj); i++) { + if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl); + for (int i = 0; i < RARRAY_LEN(obj); i++) { if (i > 0) { fbuffer_append_char(buffer, ','); - if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, 
state->array_nl); + if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl); } - if (RB_UNLIKELY(state->indent)) { - for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, state->indent); - } + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } - generate_json(buffer, data, state, RARRAY_AREF(obj, i)); + generate_json(buffer, data, RARRAY_AREF(obj, i)); } - state->depth = --depth; - if (RB_UNLIKELY(state->array_nl)) { - fbuffer_append_str(buffer, state->array_nl); - if (RB_UNLIKELY(state->indent)) { - for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, state->indent); - } + data->depth = --depth; + if (RB_UNLIKELY(data->state->array_nl)) { + fbuffer_append_str(buffer, data->state->array_nl); + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } } fbuffer_append_char(buffer, ']'); } -static inline int enc_utf8_compatible_p(int enc_idx) -{ - if (enc_idx == usascii_encindex) return 1; - if (enc_idx == utf8_encindex) return 1; - return 0; -} - -static VALUE encode_json_string_try(VALUE str) +static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - return rb_funcall(str, i_encode, 1, Encoding_UTF_8); -} - -static VALUE encode_json_string_rescue(VALUE str, VALUE exception) -{ - raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0)); - return Qundef; -} - -static inline VALUE ensure_valid_encoding(VALUE str) -{ - int encindex = RB_ENCODING_GET(str); - VALUE utf8_string; - if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) { - if (encindex == binary_encindex) { - utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); - switch (rb_enc_str_coderange(utf8_string)) { - case ENC_CODERANGE_7BIT: - return utf8_string; - case ENC_CODERANGE_VALID: - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. 
- // TODO: Raise in 3.0.0 - rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); - return utf8_string; - break; - } - } - - str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str); + VALUE tmp; + if (rb_respond_to(obj, i_to_json)) { + tmp = json_call_to_json(data, obj); + Check_Type(tmp, T_STRING); + fbuffer_append_str(buffer, tmp); + } else { + tmp = rb_funcall(obj, i_to_s, 0); + Check_Type(tmp, T_STRING); + generate_json_string(buffer, data, tmp); } - return str; } -static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - obj = ensure_valid_encoding(obj); - - fbuffer_append_char(buffer, '"'); - - switch(rb_enc_str_coderange(obj)) { - case ENC_CODERANGE_7BIT: - case ENC_CODERANGE_VALID: - if (RB_UNLIKELY(state->ascii_only)) { - convert_UTF8_to_ASCII_only_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : ascii_only_escape_table); - } else { - convert_UTF8_to_JSON(buffer, obj, state->script_safe ? 
script_safe_escape_table : escape_table); - } - break; - default: - raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); - break; + if (data->state->strict) { + generate_json_string(buffer, data, rb_sym2str(obj)); + } else { + generate_json_fallback(buffer, data, obj); } - fbuffer_append_char(buffer, '"'); } -static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "null", 4); } -static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "false", 5); } -static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "true", 4); } -static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append_long(buffer, FIX2LONG(obj)); } -static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { VALUE tmp = rb_funcall(obj, i_to_s, 0); - fbuffer_append_str(buffer, tmp); + fbuffer_append_str(buffer, StringValue(tmp)); } -#ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) -{ - if (FIXNUM_P(obj)) - generate_json_fixnum(buffer, data, state, obj); - else - generate_json_bignum(buffer, data, state, obj); 
-} -#endif - -static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { double value = RFLOAT_VALUE(obj); - char allow_nan = state->allow_nan; - VALUE tmp = rb_funcall(obj, i_to_s, 0); - if (!allow_nan) { - if (isinf(value) || isnan(value)) { - raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", tmp); + char allow_nan = data->state->allow_nan; + if (isinf(value) || isnan(value)) { + /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */ + if (!allow_nan) { + if (data->state->strict && data->state->as_json) { + VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse); + if (casted_obj != obj) { + increase_depth(data); + generate_json(buffer, data, casted_obj); + data->depth--; + return; + } + } + raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0)); } + + VALUE tmp = rb_funcall(obj, i_to_s, 0); + fbuffer_append_str(buffer, tmp); + return; } - fbuffer_append_str(buffer, tmp); + + /* This implementation writes directly into the buffer. We reserve + * the 32 characters that fpconv_dtoa states as its maximum. + */ + fbuffer_inc_capa(buffer, 32); + char* d = buffer->ptr + buffer->len; + int len = fpconv_dtoa(value, d); + /* fpconv_dtoa converts a float to its shortest string representation, + * but it adds a ".0" if this is a plain integer. 
+ */ + fbuffer_consumed(buffer, len); } -static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - VALUE tmp; + VALUE fragment = RSTRUCT_GET(obj, 0); + Check_Type(fragment, T_STRING); + fbuffer_append_str(buffer, fragment); +} + +static inline void generate_json_general(FBuffer *buffer, struct generate_json_data *data, VALUE obj, bool fallback) +{ + bool as_json_called = false; +start: if (obj == Qnil) { - generate_json_null(buffer, data, state, obj); + generate_json_null(buffer, data, obj); } else if (obj == Qfalse) { - generate_json_false(buffer, data, state, obj); + generate_json_false(buffer, data, obj); } else if (obj == Qtrue) { - generate_json_true(buffer, data, state, obj); + generate_json_true(buffer, data, obj); } else if (RB_SPECIAL_CONST_P(obj)) { if (RB_FIXNUM_P(obj)) { - generate_json_fixnum(buffer, data, state, obj); + generate_json_fixnum(buffer, data, obj); } else if (RB_FLONUM_P(obj)) { - generate_json_float(buffer, data, state, obj); + generate_json_float(buffer, data, obj); + } else if (RB_STATIC_SYM_P(obj)) { + generate_json_symbol(buffer, data, obj); } else { goto general; } @@ -992,61 +1225,85 @@ static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON VALUE klass = RBASIC_CLASS(obj); switch (RB_BUILTIN_TYPE(obj)) { case T_BIGNUM: - generate_json_bignum(buffer, data, state, obj); + generate_json_bignum(buffer, data, obj); break; case T_HASH: - if (klass != rb_cHash) goto general; - generate_json_object(buffer, data, state, obj); + if (fallback && klass != rb_cHash) goto general; + generate_json_object(buffer, data, obj); break; case T_ARRAY: - if (klass != rb_cArray) goto general; - generate_json_array(buffer, data, state, obj); + if (fallback && klass != rb_cArray) goto general; + generate_json_array(buffer, data, obj); break; case T_STRING: - if (klass != 
rb_cString) goto general; - generate_json_string(buffer, data, state, obj); + if (fallback && klass != rb_cString) goto general; + + if (RB_LIKELY(valid_json_string_p(obj))) { + raw_generate_json_string(buffer, data, obj); + } else if (as_json_called) { + raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); + } else { + obj = ensure_valid_encoding(data, obj, false, false); + as_json_called = true; + goto start; + } + break; + case T_SYMBOL: + generate_json_symbol(buffer, data, obj); break; case T_FLOAT: - if (klass != rb_cFloat) goto general; - generate_json_float(buffer, data, state, obj); + if (fallback && klass != rb_cFloat) goto general; + generate_json_float(buffer, data, obj); + break; + case T_STRUCT: + if (klass != cFragment) goto general; + generate_json_fragment(buffer, data, obj); break; default: general: - if (state->strict) { - raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj)); - } else if (rb_respond_to(obj, i_to_json)) { - tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data)); - Check_Type(tmp, T_STRING); - fbuffer_append_str(buffer, tmp); + if (data->state->strict) { + if (RTEST(data->state->as_json) && !as_json_called) { + obj = json_call_as_json(data->state, obj, Qfalse); + as_json_called = true; + goto start; + } else { + raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj)); + } } else { - tmp = rb_funcall(obj, i_to_s, 0); - Check_Type(tmp, T_STRING); - generate_json_string(buffer, data, state, tmp); + generate_json_fallback(buffer, data, obj); } } } } +static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + generate_json_general(buffer, data, obj, true); +} + +static void generate_json_no_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + generate_json_general(buffer, data, obj, false); +} + static VALUE generate_json_try(VALUE d) { struct generate_json_data *data = (struct generate_json_data *)d; - 
data->func(data->buffer, data, data->state, data->obj); + data->func(data->buffer, data, data->obj); - return Qnil; + return fbuffer_finalize(data->buffer); } -static VALUE generate_json_rescue(VALUE d, VALUE exc) +static VALUE generate_json_ensure(VALUE d) { struct generate_json_data *data = (struct generate_json_data *)d; fbuffer_free(data->buffer); - rb_exc_raise(exc); - return Qundef; } -static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io) +static inline VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io) { GET_STATE(self); @@ -1058,14 +1315,13 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, struct generate_json_data data = { .buffer = &buffer, - .vstate = self, + .vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json .state = state, + .depth = state->depth, .obj = obj, .func = func }; - rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); - - return fbuffer_finalize(&buffer); + return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data); } /* call-seq: @@ -1081,10 +1337,16 @@ static VALUE cState_generate(int argc, VALUE *argv, VALUE self) rb_check_arity(argc, 1, 2); VALUE obj = argv[0]; VALUE io = argc > 1 ? argv[1] : Qnil; - VALUE result = cState_partial_generate(self, obj, generate_json, io); - GET_STATE(self); - (void)state; - return result; + return cState_partial_generate(self, obj, generate_json, io); +} + +/* :nodoc: */ +static VALUE cState_generate_no_fallback(int argc, VALUE *argv, VALUE self) +{ + rb_check_arity(argc, 1, 2); + VALUE obj = argv[0]; + VALUE io = argc > 1 ? 
argv[1] : Qnil; + return cState_partial_generate(self, obj, generate_json_no_fallback, io); } static VALUE cState_initialize(int argc, VALUE *argv, VALUE self) @@ -1109,11 +1371,14 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig) if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State"); MEMCPY(objState, origState, JSON_Generator_State, 1); - objState->indent = origState->indent; - objState->space = origState->space; - objState->space_before = origState->space_before; - objState->object_nl = origState->object_nl; - objState->array_nl = origState->array_nl; + + RB_OBJ_WRITTEN(obj, Qundef, objState->indent); + RB_OBJ_WRITTEN(obj, Qundef, objState->space); + RB_OBJ_WRITTEN(obj, Qundef, objState->space_before); + RB_OBJ_WRITTEN(obj, Qundef, objState->object_nl); + RB_OBJ_WRITTEN(obj, Qundef, objState->array_nl); + RB_OBJ_WRITTEN(obj, Qundef, objState->as_json); + return obj; } @@ -1164,6 +1429,7 @@ static VALUE string_config(VALUE config) */ static VALUE cState_indent_set(VALUE self, VALUE indent) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->indent, string_config(indent)); return Qnil; @@ -1189,6 +1455,7 @@ static VALUE cState_space(VALUE self) */ static VALUE cState_space_set(VALUE self, VALUE space) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->space, string_config(space)); return Qnil; @@ -1212,6 +1479,7 @@ static VALUE cState_space_before(VALUE self) */ static VALUE cState_space_before_set(VALUE self, VALUE space_before) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->space_before, string_config(space_before)); return Qnil; @@ -1237,6 +1505,7 @@ static VALUE cState_object_nl(VALUE self) */ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl)); return Qnil; @@ -1260,11 +1529,35 @@ static VALUE cState_array_nl(VALUE self) */ static VALUE cState_array_nl_set(VALUE 
self, VALUE array_nl) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl)); return Qnil; } +/* + * call-seq: as_json() + * + * Returns the +as_json+ Proc configured on this state, if one was set. + */ +static VALUE cState_as_json(VALUE self) +{ + GET_STATE(self); + return state->as_json; +} + +/* + * call-seq: as_json=(as_json) + * + * Sets the +as_json+ Proc (the argument is converted with +to_proc+). In strict mode, it is called to convert objects that are not natively supported by JSON. + */ +static VALUE cState_as_json_set(VALUE self, VALUE as_json) +{ + rb_check_frozen(self); + GET_STATE(self); + RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc")); + return Qnil; +} /* * call-seq: check_circular? @@ -1292,7 +1585,21 @@ static VALUE cState_max_nesting(VALUE self) static long long_config(VALUE num) { - return RTEST(num) ? FIX2LONG(num) : 0; + return RTEST(num) ? NUM2LONG(num) : 0; +} + +// depth must never be negative; reject early with a clear error. +static long depth_config(VALUE num) +{ + if (!RTEST(num)) return 0; + long d = NUM2LONG(num); + if (RB_UNLIKELY(d < 0)) { + rb_raise(rb_eArgError, "depth must be >= 0 (got %ld)", d); + } + if (RB_UNLIKELY(d > INT_MAX)) { + rb_raise(rb_eArgError, "depth is too large (got %ld)", d); + } + return d; } /* @@ -1303,6 +1610,7 @@ static long long_config(VALUE num) */ static VALUE cState_max_nesting_set(VALUE self, VALUE depth) { + rb_check_frozen(self); GET_STATE(self); state->max_nesting = long_config(depth); return Qnil; @@ -1328,6 +1636,7 @@ static VALUE cState_script_safe(VALUE self) */ static VALUE cState_script_safe_set(VALUE self, VALUE enable) { + rb_check_frozen(self); GET_STATE(self); state->script_safe = RTEST(enable); return Qnil; @@ -1359,6 +1668,7 @@ static VALUE cState_strict(VALUE self) */ static VALUE cState_strict_set(VALUE self, VALUE enable) { + rb_check_frozen(self); GET_STATE(self); state->strict = RTEST(enable); return Qnil; @@ -1383,6 +1693,7 @@ static VALUE cState_allow_nan_p(VALUE self) */ static 
VALUE cState_allow_nan_set(VALUE self, VALUE enable) { + rb_check_frozen(self); GET_STATE(self); state->allow_nan = RTEST(enable); return Qnil; @@ -1407,11 +1718,25 @@ static VALUE cState_ascii_only_p(VALUE self) */ static VALUE cState_ascii_only_set(VALUE self, VALUE enable) { + rb_check_frozen(self); GET_STATE(self); state->ascii_only = RTEST(enable); return Qnil; } +static VALUE cState_allow_duplicate_key_p(VALUE self) +{ + GET_STATE(self); + switch (state->on_duplicate_key) { + case JSON_IGNORE: + return Qtrue; + case JSON_DEPRECATED: + return Qnil; + default: + return Qfalse; + } +} + /* * call-seq: depth * @@ -1431,8 +1756,9 @@ static VALUE cState_depth(VALUE self) */ static VALUE cState_depth_set(VALUE self, VALUE depth) { + rb_check_frozen(self); GET_STATE(self); - state->depth = long_config(depth); + state->depth = depth_config(depth); return Qnil; } @@ -1464,32 +1790,54 @@ static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_ */ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length) { + rb_check_frozen(self); GET_STATE(self); buffer_initial_length_set(state, buffer_initial_length); return Qnil; } +struct configure_state_data { + JSON_Generator_State *state; + VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated +}; + +static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value) +{ + if (RTEST(data->vstate)) { + RB_OBJ_WRITE(data->vstate, field, value); + } else { + *field = value; + } +} + static int configure_state_i(VALUE key, VALUE val, VALUE _arg) { - JSON_Generator_State *state = (JSON_Generator_State *)_arg; + struct configure_state_data *data = (struct configure_state_data *)_arg; + JSON_Generator_State *state = data->state; - if (key == sym_indent) { state->indent = string_config(val); } - else if (key == sym_space) { state->space = string_config(val); } - else if (key == sym_space_before) { state->space_before = 
string_config(val); } - else if (key == sym_object_nl) { state->object_nl = string_config(val); } - else if (key == sym_array_nl) { state->array_nl = string_config(val); } + if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); } + else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); } + else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); } + else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); } + else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); } else if (key == sym_max_nesting) { state->max_nesting = long_config(val); } else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); } else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); } - else if (key == sym_depth) { state->depth = long_config(val); } + else if (key == sym_depth) { state->depth = depth_config(val); } else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); } else if (key == sym_script_safe) { state->script_safe = RTEST(val); } else if (key == sym_escape_slash) { state->script_safe = RTEST(val); } else if (key == sym_strict) { state->strict = RTEST(val); } + else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } + else if (key == sym_as_json) { + VALUE proc = RTEST(val) ? 
rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; + state->as_json_single_arg = proc && rb_proc_arity(proc) == 1; + state_write_value(data, &state->as_json, proc); + } return ST_CONTINUE; } -static void configure_state(JSON_Generator_State *state, VALUE config) +static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config) { if (!RTEST(config)) return; @@ -1497,23 +1845,29 @@ static void configure_state(JSON_Generator_State *state, VALUE config) if (!RHASH_SIZE(config)) return; + struct configure_state_data data = { + .state = state, + .vstate = vstate + }; + // We assume in most cases few keys are set so it's faster to go over // the provided keys than to check all possible keys. - rb_hash_foreach(config, configure_state_i, (VALUE)state); + rb_hash_foreach(config, configure_state_i, (VALUE)&data); } static VALUE cState_configure(VALUE self, VALUE opts) { + rb_check_frozen(self); GET_STATE(self); - configure_state(state, opts); + configure_state(state, self, opts); return self; } -static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) +static VALUE cState_m_do_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io, generator_func func) { JSON_Generator_State state = {0}; state_init(&state); - configure_state(&state, opts); + configure_state(&state, Qfalse, opts); char stack_buffer[FBUFFER_STACK_SIZE]; FBuffer buffer = { @@ -1525,17 +1879,23 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) .buffer = &buffer, .vstate = Qfalse, .state = &state, + .depth = state.depth, .obj = obj, - .func = generate_json, + .func = func, }; - rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); + return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data); +} - return fbuffer_finalize(&buffer); +static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) +{ + return cState_m_do_generate(klass, obj, opts, io, generate_json); +} + 
+static VALUE cState_m_generate_no_fallback(VALUE klass, VALUE obj, VALUE opts, VALUE io) +{ + return cState_m_do_generate(klass, obj, opts, io, generate_json_no_fallback); } -/* - * - */ void Init_generator(void) { #ifdef HAVE_RB_EXT_RACTOR_SAFE @@ -1546,6 +1906,10 @@ void Init_generator(void) rb_require("json/common"); mJSON = rb_define_module("JSON"); + + rb_global_variable(&cFragment); + cFragment = rb_const_get(mJSON, rb_intern("Fragment")); + VALUE mExt = rb_define_module_under(mJSON, "Ext"); VALUE mGenerator = rb_define_module_under(mExt, "Generator"); @@ -1573,6 +1937,8 @@ void Init_generator(void) rb_define_method(cState, "object_nl=", cState_object_nl_set, 1); rb_define_method(cState, "array_nl", cState_array_nl, 0); rb_define_method(cState, "array_nl=", cState_array_nl_set, 1); + rb_define_method(cState, "as_json", cState_as_json, 0); + rb_define_method(cState, "as_json=", cState_as_json_set, 1); rb_define_method(cState, "max_nesting", cState_max_nesting, 0); rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1); rb_define_method(cState, "script_safe", cState_script_safe, 0); @@ -1594,50 +1960,12 @@ void Init_generator(void) rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0); rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1); rb_define_method(cState, "generate", cState_generate, -1); + rb_define_method(cState, "_generate_no_fallback", cState_generate_no_fallback, -1); - rb_define_singleton_method(cState, "generate", cState_m_generate, 3); - - VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); - - VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object"); - rb_define_method(mObject, "to_json", mObject_to_json, -1); - - VALUE mHash = rb_define_module_under(mGeneratorMethods, "Hash"); - rb_define_method(mHash, "to_json", mHash_to_json, -1); - - VALUE mArray = rb_define_module_under(mGeneratorMethods, "Array"); - 
rb_define_method(mArray, "to_json", mArray_to_json, -1); - -#ifdef RUBY_INTEGER_UNIFICATION - VALUE mInteger = rb_define_module_under(mGeneratorMethods, "Integer"); - rb_define_method(mInteger, "to_json", mInteger_to_json, -1); -#else - VALUE mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum"); - rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1); - - VALUE mBignum = rb_define_module_under(mGeneratorMethods, "Bignum"); - rb_define_method(mBignum, "to_json", mBignum_to_json, -1); -#endif - VALUE mFloat = rb_define_module_under(mGeneratorMethods, "Float"); - rb_define_method(mFloat, "to_json", mFloat_to_json, -1); - - VALUE mString = rb_define_module_under(mGeneratorMethods, "String"); - rb_define_singleton_method(mString, "included", mString_included_s, 1); - rb_define_method(mString, "to_json", mString_to_json, -1); - rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1); - rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0); - - mString_Extend = rb_define_module_under(mString, "Extend"); - rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1); + rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0); - VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass"); - rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1); - - VALUE mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass"); - rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1); - - VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass"); - rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1); + rb_define_singleton_method(cState, "generate", cState_m_generate, 3); + rb_define_singleton_method(cState, "_generate_no_fallback", cState_m_generate_no_fallback, 3); rb_global_variable(&Encoding_UTF_8); Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); @@ -1645,10 +1973,6 @@ void 
Init_generator(void) i_to_s = rb_intern("to_s"); i_to_json = rb_intern("to_json"); i_new = rb_intern("new"); - i_pack = rb_intern("pack"); - i_unpack = rb_intern("unpack"); - i_create_id = rb_intern("create_id"); - i_extend = rb_intern("extend"); i_encode = rb_intern("encode"); sym_indent = ID2SYM(rb_intern("indent")); @@ -1664,10 +1988,14 @@ void Init_generator(void) sym_script_safe = ID2SYM(rb_intern("script_safe")); sym_escape_slash = ID2SYM(rb_intern("escape_slash")); sym_strict = ID2SYM(rb_intern("strict")); + sym_as_json = ID2SYM(rb_intern("as_json")); + sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key")); usascii_encindex = rb_usascii_encindex(); utf8_encindex = rb_utf8_encindex(); binary_encindex = rb_ascii8bit_encindex(); rb_require("json/ext/generator/state"); + + simd_impl = find_simd_implementation(); } diff --git a/ext/json/json.gemspec b/ext/json/json.gemspec index 321a85fcf9..5575731025 100644 --- a/ext/json/json.gemspec +++ b/ext/json/json.gemspec @@ -11,14 +11,13 @@ spec = Gem::Specification.new do |s| s.version = version s.summary = "JSON Implementation for Ruby" - s.homepage = "https://ruby.github.io/json" + s.homepage = "https://github.com/ruby/json" s.metadata = { 'bug_tracker_uri' => 'https://github.com/ruby/json/issues', 'changelog_uri' => 'https://github.com/ruby/json/blob/master/CHANGES.md', - 'documentation_uri' => 'https://ruby.github.io/json/doc/index.html', + 'documentation_uri' => 'https://docs.ruby-lang.org/en/master/JSON.html', 'homepage_uri' => s.homepage, 'source_code_uri' => 'https://github.com/ruby/json', - 'wiki_uri' => 'https://github.com/ruby/json/wiki' } s.required_ruby_version = Gem::Requirement.new(">= 2.7") @@ -45,15 +44,14 @@ spec = Gem::Specification.new do |s| "LEGAL", "README.md", "json.gemspec", - *Dir["lib/**/*.rb"], - ] + ] + Dir.glob("lib/**/*.rb", base: File.expand_path("..", __FILE__)) if java_ext s.platform = 'java' s.files += Dir["lib/json/ext/**/*.jar"] else s.extensions = 
Dir["ext/json/**/extconf.rb"] - s.files += Dir["ext/json/**/*.{c,h,rl}"] + s.files += Dir["ext/json/**/*.{c,h,rb}"] end end diff --git a/ext/json/json.h b/ext/json/json.h new file mode 100644 index 0000000000..cf9420d4dd --- /dev/null +++ b/ext/json/json.h @@ -0,0 +1,134 @@ +#ifndef _JSON_H_ +#define _JSON_H_ + +#include "ruby.h" +#include "ruby/encoding.h" +#include <stdint.h> + +#ifndef RBIMPL_ASSERT_OR_ASSUME +# define RBIMPL_ASSERT_OR_ASSUME(x) +#endif + +#if defined(RUBY_DEBUG) && RUBY_DEBUG +# define JSON_ASSERT RUBY_ASSERT +#else +# ifdef JSON_DEBUG +# include <assert.h> +# define JSON_ASSERT(x) assert(x) +# else +# define JSON_ASSERT(x) +# endif +#endif + +/* shims */ + +#if SIZEOF_UINT64_T == SIZEOF_LONG_LONG +# define INT64T2NUM(x) LL2NUM(x) +# define UINT64T2NUM(x) ULL2NUM(x) +#elif SIZEOF_UINT64_T == SIZEOF_LONG +# define INT64T2NUM(x) LONG2NUM(x) +# define UINT64T2NUM(x) ULONG2NUM(x) +#else +# error No uint64_t conversion +#endif + +/* This is the fallback definition from Ruby 3.4 */ +#ifndef RBIMPL_STDBOOL_H +#if defined(__cplusplus) +# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) +# include <cstdbool> +# endif +#elif defined(HAVE_STDBOOL_H) +# include <stdbool.h> +#elif !defined(HAVE__BOOL) +typedef unsigned char _Bool; +# define bool _Bool +# define true ((_Bool)+1) +# define false ((_Bool)+0) +# define __bool_true_false_are_defined +#endif +#endif + +#ifndef HAVE_RUBY_XFREE_SIZED +static inline void ruby_xfree_sized(void *ptr, size_t oldsize) +{ + ruby_xfree(ptr); +} + +static inline void *ruby_xrealloc2_sized(void *ptr, size_t new_elems, size_t elem_size, size_t old_elems) +{ + return ruby_xrealloc2(ptr, new_elems, elem_size); +} +#endif + +# define JSON_SIZED_REALLOC_N(v, T, m, n) \ + ((v) = (T *)ruby_xrealloc2_sized((void *)(v), (m), sizeof(T), (n))) + +# define JSON_SIZED_FREE(v) ruby_xfree_sized((void *)(v), sizeof(*(v))) +# define JSON_SIZED_FREE_N(v, n) ruby_xfree_sized((void *)(v), sizeof(*(v)) * (n)) + +#ifndef 
HAVE_RB_EXT_RACTOR_SAFE +# undef RUBY_TYPED_FROZEN_SHAREABLE +# define RUBY_TYPED_FROZEN_SHAREABLE 0 +#endif + +#ifdef RUBY_TYPED_EMBEDDABLE +# define HAVE_RUBY_TYPED_EMBEDDABLE 1 +#else +# ifdef HAVE_CONST_RUBY_TYPED_EMBEDDABLE +# define RUBY_TYPED_EMBEDDABLE RUBY_TYPED_EMBEDDABLE +# define HAVE_RUBY_TYPED_EMBEDDABLE 1 +# else +# define RUBY_TYPED_EMBEDDABLE 0 +# endif +#endif + +#ifndef NORETURN +#if defined(__has_attribute) && __has_attribute(noreturn) +#define NORETURN(x) __attribute__((noreturn)) x +#else +#define NORETURN(x) x +#endif +#endif + +#ifndef NOINLINE +#if defined(__has_attribute) && __has_attribute(noinline) +#define NOINLINE(x) __attribute__((noinline)) x +#else +#define NOINLINE(x) x +#endif +#endif + +#ifndef ALWAYS_INLINE +#if defined(__has_attribute) && __has_attribute(always_inline) +#define ALWAYS_INLINE(x) inline __attribute__((always_inline)) x +#else +#define ALWAYS_INLINE(x) inline x +#endif +#endif + +#ifndef RB_UNLIKELY +#define RB_UNLIKELY(expr) expr +#endif + +#ifndef RB_LIKELY +#define RB_LIKELY(expr) expr +#endif + +#ifndef MAYBE_UNUSED +# define MAYBE_UNUSED(x) x +#endif + +#ifdef RUBY_DEBUG +#ifndef JSON_DEBUG +#define JSON_DEBUG RUBY_DEBUG +#endif +#endif + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && INTPTR_MAX == INT64_MAX +#define JSON_CPU_LITTLE_ENDIAN_64BITS 1 +#else +#define JSON_CPU_LITTLE_ENDIAN_64BITS 0 +#endif + +#endif // _JSON_H_ diff --git a/ext/json/lib/json.rb b/ext/json/lib/json.rb index dfd9b7dfc2..26d601926f 100644 --- a/ext/json/lib/json.rb +++ b/ext/json/lib/json.rb @@ -6,6 +6,15 @@ require 'json/common' # # \JSON is a lightweight data-interchange format. # +# \JSON is easy for us humans to read and write, +# and equally simple for machines to read (parse) and write (generate). +# +# \JSON is language-independent, making it an ideal interchange format +# for applications in differing programming languages +# and on differing operating systems. 
+# +# == \JSON Values +# # A \JSON value is one of the following: # - Double-quoted text: <tt>"foo"</tt>. # - Number: +1+, +1.0+, +2.0e2+. @@ -127,6 +136,24 @@ require 'json/common' # # --- # +# Option +allow_duplicate_key+ specifies whether duplicate keys in objects +# should be ignored or cause an error to be raised: +# +# When not specified: +# # The last value is used and a deprecation warning emitted. +# JSON.parse('{"a": 1, "a":2}') => {"a" => 2} +# # warning: detected duplicate keys in JSON object. +# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true` +# +# When set to `+true+` +# # The last value is used. +# JSON.parse('{"a": 1, "a":2}') => {"a" => 2} +# +# When set to `+false+`, the future default: +# JSON.parse('{"a": 1, "a":2}') => duplicate key at line 1 column 1 (JSON::ParserError) +# +# --- +# # Option +allow_nan+ (boolean) specifies whether to allow # NaN, Infinity, and MinusInfinity in +source+; # defaults to +false+. @@ -143,8 +170,47 @@ require 'json/common' # ruby = JSON.parse(source, {allow_nan: true}) # ruby # => [NaN, Infinity, -Infinity] # +# --- +# +# Option +allow_trailing_comma+ (boolean) specifies whether to allow +# trailing commas in objects and arrays; +# defaults to +false+. +# +# With the default, +false+: +# JSON.parse('[1,]') # unexpected character: ']' at line 1 column 4 (JSON::ParserError) +# +# When enabled: +# JSON.parse('[1,]', allow_trailing_comma: true) # => [1] +# +# --- +# +# Option +allow_control_characters+ (boolean) specifies whether to allow +# unescaped ASCII control characters, such as newlines, in strings; +# defaults to +false+. 
+# +# With the default, +false+: +# JSON.parse(%{"Hello\nWorld"}) # invalid ASCII control character in string (JSON::ParserError) +# +# When enabled: +# JSON.parse(%{"Hello\nWorld"}, allow_control_characters: true) # => "Hello\nWorld" +# +# --- +# +# Option +allow_invalid_escape+ (boolean) specifies whether to ignore backslashes that are followed +# by an invalid escape character in strings; +# defaults to +false+. +# +# With the default, +false+: +# JSON.parse('"Hell\o"') # invalid escape character in string (JSON::ParserError) +# +# When enabled: +# JSON.parse('"Hell\o"', allow_invalid_escape: true) # => "Hello" +# # ====== Output Options # +# Option +freeze+ (boolean) specifies whether the returned objects will be frozen; +# defaults to +false+. +# # Option +symbolize_names+ (boolean) specifies whether returned \Hash keys # should be Symbols; # defaults to +false+ (use Strings). @@ -269,8 +335,27 @@ require 'json/common' # JSON.generate(JSON::MinusInfinity) # # Allow: -# ruby = [Float::NaN, Float::Infinity, Float::MinusInfinity] -# JSON.generate(ruby, allow_nan: true) # => '[NaN,Infinity,-Infinity]' +# ruby = [Float::NAN, Float::INFINITY, JSON::NaN, JSON::Infinity, JSON::MinusInfinity] +# JSON.generate(ruby, allow_nan: true) # => '[NaN,Infinity,NaN,Infinity,-Infinity]' +# +# --- +# +# Option +allow_duplicate_key+ (boolean) specifies whether +# hashes with duplicate keys should be allowed or produce an error. +# defaults to emit a deprecation warning. +# +# With the default, (not set): +# Warning[:deprecated] = true +# JSON.generate({ foo: 1, "foo" => 2 }) +# # warning: detected duplicate key "foo" in {foo: 1, "foo" => 2}. 
+# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true` +# # => '{"foo":1,"foo":2}' +# +# With <tt>false</tt> +# JSON.generate({ foo: 1, "foo" => 2 }, allow_duplicate_key: false) +# # detected duplicate key "foo" in {foo: 1, "foo" => 2} (JSON::GeneratorError) +# +# In version 3.0, <tt>false</tt> will become the default. # # --- # @@ -351,6 +436,9 @@ require 'json/common' # # == \JSON Additions # +# Note that JSON Additions must only be used with trusted data, and are +# deprecated. +# # When you "round trip" a non-\String object from Ruby to \JSON and back, # you have a new \String, instead of the object you began with: # ruby0 = Range.new(0, 2) diff --git a/ext/json/lib/json/add/core.rb b/ext/json/lib/json/add/core.rb index 485f097fff..61ff454212 100644 --- a/ext/json/lib/json/add/core.rb +++ b/ext/json/lib/json/add/core.rb @@ -7,6 +7,7 @@ require 'json/add/date_time' require 'json/add/exception' require 'json/add/range' require 'json/add/regexp' +require 'json/add/string' require 'json/add/struct' require 'json/add/symbol' require 'json/add/time' diff --git a/ext/json/lib/json/add/string.rb b/ext/json/lib/json/add/string.rb new file mode 100644 index 0000000000..9c3bde27fb --- /dev/null +++ b/ext/json/lib/json/add/string.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true +unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED + require 'json' +end + +class String + # call-seq: json_create(o) + # + # Raw Strings are JSON Objects (the raw bytes are stored in an array for the + # key "raw"). The Ruby String can be created by this class method. + def self.json_create(object) + object["raw"].pack("C*") + end + + # call-seq: to_json_raw_object() + # + # This method creates a raw object hash, that can be nested into + # other data structures and will be generated as a raw string. This + # method should be used, if you want to convert raw strings to JSON + # instead of UTF-8 strings, e. g. binary data. 
+ def to_json_raw_object + { + JSON.create_id => self.class.name, + "raw" => unpack("C*"), + } + end + + # call-seq: to_json_raw(*args) + # + # This method creates a JSON text from the result of a call to + # to_json_raw_object of this String. + def to_json_raw(...) + to_json_raw_object.to_json(...) + end +end diff --git a/ext/json/lib/json/add/symbol.rb b/ext/json/lib/json/add/symbol.rb index 1566ebc121..806be4f025 100644 --- a/ext/json/lib/json/add/symbol.rb +++ b/ext/json/lib/json/add/symbol.rb @@ -36,8 +36,13 @@ class Symbol # # # {"json_class":"Symbol","s":"foo"} # - def to_json(*a) - as_json.to_json(*a) + def to_json(state = nil, *a) + state = ::JSON::State.from_state(state) + if state.strict? + super + else + as_json.to_json(state, *a) + end end # See #as_json. diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb index 3c85ef0655..230bf08012 100644 --- a/ext/json/lib/json/common.rb +++ b/ext/json/lib/json/common.rb @@ -5,10 +5,119 @@ require 'json/version' module JSON autoload :GenericObject, 'json/generic_object' - NOT_SET = Object.new.freeze - private_constant :NOT_SET + module ParserOptions # :nodoc: + class << self + def prepare(opts) + if opts[:object_class] || opts[:array_class] + opts = opts.dup + on_load = opts[:on_load] + + on_load = object_class_proc(opts[:object_class], on_load) if opts[:object_class] + on_load = array_class_proc(opts[:array_class], on_load) if opts[:array_class] + opts[:on_load] = on_load + end + + if opts.fetch(:create_additions, false) != false + opts = create_additions_proc(opts) + end + + opts + end + + private + + def object_class_proc(object_class, on_load) + ->(obj) do + if Hash === obj + object = object_class.new + obj.each { |k, v| object[k] = v } + obj = object + end + on_load.nil? ? obj : on_load.call(obj) + end + end + + def array_class_proc(array_class, on_load) + ->(obj) do + if Array === obj + array = array_class.new + obj.each { |v| array << v } + obj = array + end + on_load.nil? ? 
obj : on_load.call(obj) + end + end + + # TODO: extract :create_additions support to another gem for version 3.0 + def create_additions_proc(opts) + if opts[:symbolize_names] + raise ArgumentError, "options :symbolize_names and :create_additions cannot be used in conjunction" + end + + opts = opts.dup + create_additions = opts.fetch(:create_additions, false) + on_load = opts[:on_load] + object_class = opts[:object_class] || Hash + + opts[:on_load] = ->(object) do + case object + when String + opts[:match_string]&.each do |pattern, klass| + if match = pattern.match(object) + create_additions_warning if create_additions.nil? + object = klass.json_create(object) + break + end + end + when object_class + if opts[:create_additions] != false + if class_path = object[JSON.create_id] + klass = begin + Object.const_get(class_path) + rescue NameError => e + raise ArgumentError, "can't get const #{class_path}: #{e}" + end + + if klass.respond_to?(:json_creatable?) ? klass.json_creatable? : klass.respond_to?(:json_create) + create_additions_warning if create_additions.nil? + object = klass.json_create(object) + end + end + end + end + + on_load.nil? ? object : on_load.call(object) + end + + opts + end + + def create_additions_warning + JSON.deprecation_warning "JSON.load implicit support for `create_additions: true` is deprecated " \ + "and will be removed in 3.0, use JSON.unsafe_load or explicitly " \ + "pass `create_additions: true`" + end + end + end class << self + def deprecation_warning(message, uplevel = 3) # :nodoc: + gem_root = File.expand_path("..", __dir__) + "/" + caller_locations(uplevel, 10).each do |frame| + if frame.path.nil? 
|| frame.path.start_with?(gem_root) || frame.path.end_with?("/truffle/cext_ruby.rb", ".c") + uplevel += 1 + else + break + end + end + + if RUBY_VERSION >= "3.0" + warn(message, uplevel: uplevel, category: :deprecated) + else + warn(message, uplevel: uplevel) + end + end + # :call-seq: # JSON[object] -> new_array or new_string # @@ -20,7 +129,7 @@ module JSON # Otherwise, calls JSON.generate with +object+ and +opts+ (see method #generate): # ruby = [0, 1, nil] # JSON[ruby] # => '[0,1,null]' - def [](object, opts = {}) + def [](object, opts = nil) if object.is_a?(String) return JSON.parse(object, opts) elsif object.respond_to?(:to_str) @@ -43,64 +152,76 @@ module JSON const_set :Parser, parser end - # Return the constant located at _path_. The format of _path_ has to be - # either ::A::B::C or A::B::C. In any case, A has to be located at the top - # level (absolute namespace path?). If there doesn't exist a constant at - # the given path, an ArgumentError is raised. - def deep_const_get(path) # :nodoc: - Object.const_get(path) - rescue NameError => e - raise ArgumentError, "can't get const #{path}: #{e}" - end - # Set the module _generator_ to be used by JSON. 
def generator=(generator) # :nodoc: old, $VERBOSE = $VERBOSE, nil @generator = generator - generator_methods = generator::GeneratorMethods - for const in generator_methods.constants - klass = const_get(const) - modul = generator_methods.const_get(const) - klass.class_eval do - instance_methods(false).each do |m| - m.to_s == 'to_json' and remove_method m + if generator.const_defined?(:GeneratorMethods) + generator_methods = generator::GeneratorMethods + for const in generator_methods.constants + klass = const_get(const) + modul = generator_methods.const_get(const) + klass.class_eval do + instance_methods(false).each do |m| + m.to_s == 'to_json' and remove_method m + end + include modul end - include modul end end self.state = generator::State - const_set :State, self.state - const_set :SAFE_STATE_PROTOTYPE, State.new # for JRuby - const_set :FAST_STATE_PROTOTYPE, create_fast_state - const_set :PRETTY_STATE_PROTOTYPE, create_pretty_state + const_set :State, state ensure $VERBOSE = old end - def create_fast_state - State.new( - :indent => '', - :space => '', - :object_nl => "", - :array_nl => "", - :max_nesting => false - ) - end - - def create_pretty_state - State.new( - :indent => ' ', - :space => ' ', - :object_nl => "\n", - :array_nl => "\n" - ) - end - # Returns the JSON generator module that is used by JSON. attr_reader :generator # Sets or Returns the JSON generator state class that is used by JSON. 
attr_accessor :state + + private + + # Called from the extension when a hash has both string and symbol keys + def on_mixed_keys_hash(hash, do_raise) + set = {} + hash.each_key do |key| + key_str = key.to_s + + if set[key_str] + message = "detected duplicate key #{key_str.inspect} in #{hash.inspect}" + if do_raise + raise GeneratorError, message + else + deprecation_warning("#{message}.\nThis will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`") + end + else + set[key_str] = true + end + end + end + + def deprecated_singleton_attr_accessor(*attrs) + args = RUBY_VERSION >= "3.0" ? ", category: :deprecated" : "" + attrs.each do |attr| + singleton_class.class_eval <<~RUBY + def #{attr} + warn "JSON.#{attr} is deprecated and will be removed in json 3.0.0", uplevel: 1 #{args} + @#{attr} + end + + def #{attr}=(val) + warn "JSON.#{attr}= is deprecated and will be removed in json 3.0.0", uplevel: 1 #{args} + @#{attr} = val + end + + def _#{attr} + @#{attr} + end + RUBY + end + end end # Sets create identifier, which is used to decide if the _json_create_ @@ -116,32 +237,24 @@ module JSON Thread.current[:"JSON.create_id"] || 'json_class' end - NaN = 0.0/0 + NaN = Float::NAN - Infinity = 1.0/0 + Infinity = Float::INFINITY MinusInfinity = -Infinity # The base exception for JSON errors. - class JSONError < StandardError - def self.wrap(exception) - obj = new("Wrapped(#{exception.class}): #{exception.message.inspect}") - obj.set_backtrace exception.backtrace - obj - end - end + class JSONError < StandardError; end # This exception is raised if a parser error occurs. - class ParserError < JSONError; end + class ParserError < JSONError + attr_reader :line, :column + end # This exception is raised if the nesting of parsed data structures is too # deep. class NestingError < ParserError; end - # :stopdoc: - class CircularDatastructure < NestingError; end - # :startdoc: - # This exception is raised if a generator or unparser error occurs. 
class GeneratorError < JSONError attr_reader :invalid_object @@ -152,20 +265,40 @@ module JSON end def detailed_message(...) + # Exception#detailed_message doesn't exist until Ruby 3.2 + super_message = defined?(super) ? super : message + if @invalid_object.nil? - super + super_message else - "#{super}\nInvalid object: #{@invalid_object.inspect}" + "#{super_message}\nInvalid object: #{@invalid_object.inspect}" end end end - # For backwards compatibility - UnparserError = GeneratorError # :nodoc: + # Fragment of JSON document that is to be included as is: + # fragment = JSON::Fragment.new("[1, 2, 3]") + # JSON.generate({ count: 3, items: fragment }) + # + # This makes it easy to assemble multiple JSON fragments that have + # been persisted somewhere without having to parse them or resort + # to string interpolation. + # + # Note: no validation is performed on the provided string. It is the + # responsibility of the caller to ensure the string contains valid JSON. + Fragment = Struct.new(:json) do + def initialize(json) + unless string = String.try_convert(json) + raise TypeError, " no implicit conversion of #{json.class} into String" + end - # This exception is raised if the required unicode support is missing on the - # system. Usually this means that the iconv library is not installed. - class MissingUnicodeSupport < JSONError; end + super(string) + end + + def to_json(state = nil, *) + json + end + end module_function @@ -218,9 +351,16 @@ module JSON # JSON.parse('') # def parse(source, opts = nil) + opts = ParserOptions.prepare(opts) unless opts.nil? Parser.parse(source, opts) end + PARSE_L_OPTIONS = { + max_nesting: false, + allow_nan: true, + }.freeze + private_constant :PARSE_L_OPTIONS + # :call-seq: # JSON.parse!(source, opts) -> object # @@ -233,12 +373,11 @@ module JSON # which disables checking for nesting depth. # - Option +allow_nan+, if not provided, defaults to +true+. 
def parse!(source, opts = nil) - options = { - :max_nesting => false, - :allow_nan => true - } - options.merge!(opts) if opts - Parser.new(source, options).parse + if opts.nil? + parse(source, PARSE_L_OPTIONS) + else + parse(source, PARSE_L_OPTIONS.merge(opts)) + end end # :call-seq: @@ -268,7 +407,7 @@ module JSON # # Returns a \String containing the generated \JSON data. # - # See also JSON.fast_generate, JSON.pretty_generate. + # See also JSON.pretty_generate. # # Argument +obj+ is the Ruby object to be converted to \JSON. # @@ -307,13 +446,6 @@ module JSON end end - # :stopdoc: - # I want to deprecate these later, so I'll first be silent about them, and - # later delete them. - alias unparse generate - module_function :unparse - # :startdoc: - # :call-seq: # JSON.fast_generate(obj, opts) -> new_string # @@ -328,19 +460,21 @@ module JSON # # Raises SystemStackError (stack level too deep): # JSON.fast_generate(a) def fast_generate(obj, opts = nil) - if State === opts - state = opts + if RUBY_VERSION >= "3.0" + warn "JSON.fast_generate is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1, category: :deprecated else - state = JSON.create_fast_state.configure(opts) + warn "JSON.fast_generate is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1 end - state.generate(obj) + generate(obj, opts) end - # :stopdoc: - # I want to deprecate these later, so I'll first be silent about them, and later delete them. 
- alias fast_unparse fast_generate - module_function :fast_unparse - # :startdoc: + PRETTY_GENERATE_OPTIONS = { + indent: ' ', + space: ' ', + object_nl: "\n", + array_nl: "\n", + }.freeze + private_constant :PRETTY_GENERATE_OPTIONS # :call-seq: # JSON.pretty_generate(obj, opts = nil) -> new_string @@ -373,57 +507,52 @@ module JSON # } # def pretty_generate(obj, opts = nil) - if State === opts - state, opts = opts, nil - else - state = JSON.create_pretty_state - end + return opts.generate(obj) if State === opts + + options = PRETTY_GENERATE_OPTIONS + if opts - if opts.respond_to? :to_hash - opts = opts.to_hash - elsif opts.respond_to? :to_h - opts = opts.to_h - else - raise TypeError, "can't convert #{opts.class} into Hash" + unless opts.is_a?(Hash) + if opts.respond_to? :to_hash + opts = opts.to_hash + elsif opts.respond_to? :to_h + opts = opts.to_h + else + raise TypeError, "can't convert #{opts.class} into Hash" + end end - state.configure(opts) + options = options.merge(opts) end - state.generate(obj) + + State.generate(obj, options, nil) end - # :stopdoc: - # I want to deprecate these later, so I'll first be silent about them, and later delete them. - alias pretty_unparse pretty_generate - module_function :pretty_unparse - # :startdoc: + # Sets or returns default options for the JSON.unsafe_load method. + # Initially: + # opts = JSON.load_default_options + # opts # => {:max_nesting=>false, :allow_nan=>true, :allow_blank=>true, :create_additions=>true} + deprecated_singleton_attr_accessor :unsafe_load_default_options - class << self - # Sets or returns default options for the JSON.unsafe_load method. 
- # Initially: - # opts = JSON.load_default_options - # opts # => {:max_nesting=>false, :allow_nan=>true, :allow_blank=>true, :create_additions=>true} - attr_accessor :unsafe_load_default_options - end - self.unsafe_load_default_options = { + @unsafe_load_default_options = { :max_nesting => false, :allow_nan => true, :allow_blank => true, :create_additions => true, } - class << self - # Sets or returns default options for the JSON.load method. - # Initially: - # opts = JSON.load_default_options - # opts # => {:max_nesting=>false, :allow_nan=>true, :allow_blank=>true, :create_additions=>true} - attr_accessor :load_default_options - end - self.load_default_options = { + # Sets or returns default options for the JSON.load method. + # Initially: + # opts = JSON.load_default_options + # opts # => {:max_nesting=>false, :allow_nan=>true, :allow_blank=>true, :create_additions=>true} + deprecated_singleton_attr_accessor :load_default_options + + @load_default_options = { :allow_nan => true, :allow_blank => true, :create_additions => nil, } # :call-seq: + # JSON.unsafe_load(source, options = {}) -> object # JSON.unsafe_load(source, proc = nil, options = {}) -> object # # Returns the Ruby objects created by parsing the given +source+. @@ -531,6 +660,7 @@ module JSON # when Array # obj.map! {|v| deserialize_obj v } # end + # obj # }) # pp ruby # Output: @@ -554,9 +684,14 @@ module JSON # def unsafe_load(source, proc = nil, options = nil) opts = if options.nil? - unsafe_load_default_options + if proc && proc.is_a?(Hash) + options, proc = proc, nil + options + else + _unsafe_load_default_options + end else - unsafe_load_default_options.merge(options) + _unsafe_load_default_options.merge(options) end unless source.is_a?(String) @@ -572,12 +707,17 @@ module JSON if opts[:allow_blank] && (source.nil? || source.empty?) 
source = 'null' end - result = parse(source, opts) - recurse_proc(result, &proc) if proc - result + + if proc + opts = opts.dup + opts[:on_load] = proc.to_proc + end + + parse(source, opts) end # :call-seq: + # JSON.load(source, options = {}) -> object # JSON.load(source, proc = nil, options = {}) -> object # # Returns the Ruby objects created by parsing the given +source+. @@ -691,6 +831,7 @@ module JSON # when Array # obj.map! {|v| deserialize_obj v } # end + # obj # }) # pp ruby # Output: @@ -713,10 +854,20 @@ module JSON # @attributes={"type"=>"Admin", "password"=>"0wn3d"}>} # def load(source, proc = nil, options = nil) + if proc && options.nil? && proc.is_a?(Hash) + options = proc + proc = nil + end + opts = if options.nil? - load_default_options + if proc && proc.is_a?(Hash) + options, proc = proc, nil + options + else + _load_default_options + end else - load_default_options.merge(options) + _load_default_options.merge(options) end unless source.is_a?(String) @@ -729,39 +880,24 @@ module JSON end end - if opts[:allow_blank] && (source.nil? || source.empty?) + if opts[:allow_blank] && (source.nil? || (String === source && source.empty?)) source = 'null' end - result = parse(source, opts) - recurse_proc(result, &proc) if proc - result - end - # Recursively calls passed _Proc_ if the parsed data structure is an _Array_ or _Hash_ - def recurse_proc(result, &proc) # :nodoc: - case result - when Array - result.each { |x| recurse_proc x, &proc } - proc.call result - when Hash - result.each { |x, y| recurse_proc x, &proc; recurse_proc y, &proc } - proc.call result - else - proc.call result + if proc + opts = opts.dup + opts[:on_load] = proc.to_proc end - end - alias restore load - module_function :restore - - class << self - # Sets or returns the default options for the JSON.dump method. 
- # Initially: - # opts = JSON.dump_default_options - # opts # => {:max_nesting=>false, :allow_nan=>true} - attr_accessor :dump_default_options + parse(source, opts) end - self.dump_default_options = { + + # Sets or returns the default options for the JSON.dump method. + # Initially: + # opts = JSON.dump_default_options + # opts # => {:max_nesting=>false, :allow_nan=>true} + deprecated_singleton_attr_accessor :dump_default_options + @dump_default_options = { :max_nesting => false, :allow_nan => true, } @@ -814,9 +950,9 @@ module JSON end end - opts = JSON.dump_default_options + opts = JSON._dump_default_options opts = opts.merge(:max_nesting => limit) if limit - opts = merge_dump_options(opts, **kwargs) if kwargs + opts = opts.merge(kwargs) if kwargs begin State.generate(obj, opts, anIO) @@ -825,18 +961,166 @@ module JSON end end - # Encodes string using String.encode. - def self.iconv(to, from, string) - string.encode(to, from) + # :stopdoc: + # All these were meant to be deprecated circa 2009, but were just set as undocumented + # so usage still exist in the wild. + def unparse(...) + if RUBY_VERSION >= "3.0" + warn "JSON.unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1, category: :deprecated + else + warn "JSON.unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1 + end + generate(...) + end + module_function :unparse + + def fast_unparse(...) + if RUBY_VERSION >= "3.0" + warn "JSON.fast_unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1, category: :deprecated + else + warn "JSON.fast_unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1 + end + generate(...) + end + module_function :fast_unparse + + def pretty_unparse(...) 
+ if RUBY_VERSION >= "3.0" + warn "JSON.pretty_unparse is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1, category: :deprecated + else + warn "JSON.pretty_unparse is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1 + end + pretty_generate(...) end + module_function :fast_unparse - def merge_dump_options(opts, strict: NOT_SET) - opts = opts.merge(strict: strict) if NOT_SET != strict - opts + def restore(...) + if RUBY_VERSION >= "3.0" + warn "JSON.restore is deprecated and will be removed in json 3.0.0, just use JSON.load", uplevel: 1, category: :deprecated + else + warn "JSON.restore is deprecated and will be removed in json 3.0.0, just use JSON.load", uplevel: 1 + end + load(...) end + module_function :restore class << self - private :merge_dump_options + private + + def const_missing(const_name) + case const_name + when :PRETTY_STATE_PROTOTYPE + if RUBY_VERSION >= "3.0" + warn "JSON::PRETTY_STATE_PROTOTYPE is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1, category: :deprecated + else + warn "JSON::PRETTY_STATE_PROTOTYPE is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1 + end + state.new(PRETTY_GENERATE_OPTIONS) + else + super + end + end + end + # :startdoc: + + # JSON::Coder holds a parser and generator configuration. + # + # module MyApp + # JSONC_CODER = JSON::Coder.new( + # allow_trailing_comma: true + # ) + # end + # + # MyApp::JSONC_CODER.load(document) + # + class Coder + # :call-seq: + # JSON.new(options = nil, &block) + # + # Argument +options+, if given, contains a \Hash of options for both parsing and generating. + # See {Parsing Options}[rdoc-ref:JSON@Parsing+Options], + # and {Generating Options}[rdoc-ref:JSON@Generating+Options]. + # + # For generation, the <tt>strict: true</tt> option is always set. 
When a Ruby object with no native \JSON counterpart is + # encountered, the block provided to the initialize method is invoked, and must return a Ruby object that has a native + # \JSON counterpart: + # + # module MyApp + # API_JSON_CODER = JSON::Coder.new do |object| + # case object + # when Time + # object.iso8601(3) + # else + # object # Unknown type, will raise + # end + # end + # end + # + # puts MyApp::API_JSON_CODER.dump(Time.now.utc) # => "2025-01-21T08:41:44.286Z" + # + def initialize(options = nil, &as_json) + if options.nil? + options = { strict: true } + else + options = options.dup + options[:strict] = true + end + options[:as_json] = as_json if as_json + + @state = State.new(options).freeze + @parser_config = Ext::Parser::Config.new(ParserOptions.prepare(options)).freeze + end + + # call-seq: + # dump(object) -> String + # dump(object, io) -> io + # + # Serialize the given object into a \JSON document. + def dump(object, io = nil) + @state.generate(object, io) + end + alias_method :generate, :dump + + # call-seq: + # load(string) -> Object + # + # Parse the given \JSON document and return an equivalent Ruby object. + def load(source) + @parser_config.parse(source) + end + alias_method :parse, :load + + # call-seq: + # load_file(path) -> Object + # + # Read the file at the given path as UTF-8, parse it as a \JSON document and return an equivalent Ruby object. + def load_file(path) + load(File.read(path, encoding: Encoding::UTF_8)) + end + end + + module GeneratorMethods + # call-seq: to_json(*) + # + # Converts this object into a JSON string. + # If this object doesn't directly map to a JSON native type, + # first convert it to a string (calling #to_s), then convert + # it to a JSON string, and return the result. + # This is a fallback, if no special method #to_json was defined for some object. + def to_json(state = nil, *) + obj = case self + when nil, false, true, Integer, Float, Array, Hash + self + else + "#{self}" + end + + if state.nil? 
+ JSON::State._generate_no_fallback(obj, nil, nil) + else + JSON::State.from_state(state)._generate_no_fallback(obj) + end + end end end @@ -846,8 +1130,14 @@ module ::Kernel # Outputs _objs_ to STDOUT as JSON strings in the shortest form, that is in # one line. def j(*objs) + if RUBY_VERSION >= "3.0" + warn "Kernel#j is deprecated and will be removed in json 3.0.0", uplevel: 1, category: :deprecated + else + warn "Kernel#j is deprecated and will be removed in json 3.0.0", uplevel: 1 + end + objs.each do |obj| - puts JSON::generate(obj, :allow_nan => true, :max_nesting => false) + puts JSON.generate(obj, :allow_nan => true, :max_nesting => false) end nil end @@ -855,8 +1145,14 @@ module ::Kernel # Outputs _objs_ to STDOUT as JSON strings in a pretty format, with # indentation and over many lines. def jj(*objs) + if RUBY_VERSION >= "3.0" + warn "Kernel#jj is deprecated and will be removed in json 3.0.0", uplevel: 1, category: :deprecated + else + warn "Kernel#jj is deprecated and will be removed in json 3.0.0", uplevel: 1 + end + objs.each do |obj| - puts JSON::pretty_generate(obj, :allow_nan => true, :max_nesting => false) + puts JSON.pretty_generate(obj, :allow_nan => true, :max_nesting => false) end nil end @@ -867,27 +1163,11 @@ module ::Kernel # # The _opts_ argument is passed through to generate/parse respectively. See # generate and parse for their documentation. - def JSON(object, *args) - if object.is_a?(String) - return JSON.parse(object, args.first) - elsif object.respond_to?(:to_str) - str = object.to_str - if str.is_a?(String) - return JSON.parse(object.to_str, args.first) - end - end - - JSON.generate(object, args.first) + def JSON(object, opts = nil) + JSON[object, opts] end end -# Extends any Class to include _json_creatable?_ method. -class ::Class - # Returns true if this class can be used to create an instance - # from a serialised JSON string. The class has to implement a class - # method _json_create_ that expects a hash as first parameter. 
The hash - # should include the required data. - def json_creatable? - respond_to?(:json_create) - end +class Object + include JSON::GeneratorMethods end diff --git a/ext/json/lib/json/ext.rb b/ext/json/lib/json/ext.rb index 1db5ea122c..5bacc5e371 100644 --- a/ext/json/lib/json/ext.rb +++ b/ext/json/lib/json/ext.rb @@ -34,12 +34,12 @@ module JSON if RUBY_ENGINE == 'truffleruby' require 'json/truffle_ruby/generator' - JSON.generator = ::JSON::TruffleRuby::Generator + JSON.generator = JSON::TruffleRuby::Generator else require 'json/ext/generator' JSON.generator = Generator end end - JSON_LOADED = true unless defined?(::JSON::JSON_LOADED) + JSON_LOADED = true unless defined?(JSON::JSON_LOADED) end diff --git a/ext/json/lib/json/ext/generator/state.rb b/ext/json/lib/json/ext/generator/state.rb index 6cd9496e67..e4f425af6a 100644 --- a/ext/json/lib/json/ext/generator/state.rb +++ b/ext/json/lib/json/ext/generator/state.rb @@ -8,20 +8,8 @@ module JSON # # Instantiates a new State object, configured by _opts_. # - # _opts_ can have the following keys: - # - # * *indent*: a string used to indent levels (default: ''), - # * *space*: a string that is put after, a : or , delimiter (default: ''), - # * *space_before*: a string that is put before a : pair delimiter (default: ''), - # * *object_nl*: a string that is put at the end of a JSON object (default: ''), - # * *array_nl*: a string that is put at the end of a JSON array (default: ''), - # * *allow_nan*: true if NaN, Infinity, and -Infinity should be - # generated, otherwise an exception is thrown, if these values are - # encountered. This options defaults to false. - # * *ascii_only*: true if only ASCII characters should be generated. This - # option defaults to false. - # * *buffer_initial_length*: sets the initial length of the generator's - # internal buffer. + # Argument +opts+, if given, contains a \Hash of options for the generation. + # See {Generating Options}[rdoc-ref:JSON@Generating+Options]. 
def initialize(opts = nil) if opts && !opts.empty? configure(opts) @@ -58,6 +46,7 @@ module JSON space_before: space_before, object_nl: object_nl, array_nl: array_nl, + as_json: as_json, allow_nan: allow_nan?, ascii_only: ascii_only?, max_nesting: max_nesting, @@ -67,6 +56,11 @@ module JSON buffer_initial_length: buffer_initial_length, } + allow_duplicate_key = allow_duplicate_key? + unless allow_duplicate_key.nil? + result[:allow_duplicate_key] = allow_duplicate_key + end + instance_variables.each do |iv| iv = iv.to_s[1..-1] result[iv.to_sym] = self[iv] @@ -81,6 +75,8 @@ module JSON # # Returns the value returned by method +name+. def [](name) + ::JSON.deprecation_warning("JSON::State#[] is deprecated and will be removed in json 3.0.0") + if respond_to?(name) __send__(name) else @@ -93,6 +89,8 @@ module JSON # # Sets the attribute name to value. def []=(name, value) + ::JSON.deprecation_warning("JSON::State#[]= is deprecated and will be removed in json 3.0.0") + if respond_to?(name_writer = "#{name}=") __send__ name_writer, value else diff --git a/ext/json/lib/json/generic_object.rb b/ext/json/lib/json/generic_object.rb index ec5aa9dcb2..5c8ace354b 100644 --- a/ext/json/lib/json/generic_object.rb +++ b/ext/json/lib/json/generic_object.rb @@ -52,14 +52,6 @@ module JSON table end - def [](name) - __send__(name) - end unless method_defined?(:[]) - - def []=(name, value) - __send__("#{name}=", value) - end unless method_defined?(:[]=) - def |(other) self.class[other.to_hash.merge(to_hash)] end diff --git a/ext/json/lib/json/version.rb b/ext/json/lib/json/version.rb index 4fc5ff83d5..30c0a71d2f 100644 --- a/ext/json/lib/json/version.rb +++ b/ext/json/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.9.1' + VERSION = '2.19.8' end diff --git a/ext/json/parser/depend b/ext/json/parser/depend index 4cdf69a749..d4737b1dfb 100644 --- a/ext/json/parser/depend +++ b/ext/json/parser/depend @@ -141,6 +141,7 @@ parser.o: 
$(hdrdir)/ruby/internal/intern/re.h parser.o: $(hdrdir)/ruby/internal/intern/ruby.h parser.o: $(hdrdir)/ruby/internal/intern/select.h parser.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +parser.o: $(hdrdir)/ruby/internal/intern/set.h parser.o: $(hdrdir)/ruby/internal/intern/signal.h parser.o: $(hdrdir)/ruby/internal/intern/sprintf.h parser.o: $(hdrdir)/ruby/internal/intern/string.h @@ -174,6 +175,8 @@ parser.o: $(hdrdir)/ruby/ruby.h parser.o: $(hdrdir)/ruby/st.h parser.o: $(hdrdir)/ruby/subst.h parser.o: $(srcdir)/../fbuffer/fbuffer.h +parser.o: $(srcdir)/../json.h +parser.o: $(srcdir)/../simd/simd.h +parser.o: $(srcdir)/../vendor/ryu.h parser.o: parser.c -parser.o: parser.rl # AUTOGENERATED DEPENDENCIES END diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb index a8e21aed4b..a9d740c755 100644 --- a/ext/json/parser/extconf.rb +++ b/ext/json/parser/extconf.rb @@ -1,12 +1,21 @@ # frozen_string_literal: true require 'mkmf' -have_func("rb_enc_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 +$defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0" +have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0 +have_func("rb_str_to_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby -have_func("rb_category_warn", "ruby.h") # Missing on TruffleRuby -have_func("strnlen", "string.h") # Missing on Solaris 10 +have_func("ruby_xfree_sized", "ruby.h") # RUBY_VERSION >= 4.1 + +if RUBY_ENGINE == "ruby" + have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3 +end append_cflags("-std=c99") +if enable_config('parser-use-simd', default=!ENV["JSON_DISABLE_SIMD"]) + load __dir__ + "/../simd/conf.rb" +end + create_makefile 'json/ext/parser' diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 2906cfd1e8..fbfbfac9da 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ 
-1,36 +1,23 @@ -/* This file is automatically generated from parser.rl by using ragel */ -#line 1 "parser.rl" -#include "ruby.h" -#include "../fbuffer/fbuffer.h" +#include "../json.h" +#include "../vendor/ryu.h" +#include "../simd/simd.h" static VALUE mJSON, eNestingError, Encoding_UTF_8; static VALUE CNaN, CInfinity, CMinusInfinity; -static ID i_json_creatable_p, i_json_create, i_create_id, - i_chr, i_deep_const_get, i_match, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_uminus, i_encode; +static ID i_new, i_try_convert, i_uminus, i_encode; -static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, - sym_create_additions, sym_create_id, sym_object_class, sym_array_class, - sym_decimal_class, sym_match_string; +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, + sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load, + sym_allow_duplicate_key; static int binary_encindex; static int utf8_encindex; -#ifdef HAVE_RB_CATEGORY_WARN -# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) -#else -# define json_deprecated(message) rb_warn(message) -#endif - -static const char deprecated_create_additions_warning[] = - "JSON.load implicit support for `create_additions: true` is deprecated " - "and will be removed in 3.0, use JSON.unsafe_load or explicitly " - "pass `create_additions: true`"; - #ifndef HAVE_RB_HASH_BULK_INSERT // For TruffleRuby -void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) +static void +rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) { long index = 0; while (index < count) { @@ -42,6 +29,17 @@ void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) } #endif +#ifndef HAVE_RB_HASH_NEW_CAPA +#define rb_hash_new_capa(n) rb_hash_new() +#endif + +#ifndef HAVE_RB_STR_TO_INTERNED_STR +static VALUE rb_str_to_interned_str(VALUE str) +{ + return 
rb_funcall(rb_str_freeze(str), i_uminus, 0); +} +#endif + /* name cache */ #include <string.h> @@ -86,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring cache->entries[index] = rstring; } -static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +#define rstring_cache_memcmp memcmp + +#if JSON_CPU_LITTLE_ENDIAN_64BITS +#if __has_builtin(__builtin_bswap64) +#undef rstring_cache_memcmp +ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length) { - long rstring_length = RSTRING_LEN(rstring); - if (length == rstring_length) { - return memcmp(str, RSTRING_PTR(rstring), length); - } else { - return (int)(length - rstring_length); + // The libc memcmp has numerous complex optimizations, but in this particular case, + // we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to + // inline a simpler memcmp outperforms calling the libc version. + long i = 0; + + for (; i + 8 <= length; i += 8) { + uint64_t a, b; + memcpy(&a, str + i, 8); + memcpy(&b, rptr + i, 8); + if (a != b) { + a = __builtin_bswap64(a); + b = __builtin_bswap64(b); + return (a < b) ? -1 : 1; + } } + + for (; i < length; i++) { + if (str[i] != rptr[i]) { + return (str[i] < rptr[i]) ? -1 : 1; + } + } + + return 0; } +#endif +#endif -static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring) { - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. 
- return Qfalse; - } + const char *rstring_ptr; + long rstring_length; + + RSTRING_GETMEM(rstring, rstring_ptr, rstring_length); - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; + if (length == rstring_length) { + return rstring_cache_memcmp(str, rstring_ptr, length); + } else { + return (int)(length - rstring_length); } +} +ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ int low = 0; int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; while (low <= high) { - mid = (high + low) >> 1; + int mid = (high + low) >> 1; VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, entry); + int cmp = rstring_cache_cmp(str, length, entry); - if (last_cmp == 0) { + if (cmp == 0) { return entry; - } else if (last_cmp > 0) { + } else if (cmp > 0) { low = mid + 1; } else { high = mid - 1; } } - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - VALUE rstring = build_interned_string(str, length); if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rstring); + rvalue_cache_insert_at(cache, low, rstring); } return rstring; } static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) { - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. 
- // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - int low = 0; int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; while (low <= high) { - mid = (high + low) >> 1; + int mid = (high + low) >> 1; VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); - if (last_cmp == 0) { + if (cmp == 0) { return entry; - } else if (last_cmp > 0) { + } else if (cmp > 0) { low = mid + 1; } else { high = mid - 1; } } - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - VALUE rsymbol = build_symbol(str, length); if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rsymbol); + rvalue_cache_insert_at(cache, low, rsymbol); } return rsymbol; } @@ -225,19 +211,20 @@ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalu if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { stack = rvalue_stack_spill(stack, handle, stack_ref); } else { - REALLOC_N(stack->ptr, VALUE, required); + JSON_SIZED_REALLOC_N(stack->ptr, VALUE, required, stack->capa); stack->capa = required; } return stack; } -static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) +static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) { if (RB_UNLIKELY(stack->head >= stack->capa)) { stack = rvalue_stack_grow(stack, handle, stack_ref); } stack->ptr[stack->head] = value; stack->head++; + return value; } static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) @@ -254,35 +241,60 @@ static void rvalue_stack_mark(void *ptr) { rvalue_stack *stack = (rvalue_stack *)ptr; long index; - for (index = 0; index < stack->head; 
index++) { - rb_gc_mark(stack->ptr[index]); + if (stack && stack->ptr) { + for (index = 0; index < stack->head; index++) { + rb_gc_mark_movable(stack->ptr[index]); + } } } +static void rvalue_stack_free_buffer(rvalue_stack *stack) +{ + JSON_SIZED_FREE_N(stack->ptr, stack->capa); + stack->ptr = NULL; +} + static void rvalue_stack_free(void *ptr) { rvalue_stack *stack = (rvalue_stack *)ptr; if (stack) { - ruby_xfree(stack->ptr); - ruby_xfree(stack); + rvalue_stack_free_buffer(stack); +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + JSON_SIZED_FREE(stack); +#endif } } static size_t rvalue_stack_memsize(const void *ptr) { const rvalue_stack *stack = (const rvalue_stack *)ptr; - return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; + size_t memsize = sizeof(VALUE) * stack->capa; +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + memsize += sizeof(rvalue_stack); +#endif + return memsize; +} + +static void rvalue_stack_compact(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + long index; + if (stack && stack->ptr) { + for (index = 0; index < stack->head; index++) { + stack->ptr[index] = rb_gc_location(stack->ptr[index]); + } + } } static const rb_data_type_t JSON_Parser_rvalue_stack_type = { - "JSON::Ext::Parser/rvalue_stack", - { + .wrap_struct_name = "JSON::Ext::Parser/rvalue_stack", + .function = { .dmark = rvalue_stack_mark, .dfree = rvalue_stack_free, .dsize = rvalue_stack_memsize, + .dcompact = rvalue_stack_compact, }, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE, }; static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) @@ -301,50 +313,208 @@ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, static void rvalue_stack_eagerly_release(VALUE handle) { - rvalue_stack *stack; - TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - RTYPEDDATA_DATA(handle) = NULL; - rvalue_stack_free(stack); + if (handle) { + 
rvalue_stack *stack; + TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); +#ifdef HAVE_RUBY_TYPED_EMBEDDABLE + rvalue_stack_free_buffer(stack); +#else + rvalue_stack_free(stack); + RTYPEDDATA_DATA(handle) = NULL; +#endif + } } -/* unicode */ +/* frame stack */ + +// Iterative (non-recursive) parsing keeps an explicit stack of the containers +// currently being built, instead of relying on the C call stack. Each frame +// only needs enough bookkeeping to close its container: which kind it is, the +// rvalue_stack position where its children start (so we know how many to pop), +// and the cursor at its opening brace (used to rewind for duplicate key +// errors). Frames hold no VALUEs, so this stack needs no GC marking; it reuses +// the same stack-allocated-with-heap-spill strategy as the rvalue_stack so that +// it's freed even if parsing raises. +// +// The lifecycle helpers below (grow/push/peek/pop/spill/free/eagerly_release +// and the rb_data_type_t) deliberately mirror their rvalue_stack counterparts +// -- the element type and the absence of a mark function are the only real +// differences. Keep the two in sync: a fix to the spill/release or +// HAVE_RUBY_TYPED_EMBEDDABLE handling in one almost certainly belongs in the +// other. 
+#define JSON_FRAME_STACK_INITIAL_CAPA 32 + +enum json_frame_type { + JSON_FRAME_ROOT, // == JSON_PHASE_DONE + JSON_FRAME_ARRAY, // == JSON_PHASE_ARRAY_COMMA + JSON_FRAME_OBJECT, // = JSON_PHASE_OBJECT_COMMA +}; -static const signed char digit_values[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, - -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1 +// Where a frame is within its container's grammar. This is the entirety of the +// parser's "what to do next" state: json_parse_any dispatches on the top +// frame's phase and holds no resume state in C locals, so a parse can stop at +// any value boundary and be resumed purely from the (persistable) frame stack. +// +// The first three phases are deliberately equal to the corresponding json_frame_type +// to simplify the transition of phase in json_value_completed. 
+enum json_frame_phase { + JSON_PHASE_DONE = JSON_FRAME_ROOT, // root only: the document value has been parsed + JSON_PHASE_ARRAY_COMMA = JSON_FRAME_ARRAY, // after a value: expecting ',' or the closing ']' + JSON_PHASE_OBJECT_COMMA = JSON_FRAME_OBJECT, // after a value: expecting ',' or the closing '}' + JSON_PHASE_VALUE, // expecting a value (document root, array element, or object value after ':') + JSON_PHASE_OBJECT_KEY, // expecting a '"' key (after '{' or ',') + JSON_PHASE_OBJECT_COLON, // object only: after a key, expecting ':' }; -static uint32_t unescape_unicode(const unsigned char *p) -{ - const uint32_t replacement_char = 0xFFFD; - - signed char b; - uint32_t result = 0; - b = digit_values[p[0]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[1]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[2]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[3]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - return result; +typedef struct json_frame_struct { + enum json_frame_type type; + enum json_frame_phase phase; + long value_stack_head; // rvalue_stack->head when this container opened + const char *start_cursor; // object frames only (the '{'); NULL otherwise +} json_frame; + +typedef struct json_frame_stack_struct { + enum rvalue_stack_type type; // shared with rvalue_stack: is ptr stack- or heap-allocated + long capa; + long head; + json_frame *ptr; +} json_frame_stack; + +enum duplicate_key_action { + JSON_DEPRECATED = 0, + JSON_IGNORE, + JSON_RAISE, +}; + +typedef struct JSON_ParserStruct { + VALUE on_load_proc; + VALUE decimal_class; + ID decimal_method_id; + enum duplicate_key_action on_duplicate_key; + int max_nesting; + bool allow_nan; + bool allow_trailing_comma; + bool allow_control_characters; + bool allow_invalid_escape; + bool symbolize_names; 
+ bool freeze; +} JSON_ParserConfig; + +typedef struct JSON_ParserStateStruct { + VALUE *value_stack_handle; + VALUE *frame_stack_handle; + const char *start; + const char *cursor; + const char *end; + rvalue_stack *value_stack; + json_frame_stack *frames; + rvalue_cache name_cache; + int in_array; + int current_nesting; + unsigned int emitted_deprecations; +} JSON_ParserState; + +static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref); + +static json_frame_stack *json_frame_stack_grow(json_frame_stack *stack, VALUE *handle, json_frame_stack **stack_ref) +{ + long required = stack->capa * 2; + + if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { + stack = json_frame_stack_spill(stack, handle, stack_ref); + } else { + JSON_SIZED_REALLOC_N(stack->ptr, json_frame, required, stack->capa); + stack->capa = required; + } + return stack; +} + +static json_frame *json_frame_stack_push(JSON_ParserState *state, json_frame frame) +{ + json_frame_stack *stack = state->frames; + if (RB_UNLIKELY(stack->head >= stack->capa)) { + stack = json_frame_stack_grow(stack, state->frame_stack_handle, &state->frames); + } + + json_frame *frame_ptr = &stack->ptr[stack->head++]; + *frame_ptr = frame; + return frame_ptr; +} + +static inline json_frame *json_frame_stack_peek(json_frame_stack *stack) +{ + return &stack->ptr[stack->head - 1]; +} + +static inline void json_frame_stack_pop(json_frame_stack *stack) +{ + stack->head--; +} + +static void json_frame_stack_free_buffer(json_frame_stack *stack) +{ + JSON_SIZED_FREE_N(stack->ptr, stack->capa); + stack->ptr = NULL; +} + +static void json_frame_stack_free(void *ptr) +{ + json_frame_stack *stack = (json_frame_stack *)ptr; + if (stack) { + json_frame_stack_free_buffer(stack); +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + JSON_SIZED_FREE(stack); +#endif + } +} + +static size_t json_frame_stack_memsize(const void *ptr) +{ + const json_frame_stack *stack = (const json_frame_stack *)ptr; + 
+ size_t memsize = sizeof(json_frame) * stack->capa; +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + memsize += sizeof(json_frame_stack); +#endif + return memsize; +} + +static const rb_data_type_t JSON_Parser_frame_stack_type = { + .wrap_struct_name = "JSON::Ext::Parser/frame_stack", + .function = { + .dmark = NULL, + .dfree = json_frame_stack_free, + .dsize = json_frame_stack_memsize, + }, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE, +}; + +static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref) +{ + json_frame_stack *stack; + *handle = TypedData_Make_Struct(0, json_frame_stack, &JSON_Parser_frame_stack_type, stack); + *stack_ref = stack; + MEMCPY(stack, old_stack, json_frame_stack, 1); + + stack->capa = old_stack->capa << 1; + stack->ptr = ALLOC_N(json_frame, stack->capa); + stack->type = RVALUE_STACK_HEAP_ALLOCATED; + MEMCPY(stack->ptr, old_stack->ptr, json_frame, old_stack->head); + return stack; +} + +static void json_frame_stack_eagerly_release(VALUE handle) +{ + if (handle) { + json_frame_stack *stack; + TypedData_Get_Struct(handle, json_frame_stack, &JSON_Parser_frame_stack_type, stack); +#ifdef HAVE_RUBY_TYPED_EMBEDDABLE + json_frame_stack_free_buffer(stack); +#else + json_frame_stack_free(stack); + RTYPEDDATA_DATA(handle) = NULL; +#endif + } } static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) @@ -373,1868 +543,245 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) return len; } -typedef struct JSON_ParserStruct { - VALUE create_id; - VALUE object_class; - VALUE array_class; - VALUE decimal_class; - VALUE match_string; - int max_nesting; - bool allow_nan; - bool allow_trailing_comma; - bool parsing_name; - bool symbolize_names; - bool freeze; - bool create_additions; - bool deprecated_create_additions; -} JSON_Parser; - -typedef struct JSON_ParserStateStruct { - JSON_Parser *json; - VALUE Vsource; - VALUE stack_handle; - char *source; - long len; - char *memo; - 
FBuffer fbuffer; - rvalue_stack *stack; - rvalue_cache name_cache; - int in_array; -} JSON_ParserState; - -#define GET_PARSER \ - JSON_Parser *json; \ - TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) - -#define MinusInfinity "-Infinity" -#define EVIL 0x666 +static inline size_t rest(JSON_ParserState *state) { + return state->end - state->cursor; +} -static const rb_data_type_t JSON_Parser_type; -static char *JSON_parse_string(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_object(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_value(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_number(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static inline bool eos(JSON_ParserState *state) { + return state->cursor >= state->end; +} -#ifndef HAVE_STRNLEN -static size_t strnlen(const char *s, size_t maxlen) +static inline char peek(JSON_ParserState *state) { - char *p; - return ((p = memchr(s, '\0', maxlen)) ? 
p - s : maxlen); + if (RB_UNLIKELY(eos(state))) { + return 0; + } + return *state->cursor; } -#endif -#define PARSE_ERROR_FRAGMENT_LEN 32 -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error(const char *format, const char *start) +static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out) { - char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; + JSON_ASSERT(state->cursor <= state->end); - size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN); - const char *ptr = start; + // Redundant but helpful for hardening + if (RB_UNLIKELY(state->cursor > state->end)) { + state->cursor = state->end; + } - if (len == PARSE_ERROR_FRAGMENT_LEN) { - MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); - buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0'; - ptr = buffer; - } - - rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); -} - - - -#line 472 "parser.rl" - - - -#line 454 "parser.c" -enum {JSON_object_start = 1}; -enum {JSON_object_first_final = 32}; -enum {JSON_object_error = 0}; - -enum {JSON_object_en_main = 1}; - - -#line 512 "parser.rl" - - -#define PUSH(result) rvalue_stack_push(state->stack, result, &state->stack_handle, &state->stack) - -static char *JSON_parse_object(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - - long stack_head = state->stack->head; - - -#line 478 "parser.c" - { - cs = JSON_object_start; - } - -#line 527 "parser.rl" - -#line 485 "parser.c" - { - short _widec; - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 123 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 13: goto st2; - case 32: goto st2; - case 34: goto tr2; - case 47: goto st28; - case 125: goto 
tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st2; - goto st0; -tr2: -#line 491 "parser.rl" - { - char *np; - json->parsing_name = true; - np = JSON_parse_string(state, json, p, pe, result); - json->parsing_name = false; - if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { - PUSH(*result); - {p = (( np))-1;} - } - } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 530 "parser.c" - switch( (*p) ) { - case 13: goto st3; - case 32: goto st3; - case 47: goto st4; - case 58: goto st8; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st3; - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st5; - case 47: goto st7; - } - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 42 ) - goto st6; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st3; - } - goto st5; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 10 ) - goto st3; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 13: goto st8; - case 32: goto st8; - case 34: goto tr11; - case 45: goto tr11; - case 47: goto st24; - case 73: goto tr11; - case 78: goto tr11; - case 91: goto tr11; - case 102: goto tr11; - case 110: goto tr11; - case 116: goto tr11; - case 123: goto tr11; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr11; - } else if ( (*p) >= 9 ) - goto st8; - goto st0; -tr11: -#line 480 "parser.rl" - { - char *np = JSON_parse_value(state, json, p, pe, result, current_nesting); - if (np == NULL) { - p--; {p++; cs = 9; goto _out;} - } else { - {p = (( np))-1;} + const char *cursor = state->cursor; + long column = 0; + long line = 1; + + while (cursor >= state->start) { + if (*cursor-- == '\n') { + break; } + column++; } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 611 "parser.c" - _widec = (*p); - if ( (*p) < 13 ) { - if ( (*p) > 9 ) { - if 
( 10 <= (*p) && (*p) <= 10 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 13 ) { - if ( (*p) < 44 ) { - if ( 32 <= (*p) && (*p) <= 32 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 44 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 125: goto tr4; - case 269: goto st10; - case 288: goto st10; - case 300: goto st11; - case 303: goto st16; - case 525: goto st9; - case 544: goto st9; - case 556: goto st2; - case 559: goto st20; - } - if ( _widec > 266 ) { - if ( 521 <= _widec && _widec <= 522 ) - goto st9; - } else if ( _widec >= 265 ) - goto st10; - goto st0; -tr4: -#line 502 "parser.rl" - { p--; {p++; cs = 32; goto _out;} } - goto st32; -st32: - if ( ++p == pe ) - goto _test_eof32; -case 32: -#line 679 "parser.c" - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 44: goto st11; - case 47: goto st16; - case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - switch( (*p) ) { - case 13: goto st11; - case 32: goto st11; - case 34: goto tr2; - case 47: goto st12; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st11; - goto st0; -st12: - if ( ++p == pe ) - goto 
_test_eof12; -case 12: - switch( (*p) ) { - case 42: goto st13; - case 47: goto st15; - } - goto st0; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - if ( (*p) == 42 ) - goto st14; - goto st13; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - switch( (*p) ) { - case 42: goto st14; - case 47: goto st11; - } - goto st13; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - if ( (*p) == 10 ) - goto st11; - goto st15; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - switch( (*p) ) { - case 42: goto st17; - case 47: goto st19; - } - goto st0; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - if ( (*p) == 42 ) - goto st18; - goto st17; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - switch( (*p) ) { - case 42: goto st18; - case 47: goto st10; - } - goto st17; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - if ( (*p) == 10 ) - goto st10; - goto st19; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - _widec = (*p); - if ( (*p) > 42 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st17; - case 303: goto st19; - case 554: goto st21; - case 559: goto st23; - } - goto st0; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: - _widec = (*p); - if ( (*p) < 42 ) { - if ( (*p) <= 41 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 42 ) { - if ( 43 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( 
_widec ) { - case 298: goto st18; - case 554: goto st22; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st21; - } else if ( _widec >= 128 ) - goto st17; - goto st0; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: - _widec = (*p); - if ( (*p) < 43 ) { - if ( (*p) > 41 ) { - if ( 42 <= (*p) && (*p) <= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 46 ) { - if ( (*p) > 47 ) { - if ( 48 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st18; - case 303: goto st10; - case 554: goto st22; - case 559: goto st9; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st21; - } else if ( _widec >= 128 ) - goto st17; - goto st0; -st23: - if ( ++p == pe ) - goto _test_eof23; -case 23: - _widec = (*p); - if ( (*p) < 10 ) { - if ( (*p) <= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 10 ) { - if ( 11 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 489 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 266: goto st10; - case 522: goto st9; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto 
st23; - } else if ( _widec >= 128 ) - goto st19; - goto st0; -st24: - if ( ++p == pe ) - goto _test_eof24; -case 24: - switch( (*p) ) { - case 42: goto st25; - case 47: goto st27; - } - goto st0; -st25: - if ( ++p == pe ) - goto _test_eof25; -case 25: - if ( (*p) == 42 ) - goto st26; - goto st25; -st26: - if ( ++p == pe ) - goto _test_eof26; -case 26: - switch( (*p) ) { - case 42: goto st26; - case 47: goto st8; - } - goto st25; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: - if ( (*p) == 10 ) - goto st8; - goto st27; -st28: - if ( ++p == pe ) - goto _test_eof28; -case 28: - switch( (*p) ) { - case 42: goto st29; - case 47: goto st31; - } - goto st0; -st29: - if ( ++p == pe ) - goto _test_eof29; -case 29: - if ( (*p) == 42 ) - goto st30; - goto st29; -st30: - if ( ++p == pe ) - goto _test_eof30; -case 30: - switch( (*p) ) { - case 42: goto st30; - case 47: goto st2; - } - goto st29; -st31: - if ( ++p == pe ) - goto _test_eof31; -case 31: - if ( (*p) == 10 ) - goto st2; - goto st31; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof32: cs = 32; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; - _test_eof23: cs = 23; goto _test_eof; - _test_eof24: cs = 24; goto _test_eof; - _test_eof25: cs = 25; goto _test_eof; - 
_test_eof26: cs = 26; goto _test_eof; - _test_eof27: cs = 27; goto _test_eof; - _test_eof28: cs = 28; goto _test_eof; - _test_eof29: cs = 29; goto _test_eof; - _test_eof30: cs = 30; goto _test_eof; - _test_eof31: cs = 31; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 528 "parser.rl" - - if (cs >= JSON_object_first_final) { - long count = state->stack->head - stack_head; - - if (RB_UNLIKELY(json->object_class)) { - VALUE object = rb_class_new_instance(0, 0, json->object_class); - long index = 0; - VALUE *items = rvalue_stack_peek(state->stack, count); - while (index < count) { - VALUE name = items[index++]; - VALUE value = items[index++]; - rb_funcall(object, i_aset, 2, name, value); - } - *result = object; - } else { - VALUE hash; -#ifdef HAVE_RB_HASH_NEW_CAPA - hash = rb_hash_new_capa(count >> 1); -#else - hash = rb_hash_new(); -#endif - rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), hash); - *result = hash; - } - rvalue_stack_pop(state->stack, count); - if (RB_UNLIKELY(json->create_additions)) { - VALUE klassname; - if (json->object_class) { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); - } else { - klassname = rb_hash_aref(*result, json->create_id); - } - if (!NIL_P(klassname)) { - VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { - if (json->deprecated_create_additions) { - json_deprecated(deprecated_create_additions_warning); - } - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - } - return p + 1; - } else { - return NULL; - } -} - - -#line 1070 "parser.c" -enum {JSON_value_start = 1}; -enum {JSON_value_first_final = 29}; -enum {JSON_value_error = 0}; - -enum {JSON_value_en_main = 1}; - - -#line 661 "parser.rl" - - -static char *JSON_parse_value(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - -#line 1086 "parser.c" - { - cs = JSON_value_start; - } - 
-#line 668 "parser.rl" - -#line 1093 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 13: goto st1; - case 32: goto st1; - case 34: goto tr2; - case 45: goto tr3; - case 47: goto st6; - case 73: goto st10; - case 78: goto st17; - case 91: goto tr7; - case 102: goto st19; - case 110: goto st23; - case 116: goto st26; - case 123: goto tr11; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr3; - } else if ( (*p) >= 9 ) - goto st1; - goto st0; -st0: -cs = 0; - goto _out; -tr2: -#line 606 "parser.rl" - { - char *np = JSON_parse_string(state, json, p, pe, result); - if (np == NULL) { - p--; - {p++; cs = 29; goto _out;} - } else { - {p = (( np))-1;} + while (cursor >= state->start) { + if (*cursor-- == '\n') { + line++; } } - goto st29; -tr3: -#line 616 "parser.rl" - { - char *np; - if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { - if (json->allow_nan) { - *result = CMinusInfinity; - {p = (( p + 10))-1;} - p--; {p++; cs = 29; goto _out;} - } else { - raise_parse_error("unexpected token at '%s'", p); + *line_out = line; + *column_out = column; +} + +static void emit_parse_warning(const char *message, JSON_ParserState *state) +{ + long line, column; + cursor_position(state, &line, &column); + + VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column); + rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning); +} + +#define PARSE_ERROR_FRAGMENT_LEN 32 + +static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column) +{ + unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3]; + + const char *ptr = "EOF"; + if (state->cursor && state->cursor < state->end) { + ptr = state->cursor; + size_t len = 0; + while (len < PARSE_ERROR_FRAGMENT_LEN) { + char ch = ptr[len]; + if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') { + break; } + len++; } - np = JSON_parse_number(state, json, 
p, pe, result); - if (np != NULL) { - {p = (( np))-1;} - } - p--; {p++; cs = 29; goto _out;} - } - goto st29; -tr7: -#line 634 "parser.rl" - { - char *np; - state->in_array++; - np = JSON_parse_array(state, json, p, pe, result, current_nesting + 1); - state->in_array--; - if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} - } - goto st29; -tr11: -#line 642 "parser.rl" - { - char *np; - np = JSON_parse_object(state, json, p, pe, result, current_nesting + 1); - if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} - } - goto st29; -tr25: -#line 599 "parser.rl" - { - if (json->allow_nan) { - *result = CInfinity; - } else { - raise_parse_error("unexpected token at '%s'", p - 7); - } - } - goto st29; -tr27: -#line 592 "parser.rl" - { - if (json->allow_nan) { - *result = CNaN; - } else { - raise_parse_error("unexpected token at '%s'", p - 2); + + if (len) { + buffer[0] = '\''; + MEMCPY(buffer + 1, ptr, char, len); + + while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte + len--; + } + + if (buffer[len] >= 0xC0) { // multibyte character start + len--; + } + + buffer[len + 1] = '\''; + buffer[len + 2] = '\0'; + ptr = (const char *)buffer; } } - goto st29; -tr31: -#line 586 "parser.rl" - { - *result = Qfalse; - } - goto st29; -tr34: -#line 583 "parser.rl" - { - *result = Qnil; - } - goto st29; -tr37: -#line 589 "parser.rl" - { - *result = Qtrue; - } - goto st29; -st29: - if ( ++p == pe ) - goto _test_eof29; -case 29: -#line 648 "parser.rl" - { p--; {p++; cs = 29; goto _out;} } -#line 1220 "parser.c" - switch( (*p) ) { - case 13: goto st29; - case 32: goto st29; - case 47: goto st2; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st29; - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 42: goto st3; - case 47: goto st5; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 42 ) - goto st4; - goto st3; -st4: - if ( ++p == pe ) - goto _test_eof4; 
-case 4: - switch( (*p) ) { - case 42: goto st4; - case 47: goto st29; - } - goto st3; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 10 ) - goto st29; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st9; - } - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 42 ) - goto st8; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 42: goto st8; - case 47: goto st1; - } - goto st7; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 10 ) - goto st1; - goto st9; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - if ( (*p) == 110 ) - goto st11; - goto st0; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - if ( (*p) == 102 ) - goto st12; - goto st0; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - if ( (*p) == 105 ) - goto st13; - goto st0; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - if ( (*p) == 110 ) - goto st14; - goto st0; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - if ( (*p) == 105 ) - goto st15; - goto st0; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - if ( (*p) == 116 ) - goto st16; - goto st0; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - if ( (*p) == 121 ) - goto tr25; - goto st0; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - if ( (*p) == 97 ) - goto st18; - goto st0; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - if ( (*p) == 78 ) - goto tr27; - goto st0; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - if ( (*p) == 97 ) - goto st20; - goto st0; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - if ( (*p) == 108 ) - goto st21; - goto st0; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: - if ( (*p) == 115 ) - goto st22; - goto st0; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: - if ( (*p) == 101 ) - goto tr31; - goto st0; -st23: - if ( ++p == pe ) - goto 
_test_eof23; -case 23: - if ( (*p) == 117 ) - goto st24; - goto st0; -st24: - if ( ++p == pe ) - goto _test_eof24; -case 24: - if ( (*p) == 108 ) - goto st25; - goto st0; -st25: - if ( ++p == pe ) - goto _test_eof25; -case 25: - if ( (*p) == 108 ) - goto tr34; - goto st0; -st26: - if ( ++p == pe ) - goto _test_eof26; -case 26: - if ( (*p) == 114 ) - goto st27; - goto st0; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: - if ( (*p) == 117 ) - goto st28; - goto st0; -st28: - if ( ++p == pe ) - goto _test_eof28; -case 28: - if ( (*p) == 101 ) - goto tr37; - goto st0; - } - _test_eof1: cs = 1; goto _test_eof; - _test_eof29: cs = 29; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; - _test_eof23: cs = 23; goto _test_eof; - _test_eof24: cs = 24; goto _test_eof; - _test_eof25: cs = 25; goto _test_eof; - _test_eof26: cs = 26; goto _test_eof; - _test_eof27: cs = 27; goto _test_eof; - _test_eof28: cs = 28; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 669 "parser.rl" - - if (json->freeze) { - OBJ_FREEZE(*result); - } - - if (cs >= JSON_value_first_final) { - PUSH(*result); - return p; - } else { - return NULL; - } -} + VALUE message = rb_enc_sprintf(enc_utf8, format, ptr); + 
rb_str_catf(message, " at line %ld column %ld", line, column); + return message; +} -#line 1476 "parser.c" -enum {JSON_integer_start = 1}; -enum {JSON_integer_first_final = 3}; -enum {JSON_integer_error = 0}; +static VALUE parse_error_new(VALUE message, long line, long column) +{ + VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message); + rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line)); + rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column)); + return exc; +} -enum {JSON_integer_en_main = 1}; +NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state) +{ + long line, column; + cursor_position(state, &line, &column); + VALUE message = build_parse_error_message(format, state, line, column); + rb_exc_raise(parse_error_new(message, line, column)); +} +NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at) +{ + state->cursor = at; + raise_parse_error(format, state); +} -#line 690 "parser.rl" +/* unicode */ +static const signed char digit_values[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, + -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1 +}; -#define MAX_FAST_INTEGER_SIZE 18 -static inline VALUE fast_parse_integer(char *p, char *pe) +static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe) { - bool negative = false; - if (*p == '-') { - negative = true; - p++; + if (RB_UNLIKELY(sp > spe - 4)) { + raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2); } - long long memo = 0; - while (p < pe) { - memo *= 10; - memo += *p - '0'; - p++; - } + const unsigned char *p = (const unsigned char *)sp; + + const signed char b0 = digit_values[p[0]]; + const signed char b1 = digit_values[p[1]]; + const signed char b2 = digit_values[p[2]]; + const signed char b3 = digit_values[p[3]]; - if (negative) { - memo = -memo; + if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) { + raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2); } - return LL2NUM(memo); + + return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3; } -static char *JSON_decode_integer(JSON_ParserState *state, JSON_Parser *json, char *p, VALUE *result) +#define GET_PARSER_CONFIG \ + JSON_ParserConfig *config; \ + TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config) + +static const rb_data_type_t JSON_ParserConfig_type; + +NOINLINE(static) void +json_eat_comments(JSON_ParserState *state) { - long len = p - state->memo; - if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { - *result = fast_parse_integer(state->memo, p); - } else { - fbuffer_clear(&state->fbuffer); - fbuffer_append(&state->fbuffer, state->memo, len); - fbuffer_append_char(&state->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(&state->fbuffer), 10); - } - return p + 1; -} - - -#line 1524 "parser.c" -enum {JSON_float_start = 1}; -enum {JSON_float_first_final = 6}; -enum {JSON_float_error = 0}; - -enum {JSON_float_en_main = 
1}; - - -#line 742 "parser.rl" - - -static char *JSON_parse_number(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - bool is_float = false; - - -#line 1541 "parser.c" - { - cs = JSON_float_start; - } - -#line 750 "parser.rl" - state->memo = p; - -#line 1549 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - switch( (*p) ) { - case 45: goto st2; - case 48: goto st6; - } - if ( 49 <= (*p) && (*p) <= 57 ) - goto st10; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - if ( (*p) == 48 ) - goto st6; - if ( 49 <= (*p) && (*p) <= 57 ) - goto st10; - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 45: goto st0; - case 46: goto tr8; - case 69: goto tr9; - case 101: goto tr9; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st0; - goto tr7; -tr7: -#line 734 "parser.rl" - { p--; {p++; cs = 7; goto _out;} } - goto st7; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: -#line 1596 "parser.c" - goto st0; -tr8: -#line 735 "parser.rl" - { is_float = true; } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 1606 "parser.c" - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 69: goto st4; - case 101: goto st4; - } - if ( (*p) > 46 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - } else if ( (*p) >= 45 ) - goto st0; - goto tr7; -tr9: -#line 735 "parser.rl" - { is_float = true; } - goto st4; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: -#line 1632 "parser.c" - switch( (*p) ) { - case 43: goto st5; - case 45: goto st5; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st9; - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st9; - goto st0; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - switch( (*p) ) { - case 69: goto st0; - case 101: goto st0; - } - 
if ( (*p) > 46 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st9; - } else if ( (*p) >= 45 ) - goto st0; - goto tr7; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - switch( (*p) ) { - case 45: goto st0; - case 46: goto tr8; - case 69: goto tr9; - case 101: goto tr9; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; - goto tr7; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 752 "parser.rl" - - if (cs >= JSON_float_first_final) { - if (!is_float) { - return JSON_decode_integer(state, json, p, result); + const char *start = state->cursor; + state->cursor++; + + switch (peek(state)) { + case '/': { + state->cursor = memchr(state->cursor, '\n', state->end - state->cursor); + if (!state->cursor) { + state->cursor = state->end; + } else { + state->cursor++; + } + break; } - VALUE mod = Qnil; - ID method_id = 0; - if (json->decimal_class) { - if (rb_respond_to(json->decimal_class, i_try_convert)) { - mod = json->decimal_class; - method_id = i_try_convert; - } else if (rb_respond_to(json->decimal_class, i_new)) { - mod = json->decimal_class; - method_id = i_new; - } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) { - VALUE name = rb_class_name(json->decimal_class); - const char *name_cstr = RSTRING_PTR(name); - const char *last_colon = strrchr(name_cstr, ':'); - if (last_colon) { - const char *mod_path_end = last_colon - 1; - VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); - mod = rb_path_to_class(mod_path); + case '*': { + state->cursor++; - const char *method_name_beg = last_colon + 1; - long before_len = method_name_beg - name_cstr; - long len = RSTRING_LEN(name) - before_len; - VALUE method_name = 
rb_str_substr(name, before_len, len); - method_id = SYM2ID(rb_str_intern(method_name)); - } else { - mod = rb_mKernel; - method_id = SYM2ID(rb_str_intern(name)); + while (true) { + const char *next_match = memchr(state->cursor, '*', state->end - state->cursor); + if (!next_match) { + raise_parse_error_at("unterminated comment, expected closing '*/'", state, start); + } + + state->cursor = next_match + 1; + if (peek(state) == '/') { + state->cursor++; + break; } } + break; } + default: + raise_parse_error_at("unexpected token %s", state, start); + break; + } +} - long len = p - state->memo; - fbuffer_clear(&state->fbuffer); - fbuffer_append(&state->fbuffer, state->memo, len); - fbuffer_append_char(&state->fbuffer, '\0'); - - if (method_id) { - VALUE text = rb_str_new2(FBUFFER_PTR(&state->fbuffer)); - *result = rb_funcallv(mod, method_id, 1, &text); - } else { - *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&state->fbuffer), 1)); - } +ALWAYS_INLINE(static) void +json_eat_whitespace(JSON_ParserState *state) +{ + while (true) { + switch (peek(state)) { + case ' ': + state->cursor++; + break; + case '\n': + state->cursor++; + + // Heuristic: if we see a newline, there is likely consecutive spaces after it. 
+#if JSON_CPU_LITTLE_ENDIAN_64BITS + while (rest(state) > 8) { + uint64_t chunk; + memcpy(&chunk, state->cursor, sizeof(uint64_t)); + if (chunk == 0x2020202020202020) { + state->cursor += 8; + continue; + } - return p + 1; - } else { - return NULL; - } -} - - - -#line 1745 "parser.c" -enum {JSON_array_start = 1}; -enum {JSON_array_first_final = 22}; -enum {JSON_array_error = 0}; - -enum {JSON_array_en_main = 1}; - - -#line 832 "parser.rl" - - -static char *JSON_parse_array(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - long stack_head = state->stack->head; - - -#line 1766 "parser.c" - { - cs = JSON_array_start; - } - -#line 844 "parser.rl" - -#line 1773 "parser.c" - { - short _widec; - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 91 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 13: goto st2; - case 32: goto st2; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st18; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 93: goto tr4; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st2; - goto st0; -tr2: -#line 812 "parser.rl" - { - VALUE v = Qnil; - char *np = JSON_parse_value(state, json, p, pe, &v, current_nesting); - if (np == NULL) { - p--; {p++; cs = 3; goto _out;} - } else { - {p = (( np))-1;} - } - } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 1828 "parser.c" - _widec = (*p); - if ( 44 <= (*p) && (*p) <= 44 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - 
switch( _widec ) { - case 13: goto st3; - case 32: goto st3; - case 47: goto st4; - case 93: goto tr4; - case 300: goto st8; - case 556: goto st13; - } - if ( 9 <= _widec && _widec <= 10 ) - goto st3; - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st5; - case 47: goto st7; - } - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 42 ) - goto st6; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st3; - } - goto st5; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 10 ) - goto st3; - goto st7; -tr4: -#line 824 "parser.rl" - { p--; {p++; cs = 22; goto _out;} } - goto st22; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: -#line 1887 "parser.c" - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 13: goto st8; - case 32: goto st8; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st9; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st8; - goto st0; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - switch( (*p) ) { - case 42: goto st10; - case 47: goto st12; - } - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - if ( (*p) == 42 ) - goto st11; - goto st10; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - switch( (*p) ) { - case 42: goto st11; - case 47: goto st8; - } - goto st10; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - if ( (*p) == 10 ) - goto st8; - goto st12; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - _widec = (*p); - if ( (*p) < 13 ) { - if ( (*p) > 9 ) { - if ( 10 <= (*p) && (*p) <= 10 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - 
json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 13 ) { - if ( (*p) > 32 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 32 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 34: goto tr2; - case 45: goto tr2; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 93: goto tr4; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - case 269: goto st8; - case 288: goto st8; - case 303: goto st9; - case 525: goto st13; - case 544: goto st13; - case 559: goto st14; - } - if ( _widec < 265 ) { - if ( 48 <= _widec && _widec <= 57 ) - goto tr2; - } else if ( _widec > 266 ) { - if ( 521 <= _widec && _widec <= 522 ) - goto st13; - } else - goto st8; - goto st0; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - _widec = (*p); - if ( (*p) > 42 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st10; - case 303: goto st12; - case 554: goto st15; - case 559: goto st17; - } - goto st0; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - _widec = (*p); - if ( (*p) < 42 ) { - if ( (*p) <= 41 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) 
_widec += 256; - } - } else if ( (*p) > 42 ) { - if ( 43 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st11; - case 554: goto st16; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st15; - } else if ( _widec >= 128 ) - goto st10; - goto st0; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - _widec = (*p); - if ( (*p) < 43 ) { - if ( (*p) > 41 ) { - if ( 42 <= (*p) && (*p) <= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 46 ) { - if ( (*p) > 47 ) { - if ( 48 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st11; - case 303: goto st8; - case 554: goto st16; - case 559: goto st13; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st15; - } else if ( _widec >= 128 ) - goto st10; - goto st0; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - _widec = (*p); - if ( (*p) < 10 ) { - if ( (*p) <= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 10 ) { - if ( 11 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 
"parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 822 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 266: goto st8; - case 522: goto st13; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st17; - } else if ( _widec >= 128 ) - goto st12; - goto st0; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - switch( (*p) ) { - case 42: goto st19; - case 47: goto st21; - } - goto st0; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - if ( (*p) == 42 ) - goto st20; - goto st19; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - switch( (*p) ) { - case 42: goto st20; - case 47: goto st2; - } - goto st19; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: - if ( (*p) == 10 ) - goto st2; - goto st21; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 845 "parser.rl" - - if(cs >= JSON_array_first_final) { - long count = state->stack->head - stack_head; - - if (RB_UNLIKELY(json->array_class)) { - VALUE array = rb_class_new_instance(0, 0, json->array_class); - VALUE *items = rvalue_stack_peek(state->stack, 
count); - long index; - for (index = 0; index < count; index++) { - rb_funcall(array, i_leftshift, 1, items[index]); - } - *result = array; - } else { - VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count)); - *result = array; + uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT; + state->cursor += consecutive_spaces; + break; + } +#endif + break; + case '\t': + case '\r': + state->cursor++; + break; + case '/': + json_eat_comments(state); + break; + + default: + return; } - rvalue_stack_pop(state->stack, count); - - return p + 1; - } else { - raise_parse_error("unexpected token at '%s'", p); - return NULL; } } @@ -2264,11 +811,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern return result; } -static VALUE json_string_fastpath(JSON_ParserState *state, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) +static inline bool json_string_cacheable_p(const char *string, size_t length) +{ + // We mostly want to cache strings that are likely to be repeated. + // Simple heuristics: + // - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold. + // - If the first character isn't a letter, we're much less likely to see this string again. 
+ return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]); +} + +static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name) { + bool intern = is_name || config->freeze; + bool symbolize = is_name && config->symbolize_names; size_t bufferSize = stringEnd - string; - if (is_name && state->in_array) { + if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize); @@ -2284,323 +842,900 @@ static VALUE json_string_fastpath(JSON_ParserState *state, char *string, char *s return build_string(string, stringEnd, intern, symbolize); } -static VALUE json_string_unescape(JSON_ParserState *state, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - char *p = string, *pe = string, *unescape, *bufferStart, *buffer; - int unescape_len; - char buf[4]; - - if (is_name && state->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize); - } +#define JSON_MAX_UNESCAPE_POSITIONS 16 +typedef struct _json_unescape_positions { + long size; + const char **positions; + unsigned long additional_backslashes; +} JSON_UnescapePositions; - if (RB_LIKELY(cached_key)) { - return cached_key; +static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions) +{ + while (positions->size) { + positions->size--; + const char *next_position = positions->positions[0]; + positions->positions++; + if (next_position >= pe) { + return next_position; } } - pe = memchr(p, '\\', bufferSize); - if (RB_UNLIKELY(pe == NULL)) { - return build_string(string, stringEnd, 
intern, symbolize); + if (positions->additional_backslashes) { + positions->additional_backslashes--; + return memchr(pe, '\\', stringEnd - pe); } + return NULL; +} + +NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions) +{ + bool intern = is_name || config->freeze; + bool symbolize = is_name && config->symbolize_names; + size_t bufferSize = stringEnd - string; + const char *p = string, *pe = string, *bufferStart; + char *buffer; + VALUE result = rb_str_buf_new(bufferSize); rb_enc_associate_index(result, utf8_encindex); - buffer = bufferStart = RSTRING_PTR(result); - - while (pe < stringEnd) { - if (*pe == '\\') { - unescape = (char *) "?"; - unescape_len = 1; - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } - switch (*++pe) { - case 'n': - unescape = (char *) "\n"; - break; - case 'r': - unescape = (char *) "\r"; - break; - case 't': - unescape = (char *) "\t"; - break; - case '"': - unescape = (char *) "\""; - break; - case '\\': - unescape = (char *) "\\"; - break; - case 'b': - unescape = (char *) "\b"; - break; - case 'f': - unescape = (char *) "\f"; - break; - case 'u': - if (pe > stringEnd - 4) { - raise_parse_error("incomplete unicode character escape sequence at '%s'", p); - } else { - uint32_t ch = unescape_unicode((unsigned char *) ++pe); - pe += 3; - /* To handle values above U+FFFF, we take a sequence of - * \uXXXX escapes in the U+D800..U+DBFF then - * U+DC00..U+DFFF ranges, take the low 10 bits from each - * to make a 20-bit number, then add 0x10000 to get the - * final codepoint. - * - * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling - * Surrogate Pairs in UTF-16", and 23.6 "Surrogates - * Area". 
- */ - if ((ch & 0xFC00) == 0xD800) { - pe++; - if (pe > stringEnd - 6) { - raise_parse_error("incomplete surrogate pair at '%s'", p); - } - if (pe[0] == '\\' && pe[1] == 'u') { - uint32_t sur = unescape_unicode((unsigned char *) pe + 2); - ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) - | (sur & 0x3FF)); - pe += 5; - } else { - unescape = (char *) "?"; - break; - } + buffer = RSTRING_PTR(result); + bufferStart = buffer; + +#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe; + + while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) { + if (pe > p) { + MEMCPY(buffer, p, char, pe - p); + buffer += pe - p; + } + switch (*++pe) { + case '"': + case '/': + p = pe; // nothing to unescape just need to skip the backslash + break; + case '\\': + APPEND_CHAR('\\'); + break; + case 'n': + APPEND_CHAR('\n'); + break; + case 'r': + APPEND_CHAR('\r'); + break; + case 't': + APPEND_CHAR('\t'); + break; + case 'b': + APPEND_CHAR('\b'); + break; + case 'f': + APPEND_CHAR('\f'); + break; + case 'u': { + uint32_t ch = unescape_unicode(state, ++pe, stringEnd); + pe += 3; + /* To handle values above U+FFFF, we take a sequence of + * \uXXXX escapes in the U+D800..U+DBFF then + * U+DC00..U+DFFF ranges, take the low 10 bits from each + * to make a 20-bit number, then add 0x10000 to get the + * final codepoint. + * + * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling + * Surrogate Pairs in UTF-16", and 23.6 "Surrogates + * Area". 
+ */ + if ((ch & 0xFC00) == 0xD800) { + pe++; + if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) { + uint32_t sur = unescape_unicode(state, pe + 2, stringEnd); + + if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) { + raise_parse_error_at("invalid surrogate pair at %s", state, p); } - unescape_len = convert_UTF32_to_UTF8(buf, ch); - unescape = buf; + + ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF)); + pe += 5; + } else { + raise_parse_error_at("incomplete surrogate pair at %s", state, p); + break; } - break; - default: - p = pe; - continue; + } + + int unescape_len = convert_UTF32_to_UTF8(buffer, ch); + buffer += unescape_len; + p = ++pe; + break; } - MEMCPY(buffer, unescape, char, unescape_len); - buffer += unescape_len; - p = ++pe; - } else { - pe++; + default: + if ((unsigned char)*pe < 0x20) { + if (!config->allow_control_characters) { + if (*pe == '\n') { + raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1); + } + raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1); + } + } + + if (config->allow_invalid_escape) { + APPEND_CHAR(*pe); + } else { + raise_parse_error_at("invalid escape character in string: %s", state, pe - 1); + } + break; } } +#undef APPEND_CHAR - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; + if (stringEnd > p) { + MEMCPY(buffer, p, char, stringEnd - p); + buffer += stringEnd - p; } rb_str_set_len(result, buffer - bufferStart); if (symbolize) { result = rb_str_intern(result); } else if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + result = rb_str_to_interned_str(result); } return result; } +#define MAX_FAST_INTEGER_SIZE 18 +#define MAX_NUMBER_STACK_BUFFER 128 + +typedef VALUE (*json_number_decode_func_t)(const char *ptr); -#line 2410 "parser.c" -enum {JSON_string_start = 1}; -enum {JSON_string_first_final = 9}; -enum {JSON_string_error = 0}; +static inline VALUE 
json_decode_large_number(const char *start, long len, json_number_decode_func_t func) +{ + if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) { + char buffer[MAX_NUMBER_STACK_BUFFER]; + MEMCPY(buffer, start, char, len); + buffer[len] = '\0'; + return func(buffer); + } else { + VALUE buffer_v = rb_str_tmp_new(len); + char *buffer = RSTRING_PTR(buffer_v); + MEMCPY(buffer, start, char, len); + buffer[len] = '\0'; + VALUE number = func(buffer); + RB_GC_GUARD(buffer_v); + return number; + } +} -enum {JSON_string_en_main = 1}; +static VALUE json_decode_inum(const char *buffer) +{ + return rb_cstr2inum(buffer, 10); +} +NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len) +{ + return json_decode_large_number(start, len, json_decode_inum); +} -#line 1068 "parser.rl" +static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end) +{ + if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) { + if (negative) { + return INT64T2NUM(-((int64_t)mantissa)); + } + return UINT64T2NUM(mantissa); + } + return json_decode_large_integer(start, end - start); +} -static int -match_i(VALUE regexp, VALUE klass, VALUE memo) +static VALUE json_decode_dnum(const char *buffer) { - if (regexp == Qundef) return ST_STOP; - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) && - RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) { - rb_ary_push(memo, klass); - return ST_STOP; + return DBL2NUM(rb_cstr_to_dbl(buffer, 1)); +} + +NOINLINE(static) VALUE json_decode_large_float(const char *start, long len) +{ + return json_decode_large_number(start, len, json_decode_dnum); +} + +/* Ruby JSON optimized float decoder using vendored Ryu algorithm + * Accepts pre-extracted mantissa and exponent from first-pass validation + */ +static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int64_t exponent, bool negative, + const char *start, const char *end) +{ + if 
(RB_UNLIKELY(config->decimal_class)) { + VALUE text = rb_str_new(start, end - start); + return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text); } - return ST_CONTINUE; + + if (RB_UNLIKELY(exponent > INT32_MAX)) { + return negative ? CMinusInfinity : CInfinity; + } + + if (RB_UNLIKELY(exponent < INT32_MIN)) { + return rb_float_new(negative ? -0.0 : 0.0); + } + + // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case) + // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308) + if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) { + return json_decode_large_float(start, end - start); + } + + return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, (int32_t)exponent, negative)); } -static char *JSON_parse_string(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - VALUE match_string; - - -#line 2439 "parser.c" - { - cs = JSON_string_start; - } - -#line 1088 "parser.rl" - state->memo = p; - -#line 2447 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 34 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 34: goto tr2; - case 92: goto st3; - } - if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) - goto st0; - goto st2; -tr2: -#line 1050 "parser.rl" - { - *result = json_string_fastpath(state, state->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - {p = (( p + 1))-1;} - p--; - {p++; cs = 9; goto _out;} - } -#line 1043 "parser.rl" - { - *result = json_string_unescape(state, state->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - {p = (( p + 1))-1;} - p--; - {p++; cs = 9; goto _out;} - } - goto st9; -tr6: -#line 1043 "parser.rl" - { - *result = json_string_unescape(state, state->memo + 1, 
p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - {p = (( p + 1))-1;} - p--; - {p++; cs = 9; goto _out;} - } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 2500 "parser.c" - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 117 ) - goto st5; - if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) - goto st0; - goto st4; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 34: goto tr6; - case 92: goto st3; - } - if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) - goto st0; - goto st4; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st6; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st6; - } else - goto st6; - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st7; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st7; - } else - goto st7; - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st8; - } else - goto st8; - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st4; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st4; - } else - goto st4; - goto st0; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 1090 "parser.rl" - - if (json->create_additions && RTEST(match_string = json->match_string)) { - VALUE klass; - VALUE 
memo = rb_ary_new2(2); - rb_ary_push(memo, *result); - rb_hash_foreach(match_string, match_i, memo); - klass = rb_ary_entry(memo, 1); - if (RTEST(klass)) { - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - - if (cs >= JSON_string_first_final) { - return p + 1; - } else { - return NULL; +static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count) +{ + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->value_stack, count)); + rvalue_stack_pop(state->value_stack, count); + + if (config->freeze) { + RB_OBJ_FREEZE(array); + } + + return array; +} + +static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs) +{ + VALUE set = rb_hash_new_capa(count / 2); + for (size_t index = 0; index < count; index += 2) { + size_t before = RHASH_SIZE(set); + VALUE key = pairs[index]; + rb_hash_aset(set, key, Qtrue); + if (RHASH_SIZE(set) == before) { + if (RB_SYMBOL_P(key)) { + return rb_sym2str(key); + } + return key; + } + } + return Qfalse; +} + +NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key) +{ + VALUE message = rb_sprintf( + "detected duplicate key %"PRIsVALUE" in JSON object. 
This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`", + rb_inspect(duplicate_key) + ); + + emit_parse_warning(RSTRING_PTR(message), state); + RB_GC_GUARD(message); +} + +NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key) +{ + VALUE message = rb_sprintf( + "duplicate key %"PRIsVALUE, + rb_inspect(duplicate_key) + ); + + long line, column; + cursor_position(state, &line, &column); + rb_str_concat(message, build_parse_error_message("", state, line, column)) ; + rb_exc_raise(parse_error_new(message, line, column)); +} + +NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_ParserConfig *config, size_t count, const VALUE *pairs) +{ + switch (config->on_duplicate_key) { + case JSON_IGNORE: + return; + + case JSON_DEPRECATED: + // Only emit the first few deprecations to avoid spamming. + if (state->emitted_deprecations < 5) { + emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs)); + state->emitted_deprecations++; + } + return; + + case JSON_RAISE: + raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs)); + return; + } + UNREACHABLE; +} + +static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count) +{ + size_t entries_count = count / 2; + VALUE object = rb_hash_new_capa(entries_count); + const VALUE *pairs = rvalue_stack_peek(state->value_stack, count); + rb_hash_bulk_insert(count, pairs, object); + + if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) { + json_on_duplicate_key(state, config, count, pairs); + } + + rvalue_stack_pop(state->value_stack, count); + + if (config->freeze) { + RB_OBJ_FREEZE(object); + } + + return object; +} + +static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value) +{ + if (RB_UNLIKELY(config->on_load_proc)) { + value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil); + } + 
rvalue_stack_push(state->value_stack, value, state->value_stack_handle, &state->value_stack); + return value; +} + +static const bool string_scan_table[256] = { + // ASCII Control Characters + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // ASCII Characters + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +#ifdef HAVE_SIMD +static SIMD_Implementation simd_impl = SIMD_NONE; +#endif /* HAVE_SIMD */ + +ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state) +{ +#ifdef HAVE_SIMD +#if defined(HAVE_SIMD_NEON) + + uint64_t mask = 0; + if (string_scan_simd_neon(&state->cursor, state->end, &mask)) { + state->cursor += trailing_zeros64(mask) >> 2; + return true; + } + +#elif defined(HAVE_SIMD_SSE2) + if (simd_impl == SIMD_SSE2) { + int mask = 0; + if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) { + state->cursor += trailing_zeros(mask); + return true; + } + } +#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */ +#endif /* HAVE_SIMD */ + + while (!eos(state)) { + if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) { + return true; + } + state->cursor++; + } + + // If the string ended with an unterminated escape sequence, we might + // have gone past the end. 
+ if (RB_UNLIKELY(state->cursor > state->end)) { + state->cursor = state->end; + } + + return false; +} + +static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start) +{ + const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS]; + JSON_UnescapePositions positions = { + .size = 0, + .positions = backslashes, + .additional_backslashes = 0, + }; + + do { + switch (*state->cursor) { + case '"': { + VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions); + state->cursor++; + return string; + } + case '\\': { + if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) { + backslashes[positions.size] = state->cursor; + positions.size++; + } else { + positions.additional_backslashes++; + } + state->cursor++; + break; + } + default: + if (!config->allow_control_characters) { + raise_parse_error("invalid ASCII control character in string: %s", state); + } + break; + } + + state->cursor++; + } while (string_scan(state)); + + raise_parse_error("unexpected end of input, expected closing \"", state); + return Qfalse; +} + +ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name) +{ + state->cursor++; + const char *start = state->cursor; + + if (RB_UNLIKELY(!string_scan(state))) { + raise_parse_error("unexpected end of input, expected closing \"", state); + } + + VALUE string; + if (RB_LIKELY(*state->cursor == '"')) { + string = json_string_fastpath(state, config, start, state->cursor, is_name); + state->cursor++; + } + else { + string = json_parse_escaped_string(state, config, is_name, start); + } + + return string; +} + +#if JSON_CPU_LITTLE_ENDIAN_64BITS +// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/ +// Additional References: +// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html +static inline 
uint64_t decode_8digits_unrolled(uint64_t val) { + const uint64_t mask = 0x000000FF000000FF; + const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return val; +} + +static inline uint64_t decode_4digits_unrolled(uint32_t val) { + const uint32_t mask = 0x000000FF; + const uint32_t mul1 = 100; + val -= 0x30303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = ((val & mask) * mul1) + (((val >> 16) & mask)); + return val; +} +#endif + +static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator) +{ + const char *start = state->cursor; + +#if JSON_CPU_LITTLE_ENDIAN_64BITS + while (rest(state) >= sizeof(uint64_t)) { + uint64_t next_8bytes; + memcpy(&next_8bytes, state->cursor, sizeof(uint64_t)); + + // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333 + // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html + uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4); + + if (match == 0x3333333333333333) { // 8 consecutive digits + *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes); + state->cursor += 8; + continue; + } + + uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT; + + if (consecutive_digits >= 4) { + *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes); + state->cursor += 4; + consecutive_digits -= 4; + } + + while (consecutive_digits) { + *accumulator = *accumulator * 10 + (*state->cursor - '0'); + consecutive_digits--; + state->cursor++; + } + + return (int)(state->cursor - start); + } +#endif + + char 
next_char; + while (rb_isdigit(next_char = peek(state))) { + *accumulator = *accumulator * 10 + (next_char - '0'); + state->cursor++; + } + return (int)(state->cursor - start); +} + +static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start) +{ + bool integer = true; + const char first_digit = *state->cursor; + + // Variables for Ryu optimization - extract digits during parsing + int64_t exponent = 0; + int decimal_point_pos = -1; + uint64_t mantissa = 0; + + // Parse integer part and extract mantissa digits + int mantissa_digits = json_parse_digits(state, &mantissa); + + if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) { + raise_parse_error_at("invalid number: %s", state, start); + } + + // Parse fractional part + if (peek(state) == '.') { + integer = false; + decimal_point_pos = mantissa_digits; // Remember position of decimal point + state->cursor++; + + int fractional_digits = json_parse_digits(state, &mantissa); + mantissa_digits += fractional_digits; + + if (RB_UNLIKELY(!fractional_digits)) { + raise_parse_error_at("invalid number: %s", state, start); + } + } + + // Parse exponent + if (rb_tolower(peek(state)) == 'e') { + integer = false; + state->cursor++; + + bool negative_exponent = false; + const char next_char = peek(state); + if (next_char == '-' || next_char == '+') { + negative_exponent = next_char == '-'; + state->cursor++; + } + + uint64_t abs_exponent = 0; + int exponent_digits = json_parse_digits(state, &abs_exponent); + + if (RB_UNLIKELY(!exponent_digits)) { + raise_parse_error_at("invalid number: %s", state, start); + } + + if (RB_UNLIKELY(exponent_digits >= 20 || abs_exponent > (uint64_t)INT64_MAX)) { + exponent = negative_exponent ? INT64_MIN : INT64_MAX; + } else { + exponent = negative_exponent ? 
-(int64_t)abs_exponent : (int64_t)abs_exponent; + } + } + + if (integer) { + return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor); + } + + // Adjust exponent based on decimal point position + if (decimal_point_pos >= 0) { + exponent -= (mantissa_digits - decimal_point_pos); + } + + return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor); +} + +static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config) +{ + return json_parse_number(state, config, false, state->cursor); +} + +static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config) +{ + return json_parse_number(state, config, true, state->cursor - 1); +} + +// How many values (array elements, or interleaved object keys+values) have been +// pushed onto the rvalue stack since this container opened. Used to size the +// bulk decode on close, and to tell the first key/colon from later ones. +static inline long json_frame_entry_count(const json_frame *frame, const rvalue_stack *value_stack) +{ + return value_stack->head - frame->value_stack_head; +} + +// A complete value now sits on top of the rvalue stack. Advance the frame that +// was waiting for it: the root document is done, or the enclosing container +// moves on to expecting a ',' or its closing bracket. The caller passes the +// frame it already has in hand -- the one that was expecting the value -- which +// after a container close is the freshly re-exposed parent. 
+static inline void json_value_completed(json_frame *frame) +{ + JSON_ASSERT((int)JSON_PHASE_DONE == (int)JSON_FRAME_ROOT); + JSON_ASSERT((int)JSON_PHASE_ARRAY_COMMA == (int)JSON_FRAME_ARRAY); + JSON_ASSERT((int)JSON_PHASE_OBJECT_COMMA == (int)JSON_FRAME_OBJECT); + + frame->phase = (enum json_frame_phase) frame->type; +} + +ALWAYS_INLINE(static) bool json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset) +{ + // It is assumed that since `keyword` is always a literal, the compiler is able to constantize this + // `strlen` and several other computations in that routine, such as eliminating the `if (resumable)` branch. + + size_t len = strlen(keyword); + + // Note: memcmp with a small power of two and a literal string compile to an integer comparison / + // That's why we sometime compare starting from the first byte and sometimes from the second. + if (rest(state) >= len && (memcmp(state->cursor + offset, keyword + offset, len - offset) == 0)) { + state->cursor += len; + return true; + } + return false; +} + +// Parse an arbitrary JSON value iteratively. This is a state machine driven +// entirely by the top frame's phase so it can stop at any value boundary and +// resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the +// bottom of the stack, so the stack is never empty mid-parse and the document +// itself is just another frame whose value, once parsed, leaves its phase DONE. +static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) +{ + while (true) { + json_frame *frame = json_frame_stack_peek(state->frames); + + switch (frame->phase) { + case JSON_PHASE_DONE: { + // The root document value is parsed; it is the lone survivor on + // the rvalue stack. 
+ return *rvalue_stack_peek(state->value_stack, 1); + } + + case JSON_PHASE_VALUE: { + JSON_PHASE_VALUE: + json_eat_whitespace(state); + + VALUE value; + switch (peek(state)) { + case 'n': + if (json_match_keyword(state, "null", 0)) { + value = Qnil; + break; + } + + raise_parse_error("unexpected token %s", state); + case 't': + if (json_match_keyword(state, "true", 0)) { + value = Qtrue; + break; + } + + raise_parse_error("unexpected token %s", state); + case 'f': + if (json_match_keyword(state, "false", 1)) { + value = Qfalse; + break; + } + + raise_parse_error("unexpected token %s", state); + case 'N': + // Note: memcmp with a small power of two compile to an integer comparison + if (config->allow_nan && json_match_keyword(state, "NaN", 1)) { + value = CNaN; + break; + } + + raise_parse_error("unexpected token %s", state); + case 'I': + if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) { + value = CInfinity; + break; + } + + raise_parse_error("unexpected token %s", state); + case '-': { + state->cursor++; + if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) { + value = CMinusInfinity; + } else { + value = json_parse_negative_number(state, config); + } + break; + } + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + value = json_parse_positive_number(state, config); + break; + case '"': + // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"} + value = json_parse_string(state, config, false); + break; + case '[': { + state->cursor++; + json_eat_whitespace(state); + + if (peek(state) == ']') { + state->cursor++; + value = json_decode_array(state, config, 0); + break; + } + + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + } + state->in_array++; + + // Phase stays VALUE: the next iteration reads the first element. 
+ frame = json_frame_stack_push(state, (json_frame){ + .type = JSON_FRAME_ARRAY, + .phase = JSON_PHASE_VALUE, + .value_stack_head = state->value_stack->head, + }); + goto JSON_PHASE_VALUE; + break; + } + case '{': { + const char *object_start_cursor = state->cursor; + + state->cursor++; + json_eat_whitespace(state); + + if (peek(state) == '}') { + state->cursor++; + value = json_decode_object(state, config, 0); + break; + } + + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + } + + // Phase KEY: the next iteration reads the first key. + frame = json_frame_stack_push(state, (json_frame){ + .type = JSON_FRAME_OBJECT, + .phase = JSON_PHASE_OBJECT_KEY, + .value_stack_head = state->value_stack->head, + .start_cursor = object_start_cursor, + }); + goto JSON_PHASE_OBJECT_KEY; + break; + } + + case 0: + raise_parse_error("unexpected end of input", state); + + default: + raise_parse_error("unexpected character: %s", state); + } + + json_push_value(state, config, value); + json_value_completed(frame); + break; + } + + case JSON_PHASE_OBJECT_KEY: { + JSON_PHASE_OBJECT_KEY: + JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); + + json_eat_whitespace(state); + + if (RB_LIKELY(peek(state) == '"')) { + json_push_value(state, config, json_parse_string(state, config, true)); + frame->phase = JSON_PHASE_OBJECT_COLON; + goto JSON_PHASE_OBJECT_COLON; + } else { + // The message differs for the first key vs. a key after a + // ',': the first is the only one reached with nothing pushed + // for this object yet. 
+ if (json_frame_entry_count(frame, state->value_stack) == 0) { + raise_parse_error("expected object key, got %s", state); + } else { + raise_parse_error("expected object key, got: %s", state); + } + } + break; + } + + case JSON_PHASE_OBJECT_COLON: { + JSON_PHASE_OBJECT_COLON: + JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); + + json_eat_whitespace(state); + + if (RB_LIKELY(peek(state) == ':')) { + state->cursor++; + frame->phase = JSON_PHASE_VALUE; + goto JSON_PHASE_VALUE; + } else { + // First colon (only the first pair's key is pushed, nothing + // else) vs. a later one. + if (json_frame_entry_count(frame, state->value_stack) == 1) { + raise_parse_error("expected ':' after object key", state); + } else { + raise_parse_error("expected ':' after object key, got: %s", state); + } + } + break; + } + + case JSON_PHASE_ARRAY_COMMA: { + JSON_ASSERT(frame->type == JSON_FRAME_ARRAY); + + json_eat_whitespace(state); + + const char next_char = peek(state); + + if (RB_LIKELY(next_char == ',')) { + state->cursor++; + if (config->allow_trailing_comma) { + json_eat_whitespace(state); + if (peek(state) == ']') { + // Trailing comma: stay in COMMA to close on the next iteration. 
+ break; + } + } + frame->phase = JSON_PHASE_VALUE; + goto JSON_PHASE_VALUE; + } else if (next_char == ']') { + state->cursor++; + long count = json_frame_entry_count(frame, state->value_stack); + state->current_nesting--; + state->in_array--; + json_frame_stack_pop(state->frames); + json_push_value(state, config, json_decode_array(state, config, count)); + json_value_completed(json_frame_stack_peek(state->frames)); + } else { + raise_parse_error("expected ',' or ']' after array value", state); + } + break; + } + + case JSON_PHASE_OBJECT_COMMA: { + JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); + + json_eat_whitespace(state); + const char next_char = peek(state); + + if (RB_LIKELY(next_char == ',')) { + state->cursor++; + json_eat_whitespace(state); + + if (config->allow_trailing_comma) { + if (peek(state) == '}') { + // Trailing comma: stay in COMMA to close on the next iteration. + break; + } + } + + frame->phase = JSON_PHASE_OBJECT_KEY; + goto JSON_PHASE_OBJECT_KEY; + + break; + } else if (next_char == '}') { + state->cursor++; + state->current_nesting--; + size_t count = json_frame_entry_count(frame, state->value_stack); + + // Temporary rewind cursor in case an error is raised + const char *final_cursor = state->cursor; + state->cursor = frame->start_cursor; + VALUE object = json_decode_object(state, config, count); + state->cursor = final_cursor; + + json_frame_stack_pop(state->frames); + json_push_value(state, config, object); + json_value_completed(json_frame_stack_peek(state->frames)); + break; + } else { + raise_parse_error("expected ',' or '}' after object value, got: %s", state); + } + } + } + } +} + +static void json_ensure_eof(JSON_ParserState *state) +{ + json_eat_whitespace(state); + if (!eos(state)) { + raise_parse_error("unexpected token at end of stream %s", state); } } @@ -2618,67 +1753,97 @@ case 8: static VALUE convert_encoding(VALUE source) { - int encindex = RB_ENCODING_GET(source); + StringValue(source); + int encindex = 
RB_ENCODING_GET(source); + + if (RB_LIKELY(encindex == utf8_encindex)) { + return source; + } + + if (encindex == binary_encindex) { + // For historical reason, we silently reinterpret binary strings as UTF-8 + return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + } - if (RB_LIKELY(encindex == utf8_encindex)) { + source = rb_funcall(source, i_encode, 1, Encoding_UTF_8); + StringValue(source); return source; - } - - if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); - } - - return rb_funcall(source, i_encode, 1, Encoding_UTF_8); -} - -static int configure_parser_i(VALUE key, VALUE val, VALUE data) -{ - JSON_Parser *json = (JSON_Parser *)data; - - if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } - else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } - else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { json->freeze = RTEST(val); } - else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } - else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_match_string) { json->match_string = RTEST(val) ? 
val : Qfalse; } - else if (key == sym_create_additions) { - if (NIL_P(val)) { - json->create_additions = true; - json->deprecated_create_additions = true; - } else { - json->create_additions = RTEST(val); - json->deprecated_create_additions = false; +} + +struct parser_config_init_args { + JSON_ParserConfig *config; + VALUE self; +}; + +static void parser_config_wb_write(VALUE self, VALUE *dest, VALUE val) +{ + *dest = val; + if (self) RB_OBJ_WRITTEN(self, Qundef, val); +} + +static int parser_config_init_i(VALUE key, VALUE val, VALUE data) +{ + struct parser_config_init_args *args = (struct parser_config_init_args *)data; + JSON_ParserConfig *config = args->config; + VALUE self = args->self; + + if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); } + else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); } + else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); } + else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { config->freeze = RTEST(val); } + else if (key == sym_on_load) { parser_config_wb_write(self, &config->on_load_proc, RTEST(val) ? val : Qfalse); } + else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? 
JSON_IGNORE : JSON_RAISE; } + else if (key == sym_decimal_class) { + if (RTEST(val)) { + if (rb_respond_to(val, i_try_convert)) { + parser_config_wb_write(self, &config->decimal_class, val); + config->decimal_method_id = i_try_convert; + } else if (rb_respond_to(val, i_new)) { + parser_config_wb_write(self, &config->decimal_class, val); + config->decimal_method_id = i_new; + } else if (RB_TYPE_P(val, T_CLASS)) { + VALUE name = rb_class_name(val); + const char *name_cstr = RSTRING_PTR(name); + const char *last_colon = strrchr(name_cstr, ':'); + if (last_colon) { + const char *mod_path_end = last_colon - 1; + VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); + parser_config_wb_write(self, &config->decimal_class, rb_path_to_class(mod_path)); + + const char *method_name_beg = last_colon + 1; + long before_len = method_name_beg - name_cstr; + long len = RSTRING_LEN(name) - before_len; + VALUE method_name = rb_str_substr(name, before_len, len); + config->decimal_method_id = SYM2ID(rb_str_intern(method_name)); + } else { + parser_config_wb_write(self, &config->decimal_class, rb_mKernel); + config->decimal_method_id = SYM2ID(rb_str_intern(name)); + } + } } } return ST_CONTINUE; } -static void parser_init(JSON_Parser *json, VALUE opts) +static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self) { - json->max_nesting = 100; + config->max_nesting = 100; + + struct parser_config_init_args args = { + .config = config, + .self = self, + }; if (!NIL_P(opts)) { Check_Type(opts, T_HASH); if (RHASH_SIZE(opts) > 0) { // We assume in most cases few keys are set so it's faster to go over // the provided keys than to check all possible keys. 
- rb_hash_foreach(opts, configure_parser_i, (VALUE)json); - - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - - if (json->create_additions && !json->create_id) { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } + rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args); } } @@ -2703,235 +1868,81 @@ static void parser_init(JSON_Parser *json, VALUE opts) * (keys) in a JSON object. Otherwise strings are returned, which is * also the default. It's not possible to use this option in * conjunction with the *create_additions* option. - * * *create_additions*: If set to false, the Parser doesn't create - * additions even if a matching class and create_id was found. This option - * defaults to false. - * * *object_class*: Defaults to Hash. If another type is provided, it will be used - * instead of Hash to represent JSON objects. The type must respond to - * +new+ without arguments, and return an object that respond to +[]=+. - * * *array_class*: Defaults to Array If another type is provided, it will be used - * instead of Hash to represent JSON arrays. The type must respond to - * +new+ without arguments, and return an object that respond to +<<+. * * *decimal_class*: Specifies which class to use instead of the default * (Float) when parsing decimal numbers. This class must accept a single * string argument in its constructor. 
*/ static VALUE cParserConfig_initialize(VALUE self, VALUE opts) { - GET_PARSER; + rb_check_frozen(self); + GET_PARSER_CONFIG; + + parser_config_init(config, opts, self); - parser_init(json, opts); return self; } +static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src) +{ + VALUE Vsource = convert_encoding(src); -#line 2729 "parser.c" -enum {JSON_start = 1}; -enum {JSON_first_final = 10}; -enum {JSON_error = 0}; - -enum {JSON_en_main = 1}; - - -#line 1244 "parser.rl" - - -static VALUE cParser_parse_safe(VALUE vstate) -{ - JSON_ParserState *state = (JSON_ParserState *)vstate; - VALUE result = Qnil; - char *p, *pe; - int cs = EVIL; - JSON_Parser *json = state->json; - - -#line 2749 "parser.c" - { - cs = JSON_start; - } - -#line 1255 "parser.rl" - p = state->source; - pe = p + state->len; - -#line 2758 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 13: goto st1; - case 32: goto st1; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st6; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st1; - goto st0; -st0: -cs = 0; - goto _out; -tr2: -#line 1236 "parser.rl" - { - char *np = JSON_parse_value(state, json, p, pe, &result, 0); - if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} - } - goto st10; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: -#line 2802 "parser.c" - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 47: goto st2; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 42: goto st3; - case 47: goto st5; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 42 ) - goto st4; - 
goto st3; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st4; - case 47: goto st10; - } - goto st3; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 10 ) - goto st10; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st9; - } - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 42 ) - goto st8; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 42: goto st8; - case 47: goto st1; - } - goto st7; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 10 ) - goto st1; - goto st9; - } - _test_eof1: cs = 1; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 1258 "parser.rl" - - if (state->stack_handle) { - rvalue_stack_eagerly_release(state->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; + // Ensure the string isn't mutated under us. + // The classic API to use is `rb_str_locktmp`, but then we'd + // need to use `rb_protect` to make sure we always unlock. 
+ if (Vsource == src) { + Vsource = rb_str_new_frozen(Vsource); } -} - -static VALUE cParser_parse(JSON_Parser *json, VALUE Vsource) -{ - Vsource = convert_encoding(StringValue(Vsource)); - StringValue(Vsource); VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { + rvalue_stack value_stack = { .type = RVALUE_STACK_STACK_ALLOCATED, .ptr = rvalue_stack_buffer, .capa = RVALUE_STACK_INITIAL_CAPA, }; + // Seed the frame stack with the root frame, establishing the invariant that + // json_parse_any always has a top frame to dispatch on (so the stack is never + // empty mid-parse). + json_frame frame_stack_buffer[JSON_FRAME_STACK_INITIAL_CAPA]; + frame_stack_buffer[0] = (json_frame){ + .type = JSON_FRAME_ROOT, + .phase = JSON_PHASE_VALUE, + }; + json_frame_stack frames = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = frame_stack_buffer, + .capa = JSON_FRAME_STACK_INITIAL_CAPA, + .head = 1, + }; + + long len; + const char *start; + + RSTRING_GETMEM(Vsource, start, len); + + VALUE value_stack_handle = 0; + VALUE frame_stack_handle = 0; JSON_ParserState _state = { - .json = json, - .len = RSTRING_LEN(Vsource), - .source = RSTRING_PTR(Vsource), - .Vsource = Vsource, - .stack = &stack, + .start = start, + .cursor = start, + .end = start + len, + .value_stack = &value_stack, + .value_stack_handle = &value_stack_handle, + .frames = &frames, + .frame_stack_handle = &frame_stack_handle, }; JSON_ParserState *state = &_state; - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&state->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + VALUE result = json_parse_any(state, config); - int interupted; - VALUE result = rb_protect(cParser_parse_safe, (VALUE)state, &interupted); - - fbuffer_free(&state->fbuffer); - if (interupted) { - rb_jump_tag(interupted); - } + // This may be skipped in case of exception, but + // it won't cause a leak. 
+ rvalue_stack_eagerly_release(value_stack_handle); + json_frame_stack_eagerly_release(frame_stack_handle); + RB_GC_GUARD(value_stack_handle); + RB_GC_GUARD(frame_stack_handle); + RB_GC_GUARD(Vsource); + json_ensure_eof(state); return result; } @@ -2945,54 +1956,57 @@ static VALUE cParser_parse(JSON_Parser *json, VALUE Vsource) */ static VALUE cParserConfig_parse(VALUE self, VALUE Vsource) { - GET_PARSER; - return cParser_parse(json, Vsource); + GET_PARSER_CONFIG; + return cParser_parse(config, Vsource); } static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts) { - Vsource = convert_encoding(StringValue(Vsource)); - StringValue(Vsource); - - JSON_Parser _parser = {0}; - JSON_Parser *json = &_parser; - parser_init(json, opts); + JSON_ParserConfig _config = {0}; + JSON_ParserConfig *config = &_config; + parser_config_init(config, opts, false); - return cParser_parse(json, Vsource); + return cParser_parse(config, Vsource); } -static void JSON_mark(void *ptr) +static void JSON_ParserConfig_mark(void *ptr) { - JSON_Parser *json = ptr; - rb_gc_mark(json->create_id); - rb_gc_mark(json->object_class); - rb_gc_mark(json->array_class); - rb_gc_mark(json->decimal_class); - rb_gc_mark(json->match_string); + JSON_ParserConfig *config = ptr; + rb_gc_mark_movable(config->on_load_proc); + rb_gc_mark_movable(config->decimal_class); } -static void JSON_free(void *ptr) +static size_t JSON_ParserConfig_memsize(const void *ptr) { - JSON_Parser *json = ptr; - ruby_xfree(json); +#ifdef HAVE_RUBY_TYPED_EMBEDDABLE + return 0; +#else + return sizeof(JSON_ParserConfig); +#endif } -static size_t JSON_memsize(const void *ptr) +static void JSON_ParserConfig_compact(void *ptr) { - return sizeof(JSON_Parser); + JSON_ParserConfig *config = ptr; + config->on_load_proc = rb_gc_location(config->on_load_proc); + config->decimal_class = rb_gc_location(config->decimal_class); } -static const rb_data_type_t JSON_Parser_type = { - "JSON/Parser", - {JSON_mark, JSON_free, JSON_memsize,}, - 0, 
0, - RUBY_TYPED_FREE_IMMEDIATELY, +static const rb_data_type_t JSON_ParserConfig_type = { + .wrap_struct_name = "JSON::Ext::Parser/ParserConfig", + .function = { + .dmark = JSON_ParserConfig_mark, + .dfree = RUBY_DEFAULT_FREE, + .dsize = JSON_ParserConfig_memsize, + .dcompact = JSON_ParserConfig_compact, + }, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE, }; static VALUE cJSON_parser_s_allocate(VALUE klass) { - JSON_Parser *json; - return TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); + JSON_ParserConfig *config; + return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config); } void Init_parser(void) @@ -3030,24 +2044,14 @@ void Init_parser(void) sym_max_nesting = ID2SYM(rb_intern("max_nesting")); sym_allow_nan = ID2SYM(rb_intern("allow_nan")); sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters")); + sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape")); sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); sym_freeze = ID2SYM(rb_intern("freeze")); - sym_create_additions = ID2SYM(rb_intern("create_additions")); - sym_create_id = ID2SYM(rb_intern("create_id")); - sym_object_class = ID2SYM(rb_intern("object_class")); - sym_array_class = ID2SYM(rb_intern("array_class")); + sym_on_load = ID2SYM(rb_intern("on_load")); sym_decimal_class = ID2SYM(rb_intern("decimal_class")); - sym_match_string = ID2SYM(rb_intern("match_string")); - - i_create_id = rb_intern("create_id"); - i_json_creatable_p = rb_intern("json_creatable?"); - i_json_create = rb_intern("json_create"); - i_chr = rb_intern("chr"); - i_match = rb_intern("match"); - i_deep_const_get = rb_intern("deep_const_get"); - i_aset = rb_intern("[]="); - i_aref = rb_intern("[]"); - i_leftshift = rb_intern("<<"); + sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key")); + i_new 
= rb_intern("new"); i_try_convert = rb_intern("try_convert"); i_uminus = rb_intern("-@"); @@ -3056,12 +2060,8 @@ void Init_parser(void) binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); enc_utf8 = rb_utf8_encoding(); -} -/* - * Local variables: - * mode: c - * c-file-style: ruby - * indent-tabs-mode: nil - * End: - */ +#ifdef HAVE_SIMD + simd_impl = find_simd_implementation(); +#endif +} diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl deleted file mode 100644 index 50226a7259..0000000000 --- a/ext/json/parser/parser.rl +++ /dev/null @@ -1,1434 +0,0 @@ -#include "ruby.h" -#include "../fbuffer/fbuffer.h" - -static VALUE mJSON, eNestingError, Encoding_UTF_8; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, - i_chr, i_deep_const_get, i_match, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_uminus, i_encode; - -static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, - sym_create_additions, sym_create_id, sym_object_class, sym_array_class, - sym_decimal_class, sym_match_string; - -static int binary_encindex; -static int utf8_encindex; - -#ifdef HAVE_RB_CATEGORY_WARN -# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) -#else -# define json_deprecated(message) rb_warn(message) -#endif - -static const char deprecated_create_additions_warning[] = - "JSON.load implicit support for `create_additions: true` is deprecated " - "and will be removed in 3.0, use JSON.unsafe_load or explicitly " - "pass `create_additions: true`"; - -#ifndef HAVE_RB_HASH_BULK_INSERT -// For TruffleRuby -void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) -{ - long index = 0; - while (index < count) { - VALUE name = pairs[index++]; - VALUE value = pairs[index++]; - rb_hash_aset(hash, name, value); - } - RB_GC_GUARD(hash); -} -#endif - -/* name cache */ - -#include <string.h> -#include 
<ctype.h> - -// Object names are likely to be repeated, and are frozen. -// As such we can re-use them if we keep a cache of the ones we've seen so far, -// and save much more expensive lookups into the global fstring table. -// This cache implementation is deliberately simple, as we're optimizing for compactness, -// to be able to fit safely on the stack. -// As such, binary search into a sorted array gives a good tradeoff between compactness and -// performance. -#define JSON_RVALUE_CACHE_CAPA 63 -typedef struct rvalue_cache_struct { - int length; - VALUE entries[JSON_RVALUE_CACHE_CAPA]; -} rvalue_cache; - -static rb_encoding *enc_utf8; - -#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 - -static inline VALUE build_interned_string(const char *str, const long length) -{ -# ifdef HAVE_RB_ENC_INTERNED_STR - return rb_enc_interned_str(str, length, enc_utf8); -# else - VALUE rstring = rb_utf8_str_new(str, length); - return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); -# endif -} - -static inline VALUE build_symbol(const char *str, const long length) -{ - return rb_str_intern(build_interned_string(str, length)); -} - -static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) -{ - MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); - cache->length++; - cache->entries[index] = rstring; -} - -static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) -{ - long rstring_length = RSTRING_LEN(rstring); - if (length == rstring_length) { - return memcmp(str, RSTRING_PTR(rstring), length); - } else { - return (int)(length - rstring_length); - } -} - -static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) -{ - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. 
- return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - - int low = 0; - int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; - - while (low <= high) { - mid = (high + low) >> 1; - VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, entry); - - if (last_cmp == 0) { - return entry; - } else if (last_cmp > 0) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - - VALUE rstring = build_interned_string(str, length); - - if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rstring); - } - return rstring; -} - -static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) -{ - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. 
- return Qfalse; - } - - int low = 0; - int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; - - while (low <= high) { - mid = (high + low) >> 1; - VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); - - if (last_cmp == 0) { - return entry; - } else if (last_cmp > 0) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - - VALUE rsymbol = build_symbol(str, length); - - if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rsymbol); - } - return rsymbol; -} - -/* rvalue stack */ - -#define RVALUE_STACK_INITIAL_CAPA 128 - -enum rvalue_stack_type { - RVALUE_STACK_HEAP_ALLOCATED = 0, - RVALUE_STACK_STACK_ALLOCATED = 1, -}; - -typedef struct rvalue_stack_struct { - enum rvalue_stack_type type; - long capa; - long head; - VALUE *ptr; -} rvalue_stack; - -static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); - -static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) -{ - long required = stack->capa * 2; - - if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { - stack = rvalue_stack_spill(stack, handle, stack_ref); - } else { - REALLOC_N(stack->ptr, VALUE, required); - stack->capa = required; - } - return stack; -} - -static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) -{ - if (RB_UNLIKELY(stack->head >= stack->capa)) { - stack = rvalue_stack_grow(stack, handle, stack_ref); - } - stack->ptr[stack->head] = value; - stack->head++; -} - -static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) -{ - return stack->ptr + (stack->head - count); -} - -static inline void 
rvalue_stack_pop(rvalue_stack *stack, long count) -{ - stack->head -= count; -} - -static void rvalue_stack_mark(void *ptr) -{ - rvalue_stack *stack = (rvalue_stack *)ptr; - long index; - for (index = 0; index < stack->head; index++) { - rb_gc_mark(stack->ptr[index]); - } -} - -static void rvalue_stack_free(void *ptr) -{ - rvalue_stack *stack = (rvalue_stack *)ptr; - if (stack) { - ruby_xfree(stack->ptr); - ruby_xfree(stack); - } -} - -static size_t rvalue_stack_memsize(const void *ptr) -{ - const rvalue_stack *stack = (const rvalue_stack *)ptr; - return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; -} - -static const rb_data_type_t JSON_Parser_rvalue_stack_type = { - "JSON::Ext::Parser/rvalue_stack", - { - .dmark = rvalue_stack_mark, - .dfree = rvalue_stack_free, - .dsize = rvalue_stack_memsize, - }, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -}; - -static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) -{ - rvalue_stack *stack; - *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - *stack_ref = stack; - MEMCPY(stack, old_stack, rvalue_stack, 1); - - stack->capa = old_stack->capa << 1; - stack->ptr = ALLOC_N(VALUE, stack->capa); - stack->type = RVALUE_STACK_HEAP_ALLOCATED; - MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); - return stack; -} - -static void rvalue_stack_eagerly_release(VALUE handle) -{ - rvalue_stack *stack; - TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - RTYPEDDATA_DATA(handle) = NULL; - rvalue_stack_free(stack); -} - -/* unicode */ - -static const signed char digit_values[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, - -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1 -}; - -static uint32_t unescape_unicode(const unsigned char *p) -{ - const uint32_t replacement_char = 0xFFFD; - - signed char b; - uint32_t result = 0; - b = digit_values[p[0]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[1]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[2]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[3]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - return result; -} - -static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) -{ - int len = 1; - if (ch <= 0x7F) { - buf[0] = (char) ch; - } else if (ch <= 0x07FF) { - buf[0] = (char) ((ch >> 6) | 0xC0); - buf[1] = (char) ((ch & 0x3F) | 0x80); - len++; - } else if (ch <= 0xFFFF) { - buf[0] = (char) ((ch >> 12) | 0xE0); - buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); - buf[2] = (char) ((ch & 0x3F) | 0x80); - len += 2; - } else if (ch <= 0x1fffff) { - buf[0] =(char) ((ch >> 18) | 0xF0); - buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); - buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); - buf[3] =(char) ((ch & 0x3F) | 0x80); - len += 3; - } else { - buf[0] = '?'; - } - return len; -} - -typedef struct 
JSON_ParserStruct { - VALUE create_id; - VALUE object_class; - VALUE array_class; - VALUE decimal_class; - VALUE match_string; - int max_nesting; - bool allow_nan; - bool allow_trailing_comma; - bool parsing_name; - bool symbolize_names; - bool freeze; - bool create_additions; - bool deprecated_create_additions; -} JSON_Parser; - -typedef struct JSON_ParserStateStruct { - JSON_Parser *json; - VALUE Vsource; - VALUE stack_handle; - char *source; - long len; - char *memo; - FBuffer fbuffer; - rvalue_stack *stack; - rvalue_cache name_cache; - int in_array; -} JSON_ParserState; - -#define GET_PARSER \ - JSON_Parser *json; \ - TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) - -#define MinusInfinity "-Infinity" -#define EVIL 0x666 - -static const rb_data_type_t JSON_Parser_type; -static char *JSON_parse_string(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_object(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_value(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_number(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); - -#ifndef HAVE_STRNLEN -static size_t strnlen(const char *s, size_t maxlen) -{ - char *p; - return ((p = memchr(s, '\0', maxlen)) ? 
p - s : maxlen); -} -#endif - -#define PARSE_ERROR_FRAGMENT_LEN 32 -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error(const char *format, const char *start) -{ - char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; - - size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN); - const char *ptr = start; - - if (len == PARSE_ERROR_FRAGMENT_LEN) { - MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); - buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0'; - ptr = buffer; - } - - rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); -} - - -%%{ - machine JSON_common; - - cr = '\n'; - cr_neg = [^\n]; - ws = [ \t\r\n]; - c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/'; - cpp_comment = '//' cr_neg* cr; - comment = c_comment | cpp_comment; - ignore = ws | comment; - name_separator = ':'; - value_separator = ','; - Vnull = 'null'; - Vfalse = 'false'; - Vtrue = 'true'; - VNaN = 'NaN'; - VInfinity = 'Infinity'; - VMinusInfinity = '-Infinity'; - begin_value = [nft\"\-\[\{NI] | digit; - begin_object = '{'; - end_object = '}'; - begin_array = '['; - end_array = ']'; - begin_string = '"'; - begin_name = begin_string; - begin_number = digit | '-'; -}%% - -%%{ - machine JSON_object; - include JSON_common; - - write data; - - action parse_value { - char *np = JSON_parse_value(state, json, fpc, pe, result, current_nesting); - if (np == NULL) { - fhold; fbreak; - } else { - fexec np; - } - } - - action allow_trailing_comma { json->allow_trailing_comma } - - action parse_name { - char *np; - json->parsing_name = true; - np = JSON_parse_string(state, json, fpc, pe, result); - json->parsing_name = false; - if (np == NULL) { fhold; fbreak; } else { - PUSH(*result); - fexec np; - } - } - - action exit { fhold; fbreak; } - - pair = ignore* begin_name >parse_name ignore* name_separator ignore* begin_value >parse_value; - next_pair = ignore* value_separator pair; - - main := ( - begin_object - (pair (next_pair)*((ignore* value_separator) when 
allow_trailing_comma)?)? ignore* - end_object - ) @exit; -}%% - -#define PUSH(result) rvalue_stack_push(state->stack, result, &state->stack_handle, &state->stack) - -static char *JSON_parse_object(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - - long stack_head = state->stack->head; - - %% write init; - %% write exec; - - if (cs >= JSON_object_first_final) { - long count = state->stack->head - stack_head; - - if (RB_UNLIKELY(json->object_class)) { - VALUE object = rb_class_new_instance(0, 0, json->object_class); - long index = 0; - VALUE *items = rvalue_stack_peek(state->stack, count); - while (index < count) { - VALUE name = items[index++]; - VALUE value = items[index++]; - rb_funcall(object, i_aset, 2, name, value); - } - *result = object; - } else { - VALUE hash; -#ifdef HAVE_RB_HASH_NEW_CAPA - hash = rb_hash_new_capa(count >> 1); -#else - hash = rb_hash_new(); -#endif - rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), hash); - *result = hash; - } - rvalue_stack_pop(state->stack, count); - - if (RB_UNLIKELY(json->create_additions)) { - VALUE klassname; - if (json->object_class) { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); - } else { - klassname = rb_hash_aref(*result, json->create_id); - } - if (!NIL_P(klassname)) { - VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { - if (json->deprecated_create_additions) { - json_deprecated(deprecated_create_additions_warning); - } - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - } - return p + 1; - } else { - return NULL; - } -} - -%%{ - machine JSON_value; - include JSON_common; - - write data; - - action parse_null { - *result = Qnil; - } - action parse_false { - *result = Qfalse; 
- } - action parse_true { - *result = Qtrue; - } - action parse_nan { - if (json->allow_nan) { - *result = CNaN; - } else { - raise_parse_error("unexpected token at '%s'", p - 2); - } - } - action parse_infinity { - if (json->allow_nan) { - *result = CInfinity; - } else { - raise_parse_error("unexpected token at '%s'", p - 7); - } - } - action parse_string { - char *np = JSON_parse_string(state, json, fpc, pe, result); - if (np == NULL) { - fhold; - fbreak; - } else { - fexec np; - } - } - - action parse_number { - char *np; - if(pe > fpc + 8 && !strncmp(MinusInfinity, fpc, 9)) { - if (json->allow_nan) { - *result = CMinusInfinity; - fexec p + 10; - fhold; fbreak; - } else { - raise_parse_error("unexpected token at '%s'", p); - } - } - np = JSON_parse_number(state, json, fpc, pe, result); - if (np != NULL) { - fexec np; - } - fhold; fbreak; - } - - action parse_array { - char *np; - state->in_array++; - np = JSON_parse_array(state, json, fpc, pe, result, current_nesting + 1); - state->in_array--; - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action parse_object { - char *np; - np = JSON_parse_object(state, json, fpc, pe, result, current_nesting + 1); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action exit { fhold; fbreak; } - -main := ignore* ( - Vnull @parse_null | - Vfalse @parse_false | - Vtrue @parse_true | - VNaN @parse_nan | - VInfinity @parse_infinity | - begin_number @parse_number | - begin_string @parse_string | - begin_array @parse_array | - begin_object @parse_object - ) ignore* %*exit; -}%% - -static char *JSON_parse_value(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - %% write init; - %% write exec; - - if (json->freeze) { - OBJ_FREEZE(*result); - } - - if (cs >= JSON_value_first_final) { - PUSH(*result); - return p; - } else { - return NULL; - } -} - -%%{ - machine JSON_integer; - - write data; - - action exit { fhold; fbreak; } - - main := '-'? 
('0' | [1-9][0-9]*) (^[0-9]? @exit); -}%% - -#define MAX_FAST_INTEGER_SIZE 18 -static inline VALUE fast_parse_integer(char *p, char *pe) -{ - bool negative = false; - if (*p == '-') { - negative = true; - p++; - } - - long long memo = 0; - while (p < pe) { - memo *= 10; - memo += *p - '0'; - p++; - } - - if (negative) { - memo = -memo; - } - return LL2NUM(memo); -} - -static char *JSON_decode_integer(JSON_ParserState *state, JSON_Parser *json, char *p, VALUE *result) -{ - long len = p - state->memo; - if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { - *result = fast_parse_integer(state->memo, p); - } else { - fbuffer_clear(&state->fbuffer); - fbuffer_append(&state->fbuffer, state->memo, len); - fbuffer_append_char(&state->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(&state->fbuffer), 10); - } - return p + 1; -} - -%%{ - machine JSON_float; - include JSON_common; - - write data; - - action exit { fhold; fbreak; } - action isFloat { is_float = true; } - - main := '-'? ( - (('0' | [1-9][0-9]*) - ((('.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) | - ([Ee] [+\-]?[0-9]+)) > isFloat)? - ) (^[0-9Ee.\-]? 
@exit )); -}%% - -static char *JSON_parse_number(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - bool is_float = false; - - %% write init; - state->memo = p; - %% write exec; - - if (cs >= JSON_float_first_final) { - if (!is_float) { - return JSON_decode_integer(state, json, p, result); - } - VALUE mod = Qnil; - ID method_id = 0; - if (json->decimal_class) { - if (rb_respond_to(json->decimal_class, i_try_convert)) { - mod = json->decimal_class; - method_id = i_try_convert; - } else if (rb_respond_to(json->decimal_class, i_new)) { - mod = json->decimal_class; - method_id = i_new; - } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) { - VALUE name = rb_class_name(json->decimal_class); - const char *name_cstr = RSTRING_PTR(name); - const char *last_colon = strrchr(name_cstr, ':'); - if (last_colon) { - const char *mod_path_end = last_colon - 1; - VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); - mod = rb_path_to_class(mod_path); - - const char *method_name_beg = last_colon + 1; - long before_len = method_name_beg - name_cstr; - long len = RSTRING_LEN(name) - before_len; - VALUE method_name = rb_str_substr(name, before_len, len); - method_id = SYM2ID(rb_str_intern(method_name)); - } else { - mod = rb_mKernel; - method_id = SYM2ID(rb_str_intern(name)); - } - } - } - - long len = p - state->memo; - fbuffer_clear(&state->fbuffer); - fbuffer_append(&state->fbuffer, state->memo, len); - fbuffer_append_char(&state->fbuffer, '\0'); - - if (method_id) { - VALUE text = rb_str_new2(FBUFFER_PTR(&state->fbuffer)); - *result = rb_funcallv(mod, method_id, 1, &text); - } else { - *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&state->fbuffer), 1)); - } - - return p + 1; - } else { - return NULL; - } -} - - -%%{ - machine JSON_array; - include JSON_common; - - write data; - - action parse_value { - VALUE v = Qnil; - char *np = JSON_parse_value(state, json, fpc, pe, &v, current_nesting); - if (np == NULL) { - fhold; 
fbreak; - } else { - fexec np; - } - } - - action allow_trailing_comma { json->allow_trailing_comma } - - action exit { fhold; fbreak; } - - next_element = value_separator ignore* begin_value >parse_value; - - main := begin_array ignore* - ((begin_value >parse_value ignore*) - (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)? - end_array @exit; -}%% - -static char *JSON_parse_array(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - long stack_head = state->stack->head; - - %% write init; - %% write exec; - - if(cs >= JSON_array_first_final) { - long count = state->stack->head - stack_head; - - if (RB_UNLIKELY(json->array_class)) { - VALUE array = rb_class_new_instance(0, 0, json->array_class); - VALUE *items = rvalue_stack_peek(state->stack, count); - long index; - for (index = 0; index < count; index++) { - rb_funcall(array, i_leftshift, 1, items[index]); - } - *result = array; - } else { - VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count)); - *result = array; - } - rvalue_stack_pop(state->stack, count); - - return p + 1; - } else { - raise_parse_error("unexpected token at '%s'", p); - return NULL; - } -} - -static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize) -{ - if (symbolize) { - intern = true; - } - VALUE result; -# ifdef HAVE_RB_ENC_INTERNED_STR - if (intern) { - result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); - } else { - result = rb_utf8_str_new(start, (long)(end - start)); - } -# else - result = rb_utf8_str_new(start, (long)(end - start)); - if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - } -# endif - - if (symbolize) { - result = rb_str_intern(result); - } - - return result; -} - 
-static VALUE json_string_fastpath(JSON_ParserState *state, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - - if (is_name && state->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; - } - } - - return build_string(string, stringEnd, intern, symbolize); -} - -static VALUE json_string_unescape(JSON_ParserState *state, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - char *p = string, *pe = string, *unescape, *bufferStart, *buffer; - int unescape_len; - char buf[4]; - - if (is_name && state->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; - } - } - - pe = memchr(p, '\\', bufferSize); - if (RB_UNLIKELY(pe == NULL)) { - return build_string(string, stringEnd, intern, symbolize); - } - - VALUE result = rb_str_buf_new(bufferSize); - rb_enc_associate_index(result, utf8_encindex); - buffer = bufferStart = RSTRING_PTR(result); - - while (pe < stringEnd) { - if (*pe == '\\') { - unescape = (char *) "?"; - unescape_len = 1; - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } - switch (*++pe) { - case 'n': - unescape = (char *) "\n"; - break; - case 'r': - unescape = (char *) "\r"; - break; - case 't': - unescape = (char *) "\t"; - break; - case '"': - unescape = (char *) "\""; - break; - case '\\': - unescape = (char *) "\\"; - break; - case 'b': - unescape = (char *) "\b"; - break; - case 'f': - unescape = (char *) "\f"; - break; - case 'u': - if 
(pe > stringEnd - 4) { - raise_parse_error("incomplete unicode character escape sequence at '%s'", p); - } else { - uint32_t ch = unescape_unicode((unsigned char *) ++pe); - pe += 3; - /* To handle values above U+FFFF, we take a sequence of - * \uXXXX escapes in the U+D800..U+DBFF then - * U+DC00..U+DFFF ranges, take the low 10 bits from each - * to make a 20-bit number, then add 0x10000 to get the - * final codepoint. - * - * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling - * Surrogate Pairs in UTF-16", and 23.6 "Surrogates - * Area". - */ - if ((ch & 0xFC00) == 0xD800) { - pe++; - if (pe > stringEnd - 6) { - raise_parse_error("incomplete surrogate pair at '%s'", p); - } - if (pe[0] == '\\' && pe[1] == 'u') { - uint32_t sur = unescape_unicode((unsigned char *) pe + 2); - ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) - | (sur & 0x3FF)); - pe += 5; - } else { - unescape = (char *) "?"; - break; - } - } - unescape_len = convert_UTF32_to_UTF8(buf, ch); - unescape = buf; - } - break; - default: - p = pe; - continue; - } - MEMCPY(buffer, unescape, char, unescape_len); - buffer += unescape_len; - p = ++pe; - } else { - pe++; - } - } - - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } - rb_str_set_len(result, buffer - bufferStart); - - if (symbolize) { - result = rb_str_intern(result); - } else if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - } - - return result; -} - -%%{ - machine JSON_string; - include JSON_common; - - write data; - - action parse_complex_string { - *result = json_string_unescape(state, state->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - fexec p + 1; - fhold; - fbreak; - } - - action parse_simple_string { - *result = json_string_fastpath(state, state->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - fexec p + 1; - fhold; - fbreak; - } - - 
double_quote = '"'; - escape = '\\'; - control = 0..0x1f; - simple = any - escape - double_quote - control; - - main := double_quote ( - (simple*)( - (double_quote) @parse_simple_string | - ((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string - ) - ); -}%% - -static int -match_i(VALUE regexp, VALUE klass, VALUE memo) -{ - if (regexp == Qundef) return ST_STOP; - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) && - RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) { - rb_ary_push(memo, klass); - return ST_STOP; - } - return ST_CONTINUE; -} - -static char *JSON_parse_string(JSON_ParserState *state, JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - VALUE match_string; - - %% write init; - state->memo = p; - %% write exec; - - if (json->create_additions && RTEST(match_string = json->match_string)) { - VALUE klass; - VALUE memo = rb_ary_new2(2); - rb_ary_push(memo, *result); - rb_hash_foreach(match_string, match_i, memo); - klass = rb_ary_entry(memo, 1); - if (RTEST(klass)) { - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - - if (cs >= JSON_string_first_final) { - return p + 1; - } else { - return NULL; - } -} - -/* - * Document-class: JSON::Ext::Parser - * - * This is the JSON parser implemented as a C extension. It can be configured - * to be used by setting - * - * JSON.parser = JSON::Ext::Parser - * - * with the method parser= in JSON. 
- * - */ - -static VALUE convert_encoding(VALUE source) -{ - int encindex = RB_ENCODING_GET(source); - - if (RB_LIKELY(encindex == utf8_encindex)) { - return source; - } - - if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); - } - - return rb_funcall(source, i_encode, 1, Encoding_UTF_8); -} - -static int configure_parser_i(VALUE key, VALUE val, VALUE data) -{ - JSON_Parser *json = (JSON_Parser *)data; - - if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } - else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } - else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { json->freeze = RTEST(val); } - else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } - else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } - else if (key == sym_create_additions) { - if (NIL_P(val)) { - json->create_additions = true; - json->deprecated_create_additions = true; - } else { - json->create_additions = RTEST(val); - json->deprecated_create_additions = false; - } - } - - return ST_CONTINUE; -} - -static void parser_init(JSON_Parser *json, VALUE opts) -{ - json->max_nesting = 100; - - if (!NIL_P(opts)) { - Check_Type(opts, T_HASH); - if (RHASH_SIZE(opts) > 0) { - // We assume in most cases few keys are set so it's faster to go over - // the provided keys than to check all possible keys. 
- rb_hash_foreach(opts, configure_parser_i, (VALUE)json); - - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - - if (json->create_additions && !json->create_id) { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - } - - } -} - -/* - * call-seq: new(opts => {}) - * - * Creates a new JSON::Ext::ParserConfig instance. - * - * It will be configured by the _opts_ hash. _opts_ can have the following - * keys: - * - * _opts_ can have the following keys: - * * *max_nesting*: The maximum depth of nesting allowed in the parsed data - * structures. Disable depth checking with :max_nesting => false|nil|0, it - * defaults to 100. - * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in - * defiance of RFC 4627 to be parsed by the Parser. This option defaults to - * false. - * * *symbolize_names*: If set to true, returns symbols for the names - * (keys) in a JSON object. Otherwise strings are returned, which is - * also the default. It's not possible to use this option in - * conjunction with the *create_additions* option. - * * *create_additions*: If set to false, the Parser doesn't create - * additions even if a matching class and create_id was found. This option - * defaults to false. - * * *object_class*: Defaults to Hash. If another type is provided, it will be used - * instead of Hash to represent JSON objects. The type must respond to - * +new+ without arguments, and return an object that respond to +[]=+. - * * *array_class*: Defaults to Array If another type is provided, it will be used - * instead of Hash to represent JSON arrays. The type must respond to - * +new+ without arguments, and return an object that respond to +<<+. - * * *decimal_class*: Specifies which class to use instead of the default - * (Float) when parsing decimal numbers. This class must accept a single - * string argument in its constructor. 
- */ -static VALUE cParserConfig_initialize(VALUE self, VALUE opts) -{ - GET_PARSER; - - parser_init(json, opts); - return self; -} - -%%{ - machine JSON; - - write data; - - include JSON_common; - - action parse_value { - char *np = JSON_parse_value(state, json, fpc, pe, &result, 0); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - main := ignore* ( - begin_value >parse_value - ) ignore*; -}%% - -static VALUE cParser_parse_safe(VALUE vstate) -{ - JSON_ParserState *state = (JSON_ParserState *)vstate; - VALUE result = Qnil; - char *p, *pe; - int cs = EVIL; - JSON_Parser *json = state->json; - - %% write init; - p = state->source; - pe = p + state->len; - %% write exec; - - if (state->stack_handle) { - rvalue_stack_eagerly_release(state->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; - } -} - -static VALUE cParser_parse(JSON_Parser *json, VALUE Vsource) -{ - Vsource = convert_encoding(StringValue(Vsource)); - StringValue(Vsource); - - VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { - .type = RVALUE_STACK_STACK_ALLOCATED, - .ptr = rvalue_stack_buffer, - .capa = RVALUE_STACK_INITIAL_CAPA, - }; - - JSON_ParserState _state = { - .json = json, - .len = RSTRING_LEN(Vsource), - .source = RSTRING_PTR(Vsource), - .Vsource = Vsource, - .stack = &stack, - }; - JSON_ParserState *state = &_state; - - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&state->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); - - int interupted; - VALUE result = rb_protect(cParser_parse_safe, (VALUE)state, &interupted); - - fbuffer_free(&state->fbuffer); - if (interupted) { - rb_jump_tag(interupted); - } - - return result; -} - -/* - * call-seq: parse(source) - * - * Parses the current JSON text _source_ and returns the complete data - * structure as a result. - * It raises JSON::ParserError if fail to parse. 
- */ -static VALUE cParserConfig_parse(VALUE self, VALUE Vsource) -{ - GET_PARSER; - return cParser_parse(json, Vsource); -} - -static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts) -{ - Vsource = convert_encoding(StringValue(Vsource)); - StringValue(Vsource); - - JSON_Parser _parser = {0}; - JSON_Parser *json = &_parser; - parser_init(json, opts); - - return cParser_parse(json, Vsource); -} - -static void JSON_mark(void *ptr) -{ - JSON_Parser *json = ptr; - rb_gc_mark(json->create_id); - rb_gc_mark(json->object_class); - rb_gc_mark(json->array_class); - rb_gc_mark(json->decimal_class); - rb_gc_mark(json->match_string); -} - -static void JSON_free(void *ptr) -{ - JSON_Parser *json = ptr; - ruby_xfree(json); -} - -static size_t JSON_memsize(const void *ptr) -{ - return sizeof(JSON_Parser); -} - -static const rb_data_type_t JSON_Parser_type = { - "JSON/Parser", - {JSON_mark, JSON_free, JSON_memsize,}, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -}; - -static VALUE cJSON_parser_s_allocate(VALUE klass) -{ - JSON_Parser *json; - return TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); -} - -void Init_parser(void) -{ -#ifdef HAVE_RB_EXT_RACTOR_SAFE - rb_ext_ractor_safe(true); -#endif - -#undef rb_intern - rb_require("json/common"); - mJSON = rb_define_module("JSON"); - VALUE mExt = rb_define_module_under(mJSON, "Ext"); - VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject); - eNestingError = rb_path2class("JSON::NestingError"); - rb_gc_register_mark_object(eNestingError); - rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate); - rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1); - rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1); - - VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject); - rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); - - CNaN = rb_const_get(mJSON, rb_intern("NaN")); - rb_gc_register_mark_object(CNaN); - - CInfinity = 
rb_const_get(mJSON, rb_intern("Infinity")); - rb_gc_register_mark_object(CInfinity); - - CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); - rb_gc_register_mark_object(CMinusInfinity); - - rb_global_variable(&Encoding_UTF_8); - Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); - - sym_max_nesting = ID2SYM(rb_intern("max_nesting")); - sym_allow_nan = ID2SYM(rb_intern("allow_nan")); - sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); - sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); - sym_freeze = ID2SYM(rb_intern("freeze")); - sym_create_additions = ID2SYM(rb_intern("create_additions")); - sym_create_id = ID2SYM(rb_intern("create_id")); - sym_object_class = ID2SYM(rb_intern("object_class")); - sym_array_class = ID2SYM(rb_intern("array_class")); - sym_decimal_class = ID2SYM(rb_intern("decimal_class")); - sym_match_string = ID2SYM(rb_intern("match_string")); - - i_create_id = rb_intern("create_id"); - i_json_creatable_p = rb_intern("json_creatable?"); - i_json_create = rb_intern("json_create"); - i_chr = rb_intern("chr"); - i_match = rb_intern("match"); - i_deep_const_get = rb_intern("deep_const_get"); - i_aset = rb_intern("[]="); - i_aref = rb_intern("[]"); - i_leftshift = rb_intern("<<"); - i_new = rb_intern("new"); - i_try_convert = rb_intern("try_convert"); - i_uminus = rb_intern("-@"); - i_encode = rb_intern("encode"); - - binary_encindex = rb_ascii8bit_encindex(); - utf8_encindex = rb_utf8_encindex(); - enc_utf8 = rb_utf8_encoding(); -} - -/* - * Local variables: - * mode: c - * c-file-style: ruby - * indent-tabs-mode: nil - * End: - */ diff --git a/ext/json/parser/prereq.mk b/ext/json/parser/prereq.mk deleted file mode 100644 index fc59169056..0000000000 --- a/ext/json/parser/prereq.mk +++ /dev/null @@ -1,13 +0,0 @@ -RAGEL = ragel - -.SUFFIXES: .rl - -.rl.c: - $(RAGEL) -G2 $< - $(BASERUBY) -pli -e '$$_.sub!(/[ \t]+$$/, "")' \ - -e '$$_.sub!(/^static const int (JSON_.*=.*);$$/, "enum 
{\\1};")' \ - -e '$$_.sub!(/^(static const char) (_JSON(?:_\w+)?_nfa_\w+)(?=\[\] =)/, "\\1 MAYBE_UNUSED(\\2)")' \ - -e '$$_.sub!(/0 <= ([\( ]+\*[\( ]*p\)+) && \1 <= 31/, "0 <= (signed char)(*(p)) && (*(p)) <= 31")' \ - -e '$$_ = "/* This file is automatically generated from parser.rl by using ragel */\n" + $$_ if $$. == 1' $@ - -parser.c: diff --git a/ext/json/simd/conf.rb b/ext/json/simd/conf.rb new file mode 100644 index 0000000000..76f774bc97 --- /dev/null +++ b/ext/json/simd/conf.rb @@ -0,0 +1,24 @@ +case RbConfig::CONFIG['host_cpu'] +when /^(arm|aarch64)/ + # Try to compile a small program using NEON instructions + header, type, init, extra = 'arm_neon.h', 'uint8x16_t', 'vdupq_n_u8(32)', nil +when /^(x86_64|x64)/ + header, type, init, extra = 'x86intrin.h', '__m128i', '_mm_set1_epi8(32)', 'if (__builtin_cpu_supports("sse2")) { printf("OK"); }' +end +if header + if have_header(header) && try_compile(<<~SRC, '-Werror=implicit-function-declaration') + #{cpp_include(header)} + int main(int argc, char **argv) { + #{type} test = #{init}; + #{extra} + if (argc > 100000) printf("%p", &test); + return 0; + } + SRC + $defs.push("-DJSON_ENABLE_SIMD") + else + puts "Disable SIMD" + end +end + +have_header('cpuid.h') diff --git a/ext/json/simd/simd.h b/ext/json/simd/simd.h new file mode 100644 index 0000000000..611b41b066 --- /dev/null +++ b/ext/json/simd/simd.h @@ -0,0 +1,208 @@ +#include "../json.h" + +typedef enum { + SIMD_NONE, + SIMD_NEON, + SIMD_SSE2 +} SIMD_Implementation; + +#ifndef __has_builtin // Optional of course. + #define __has_builtin(x) 0 // Compatibility with non-clang compilers. 
+#endif + +#ifdef __clang__ +# if __has_builtin(__builtin_ctzll) +# define HAVE_BUILTIN_CTZLL 1 +# else +# define HAVE_BUILTIN_CTZLL 0 +# endif +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +# define HAVE_BUILTIN_CTZLL 1 +#else +# define HAVE_BUILTIN_CTZLL 0 +#endif + +static inline uint32_t trailing_zeros64(uint64_t input) +{ + JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior + +#if HAVE_BUILTIN_CTZLL + return __builtin_ctzll(input); +#else + uint32_t trailing_zeros = 0; + uint64_t temp = input; + while ((temp & 1) == 0 && temp > 0) { + trailing_zeros++; + temp >>= 1; + } + return trailing_zeros; +#endif +} + +static inline int trailing_zeros(int input) +{ + JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior + +#if HAVE_BUILTIN_CTZLL + return __builtin_ctz(input); +#else + int trailing_zeros = 0; + int temp = input; + while ((temp & 1) == 0 && temp > 0) { + trailing_zeros++; + temp >>= 1; + } + return trailing_zeros; +#endif +} + +#ifdef JSON_ENABLE_SIMD + +#define SIMD_MINIMUM_THRESHOLD 4 + +ALWAYS_INLINE(static) void json_fast_memcpy16(char *dst, const char *src, size_t len) +{ + RBIMPL_ASSERT_OR_ASSUME(len < 16); + RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); // 4 +#if defined(__has_builtin) && __has_builtin(__builtin_memcpy) + // If __builtin_memcpy is available, use it to copy between SIMD_MINIMUM_THRESHOLD (4) and vec_len-1 (15) bytes. + // These copies overlap. The first copy will copy the first 8 (or 4) bytes. The second copy will copy + // the last 8 (or 4) bytes but overlap with the first copy. The overlapping bytes will be in the correct + // position in both copies. + + // Please do not attempt to replace __builtin_memcpy with memcpy without profiling and/or looking at the + // generated assembly. 
On clang-specifically (tested on Apple clang version 17.0.0 (clang-1700.0.13.3)), + // when using memcpy, the compiler will notice the only difference is a 4 or 8 and generate a conditional + // select instruction instead of direct loads and stores with a branch. This ends up slower than the branch + // plus two loads and stores generated when using __builtin_memcpy. + if (len >= 8) { + __builtin_memcpy(dst, src, 8); + __builtin_memcpy(dst + len - 8, src + len - 8, 8); + } else { + __builtin_memcpy(dst, src, 4); + __builtin_memcpy(dst + len - 4, src + len - 4, 4); + } +#else + MEMCPY(dst, src, char, len); +#endif +} + +#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64) +#include <arm_neon.h> + +#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 +static inline SIMD_Implementation find_simd_implementation(void) +{ + return SIMD_NEON; +} + +#define HAVE_SIMD 1 +#define HAVE_SIMD_NEON 1 + +// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon +ALWAYS_INLINE(static) uint64_t neon_match_mask(uint8x16_t matches) +{ + const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4); + const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0); + return mask & 0x8888888888888888ull; +} + +ALWAYS_INLINE(static) uint64_t compute_chunk_mask_neon(const char *ptr) +{ + uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr); + + // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 + // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/ + const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33)); + + uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\')); + uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash); + return neon_match_mask(needs_escape); +} + +ALWAYS_INLINE(static) int string_scan_simd_neon(const char **ptr, 
const char *end, uint64_t *mask) +{ + while (*ptr + sizeof(uint8x16_t) <= end) { + uint64_t chunk_mask = compute_chunk_mask_neon(*ptr); + if (chunk_mask) { + *mask = chunk_mask; + return 1; + } + *ptr += sizeof(uint8x16_t); + } + return 0; +} + +#endif /* ARM Neon Support.*/ + +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) + +#ifdef HAVE_X86INTRIN_H +#include <x86intrin.h> + +#define HAVE_SIMD 1 +#define HAVE_SIMD_SSE2 1 + +#ifdef HAVE_CPUID_H +#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 + +#if defined(__clang__) || defined(__GNUC__) +#define TARGET_SSE2 __attribute__((target("sse2"))) +#else +#define TARGET_SSE2 +#endif + +#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a) +#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a) +#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1)) +#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a) + +ALWAYS_INLINE(static) TARGET_SSE2 int compute_chunk_mask_sse2(const char *ptr) +{ + __m128i chunk = _mm_loadu_si128((__m128i const*)ptr); + // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 + // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/ + __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33)); + __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\')); + __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash); + return _mm_movemask_epi8(needs_escape); +} + +ALWAYS_INLINE(static) TARGET_SSE2 int string_scan_simd_sse2(const char **ptr, const char *end, int *mask) +{ + while (*ptr + sizeof(__m128i) <= end) { + int chunk_mask = compute_chunk_mask_sse2(*ptr); + if (chunk_mask) { + *mask = chunk_mask; + return 1; + } + *ptr += sizeof(__m128i); + } + + return 0; +} + +#include <cpuid.h> +#endif /* HAVE_CPUID_H */ + +static inline SIMD_Implementation 
find_simd_implementation(void) +{ + // TODO Revisit. I think the SSE version now only uses SSE2 instructions. + if (__builtin_cpu_supports("sse2")) { + return SIMD_SSE2; + } + + return SIMD_NONE; +} + +#endif /* HAVE_X86INTRIN_H */ +#endif /* X86_64 Support */ + +#endif /* JSON_ENABLE_SIMD */ + +#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED +static inline SIMD_Implementation find_simd_implementation(void) +{ + return SIMD_NONE; +} +#endif diff --git a/ext/json/vendor/fpconv.c b/ext/json/vendor/fpconv.c new file mode 100644 index 0000000000..6c9bc2c103 --- /dev/null +++ b/ext/json/vendor/fpconv.c @@ -0,0 +1,480 @@ +// Boost Software License - Version 1.0 - August 17th, 2003 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +// The contents of this file is extracted from https://github.com/night-shift/fpconv +// It was slightly modified to append ".0" to plain floats, for use with the https://github.com/ruby/json package. + +#include <stdbool.h> +#include <string.h> +#include <stdint.h> + +#if JSON_DEBUG +#include <assert.h> +#endif + +#define npowers 87 +#define steppowers 8 +#define firstpower -348 /* 10 ^ -348 */ + +#define expmax -32 +#define expmin -60 + +typedef struct Fp { + uint64_t frac; + int exp; +} Fp; + +static const Fp powers_ten[] = { + { 18054884314459144840U, -1220 }, { 13451937075301367670U, -1193 }, + { 10022474136428063862U, -1166 }, { 14934650266808366570U, -1140 }, + { 11127181549972568877U, -1113 }, { 16580792590934885855U, -1087 }, + { 12353653155963782858U, -1060 }, { 18408377700990114895U, -1034 }, + { 13715310171984221708U, -1007 }, { 10218702384817765436U, -980 }, + { 15227053142812498563U, -954 }, { 11345038669416679861U, -927 }, + { 16905424996341287883U, -901 }, { 12595523146049147757U, -874 }, + { 9384396036005875287U, -847 }, { 13983839803942852151U, -821 }, + { 10418772551374772303U, -794 }, { 15525180923007089351U, -768 }, + { 11567161174868858868U, -741 }, { 17236413322193710309U, -715 }, + { 12842128665889583758U, -688 }, { 9568131466127621947U, -661 }, + { 14257626930069360058U, -635 }, { 10622759856335341974U, -608 }, + { 15829145694278690180U, -582 }, { 11793632577567316726U, -555 }, + { 17573882009934360870U, -529 }, { 13093562431584567480U, -502 }, + { 9755464219737475723U, -475 }, { 14536774485912137811U, -449 }, + { 10830740992659433045U, -422 }, { 16139061738043178685U, -396 }, + { 12024538023802026127U, -369 }, { 17917957937422433684U, -343 }, + { 
13349918974505688015U, -316 }, { 9946464728195732843U, -289 }, + { 14821387422376473014U, -263 }, { 11042794154864902060U, -236 }, + { 16455045573212060422U, -210 }, { 12259964326927110867U, -183 }, + { 18268770466636286478U, -157 }, { 13611294676837538539U, -130 }, + { 10141204801825835212U, -103 }, { 15111572745182864684U, -77 }, + { 11258999068426240000U, -50 }, { 16777216000000000000U, -24 }, + { 12500000000000000000U, 3 }, { 9313225746154785156U, 30 }, + { 13877787807814456755U, 56 }, { 10339757656912845936U, 83 }, + { 15407439555097886824U, 109 }, { 11479437019748901445U, 136 }, + { 17105694144590052135U, 162 }, { 12744735289059618216U, 189 }, + { 9495567745759798747U, 216 }, { 14149498560666738074U, 242 }, + { 10542197943230523224U, 269 }, { 15709099088952724970U, 295 }, + { 11704190886730495818U, 322 }, { 17440603504673385349U, 348 }, + { 12994262207056124023U, 375 }, { 9681479787123295682U, 402 }, + { 14426529090290212157U, 428 }, { 10748601772107342003U, 455 }, + { 16016664761464807395U, 481 }, { 11933345169920330789U, 508 }, + { 17782069995880619868U, 534 }, { 13248674568444952270U, 561 }, + { 9871031767461413346U, 588 }, { 14708983551653345445U, 614 }, + { 10959046745042015199U, 641 }, { 16330252207878254650U, 667 }, + { 12166986024289022870U, 694 }, { 18130221999122236476U, 720 }, + { 13508068024458167312U, 747 }, { 10064294952495520794U, 774 }, + { 14996968138956309548U, 800 }, { 11173611982879273257U, 827 }, + { 16649979327439178909U, 853 }, { 12405201291620119593U, 880 }, + { 9242595204427927429U, 907 }, { 13772540099066387757U, 933 }, + { 10261342003245940623U, 960 }, { 15290591125556738113U, 986 }, + { 11392378155556871081U, 1013 }, { 16975966327722178521U, 1039 }, + { 12648080533535911531U, 1066 } +}; + +static Fp find_cachedpow10(int exp, int* k) +{ + const double one_log_ten = 0.30102999566398114; + + int approx = (int)(-(exp + npowers) * one_log_ten); + int idx = (approx - firstpower) / steppowers; + + while(1) { + int current = exp + 
powers_ten[idx].exp + 64; + + if(current < expmin) { + idx++; + continue; + } + + if(current > expmax) { + idx--; + continue; + } + + *k = (firstpower + idx * steppowers); + + return powers_ten[idx]; + } +} + +#define fracmask 0x000FFFFFFFFFFFFFU +#define expmask 0x7FF0000000000000U +#define hiddenbit 0x0010000000000000U +#define signmask 0x8000000000000000U +#define expbias (1023 + 52) + +#define absv(n) ((n) < 0 ? -(n) : (n)) +#define minv(a, b) ((a) < (b) ? (a) : (b)) + +static const uint64_t tens[] = { + 10000000000000000000U, 1000000000000000000U, 100000000000000000U, + 10000000000000000U, 1000000000000000U, 100000000000000U, + 10000000000000U, 1000000000000U, 100000000000U, + 10000000000U, 1000000000U, 100000000U, + 10000000U, 1000000U, 100000U, + 10000U, 1000U, 100U, + 10U, 1U +}; + +static inline uint64_t get_dbits(double d) +{ + union { + double dbl; + uint64_t i; + } dbl_bits = { d }; + + return dbl_bits.i; +} + +static Fp build_fp(double d) +{ + uint64_t bits = get_dbits(d); + + Fp fp; + fp.frac = bits & fracmask; + fp.exp = (bits & expmask) >> 52; + + if(fp.exp) { + fp.frac += hiddenbit; + fp.exp -= expbias; + + } else { + fp.exp = -expbias + 1; + } + + return fp; +} + +static void normalize(Fp* fp) +{ + while ((fp->frac & hiddenbit) == 0) { + fp->frac <<= 1; + fp->exp--; + } + + int shift = 64 - 52 - 1; + fp->frac <<= shift; + fp->exp -= shift; +} + +static void get_normalized_boundaries(Fp* fp, Fp* lower, Fp* upper) +{ + upper->frac = (fp->frac << 1) + 1; + upper->exp = fp->exp - 1; + + while ((upper->frac & (hiddenbit << 1)) == 0) { + upper->frac <<= 1; + upper->exp--; + } + + int u_shift = 64 - 52 - 2; + + upper->frac <<= u_shift; + upper->exp = upper->exp - u_shift; + + + int l_shift = fp->frac == hiddenbit ? 
2 : 1; + + lower->frac = (fp->frac << l_shift) - 1; + lower->exp = fp->exp - l_shift; + + + lower->frac <<= lower->exp - upper->exp; + lower->exp = upper->exp; +} + +static Fp multiply(Fp* a, Fp* b) +{ + const uint64_t lomask = 0x00000000FFFFFFFF; + + uint64_t ah_bl = (a->frac >> 32) * (b->frac & lomask); + uint64_t al_bh = (a->frac & lomask) * (b->frac >> 32); + uint64_t al_bl = (a->frac & lomask) * (b->frac & lomask); + uint64_t ah_bh = (a->frac >> 32) * (b->frac >> 32); + + uint64_t tmp = (ah_bl & lomask) + (al_bh & lomask) + (al_bl >> 32); + /* round up */ + tmp += 1U << 31; + + Fp fp = { + ah_bh + (ah_bl >> 32) + (al_bh >> 32) + (tmp >> 32), + a->exp + b->exp + 64 + }; + + return fp; +} + +static void round_digit(char* digits, int ndigits, uint64_t delta, uint64_t rem, uint64_t kappa, uint64_t frac) +{ + while (rem < frac && delta - rem >= kappa && + (rem + kappa < frac || frac - rem > rem + kappa - frac)) { + + digits[ndigits - 1]--; + rem += kappa; + } +} + +static int generate_digits(Fp* fp, Fp* upper, Fp* lower, char* digits, int* K) +{ + uint64_t wfrac = upper->frac - fp->frac; + uint64_t delta = upper->frac - lower->frac; + + Fp one; + one.frac = 1ULL << -upper->exp; + one.exp = upper->exp; + + uint64_t part1 = upper->frac >> -one.exp; + uint64_t part2 = upper->frac & (one.frac - 1); + + int idx = 0, kappa = 10; + const uint64_t* divp; + /* 1000000000 */ + for(divp = tens + 10; kappa > 0; divp++) { + + uint64_t div = *divp; + unsigned digit = (unsigned) (part1 / div); + + if (digit || idx) { + digits[idx++] = digit + '0'; + } + + part1 -= digit * div; + kappa--; + + uint64_t tmp = (part1 <<-one.exp) + part2; + if (tmp <= delta) { + *K += kappa; + round_digit(digits, idx, delta, tmp, div << -one.exp, wfrac); + + return idx; + } + } + + /* 10 */ + const uint64_t* unit = tens + 18; + + while(true) { + part2 *= 10; + delta *= 10; + kappa--; + + unsigned digit = (unsigned) (part2 >> -one.exp); + if (digit || idx) { + digits[idx++] = digit + '0'; + } + + part2 
&= one.frac - 1; + if (part2 < delta) { + *K += kappa; + round_digit(digits, idx, delta, part2, one.frac, wfrac * *unit); + + return idx; + } + + unit--; + } +} + +static int grisu2(double d, char* digits, int* K) +{ + Fp w = build_fp(d); + + Fp lower, upper; + get_normalized_boundaries(&w, &lower, &upper); + + normalize(&w); + + int k; + Fp cp = find_cachedpow10(upper.exp, &k); + + w = multiply(&w, &cp); + upper = multiply(&upper, &cp); + lower = multiply(&lower, &cp); + + lower.frac++; + upper.frac--; + + *K = -k; + + return generate_digits(&w, &upper, &lower, digits, K); +} + +static int emit_digits(char* digits, int ndigits, char* dest, int K, bool neg) +{ + int exp = absv(K + ndigits - 1); + + if(K >= 0 && exp < 15) { + memcpy(dest, digits, ndigits); + memset(dest + ndigits, '0', K); + + /* add a .0 to mark this as a float. */ + dest[ndigits + K] = '.'; + dest[ndigits + K + 1] = '0'; + + return ndigits + K + 2; + } + + /* write decimal w/o scientific notation */ + if(K < 0 && (K > -7 || exp < 10)) { + int offset = ndigits - absv(K); + /* fp < 1.0 -> write leading zero */ + if(offset <= 0) { + offset = -offset; + dest[0] = '0'; + dest[1] = '.'; + memset(dest + 2, '0', offset); + memcpy(dest + offset + 2, digits, ndigits); + + return ndigits + 2 + offset; + + /* fp > 1.0 */ + } else { + memcpy(dest, digits, offset); + dest[offset] = '.'; + memcpy(dest + offset + 1, digits + offset, ndigits - offset); + + return ndigits + 1; + } + } + + /* write decimal w/ scientific notation */ + ndigits = minv(ndigits, 18 - neg); + + int idx = 0; + dest[idx++] = digits[0]; + + if(ndigits > 1) { + dest[idx++] = '.'; + memcpy(dest + idx, digits + 1, ndigits - 1); + idx += ndigits - 1; + } + + dest[idx++] = 'e'; + + char sign = K + ndigits - 1 < 0 ? 
'-' : '+'; + dest[idx++] = sign; + + int cent = 0; + + if(exp > 99) { + cent = exp / 100; + dest[idx++] = cent + '0'; + exp -= cent * 100; + } + if(exp > 9) { + int dec = exp / 10; + dest[idx++] = dec + '0'; + exp -= dec * 10; + + } else if(cent) { + dest[idx++] = '0'; + } + + dest[idx++] = exp % 10 + '0'; + + return idx; +} + +static int filter_special(double fp, char* dest) +{ + if(fp == 0.0) { + dest[0] = '0'; + dest[1] = '.'; + dest[2] = '0'; + return 3; + } + + uint64_t bits = get_dbits(fp); + + bool nan = (bits & expmask) == expmask; + + if(!nan) { + return 0; + } + + if(bits & fracmask) { + dest[0] = 'n'; dest[1] = 'a'; dest[2] = 'n'; + + } else { + dest[0] = 'i'; dest[1] = 'n'; dest[2] = 'f'; + } + + return 3; +} + +/* Fast and accurate double to string conversion based on Florian Loitsch's + * Grisu-algorithm[1]. + * + * Input: + * d -> the double to convert, dest -> destination buffer. + * The generated string will never be longer than 32 characters. + * Make sure to pass a pointer to at least 32 bytes of memory. + * The emitted string will not be null terminated. + * + * + * + * Output: + * The number of written characters. 
+ * + * Example usage: + * + * void print(double d) + * { + * char buf[32 + 1]; // plus null terminator + * int str_len = fpconv_dtoa(d, buf); + * + * buf[str_len] = '\0'; + * printf("%s", buf); + * } + * + */ +static int fpconv_dtoa(double d, char dest[32]) +{ + char digits[18]; + + int str_len = 0; + bool neg = false; + + if(get_dbits(d) & signmask) { + dest[0] = '-'; + str_len++; + neg = true; + } + + int spec = filter_special(d, dest + str_len); + + if(spec) { + return str_len + spec; + } + + int K = 0; + int ndigits = grisu2(d, digits, &K); + + str_len += emit_digits(digits, ndigits, dest + str_len, K, neg); +#if JSON_DEBUG + assert(str_len <= 32); +#endif + + return str_len; +} diff --git a/ext/json/vendor/jeaiii-ltoa.h b/ext/json/vendor/jeaiii-ltoa.h new file mode 100644 index 0000000000..ba4f497fc8 --- /dev/null +++ b/ext/json/vendor/jeaiii-ltoa.h @@ -0,0 +1,267 @@ +/* + +This file is released under the terms of the MIT License. It is based on the +work of James Edward Anhalt III, with the original license listed below. + +MIT License + +Copyright (c) 2024,2025 Enrico Thierbach - https://github.com/radiospiel +Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#ifndef JEAIII_TO_TEXT_H_ +#define JEAIII_TO_TEXT_H_ + +#include <stdint.h> + +typedef uint_fast32_t u32_t; +typedef uint_fast64_t u64_t; + +#define u32(x) ((u32_t)(x)) +#define u64(x) ((u64_t)(x)) + +struct digit_pair +{ + char dd[2]; +}; + +static const struct digit_pair *digits_dd = (struct digit_pair *)( + "00" "01" "02" "03" "04" "05" "06" "07" "08" "09" + "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" + "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" + "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" + "40" "41" "42" "43" "44" "45" "46" "47" "48" "49" + "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" + "60" "61" "62" "63" "64" "65" "66" "67" "68" "69" + "70" "71" "72" "73" "74" "75" "76" "77" "78" "79" + "80" "81" "82" "83" "84" "85" "86" "87" "88" "89" + "90" "91" "92" "93" "94" "95" "96" "97" "98" "99" +); + +static const struct digit_pair *digits_fd = (struct digit_pair *)( + "0_" "1_" "2_" "3_" "4_" "5_" "6_" "7_" "8_" "9_" + "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" + "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" + "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" + "40" "41" "42" "43" "44" "45" "46" "47" "48" "49" + "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" + "60" "61" "62" "63" "64" "65" "66" "67" "68" "69" + "70" "71" "72" "73" "74" "75" "76" "77" "78" "79" + "80" "81" "82" "83" "84" "85" "86" "87" "88" "89" + "90" "91" "92" "93" "94" "95" "96" "97" "98" "99" +); + +static const u64_t mask24 = (u64(1) << 24) - 1; +static const u64_t mask32 = (u64(1) << 32) - 1; +static const u64_t mask57 = (u64(1) << 57) - 1; + +#define COPY(buffer, digits) memcpy(buffer, &(digits), sizeof(struct digit_pair)) + +static char * +jeaiii_ultoa(char *b, u64_t n) +{ + if (n < 
u32(1e2)) { + COPY(b, digits_fd[n]); + return n < 10 ? b + 1 : b + 2; + } + + if (n < u32(1e6)) { + if (n < u32(1e4)) { + u32_t f0 = u32((10 * (1 << 24) / 1e3 + 1) * n); + COPY(b, digits_fd[f0 >> 24]); + + b -= n < u32(1e3); + u32_t f2 = (f0 & mask24) * 100; + COPY(b + 2, digits_dd[f2 >> 24]); + + return b + 4; + } + + u64_t f0 = u64(10 * (1ull << 32ull)/ 1e5 + 1) * n; + COPY(b, digits_fd[f0 >> 32]); + + b -= n < u32(1e5); + u64_t f2 = (f0 & mask32) * 100; + COPY(b + 2, digits_dd[f2 >> 32]); + + u64_t f4 = (f2 & mask32) * 100; + COPY(b + 4, digits_dd[f4 >> 32]); + return b + 6; + } + + if (n < u64(1ull << 32ull)) { + if (n < u32(1e8)) { + u64_t f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; + COPY(b, digits_fd[f0 >> 32]); + + b -= n < u32(1e7); + u64_t f2 = (f0 & mask32) * 100; + COPY(b + 2, digits_dd[f2 >> 32]); + + u64_t f4 = (f2 & mask32) * 100; + COPY(b + 4, digits_dd[f4 >> 32]); + + u64_t f6 = (f4 & mask32) * 100; + COPY(b + 6, digits_dd[f6 >> 32]); + + return b + 8; + } + + u64_t f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * n; + COPY(b, digits_fd[f0 >> 57]); + + b -= n < u32(1e9); + u64_t f2 = (f0 & mask57) * 100; + COPY(b + 2, digits_dd[f2 >> 57]); + + u64_t f4 = (f2 & mask57) * 100; + COPY(b + 4, digits_dd[f4 >> 57]); + + u64_t f6 = (f4 & mask57) * 100; + COPY(b + 6, digits_dd[f6 >> 57]); + + u64_t f8 = (f6 & mask57) * 100; + COPY(b + 8, digits_dd[f8 >> 57]); + + return b + 10; + } + + // if we get here U must be u64 but some compilers don't know that, so reassign n to a u64 to avoid warnings + u32_t z = n % u32(1e8); + u64_t u = n / u32(1e8); + + if (u < u32(1e2)) { + // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number) + COPY(b, digits_dd[u]); + b += 2; + } + else if (u < u32(1e6)) { + if (u < u32(1e4)) { + u32_t f0 = u32((10 * (1 << 24) / 1e3 + 1) * u); + COPY(b, digits_fd[f0 >> 24]); + + b -= u < u32(1e3); + u32_t f2 = (f0 & mask24) * 100; + COPY(b + 2, digits_dd[f2 >> 24]); + b += 4; + } + else { + u64_t f0 = 
u64(10 * (1ull << 32ull) / 1e5 + 1) * u; + COPY(b, digits_fd[f0 >> 32]); + + b -= u < u32(1e5); + u64_t f2 = (f0 & mask32) * 100; + COPY(b + 2, digits_dd[f2 >> 32]); + + u64_t f4 = (f2 & mask32) * 100; + COPY(b + 4, digits_dd[f4 >> 32]); + b += 6; + } + } + else if (u < u32(1e8)) { + u64_t f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16; + COPY(b, digits_fd[f0 >> 32]); + + b -= u < u32(1e7); + u64_t f2 = (f0 & mask32) * 100; + COPY(b + 2, digits_dd[f2 >> 32]); + + u64_t f4 = (f2 & mask32) * 100; + COPY(b + 4, digits_dd[f4 >> 32]); + + u64_t f6 = (f4 & mask32) * 100; + COPY(b + 6, digits_dd[f6 >> 32]); + + b += 8; + } + else if (u < u64(1ull << 32ull)) { + u64_t f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * u; + COPY(b, digits_fd[f0 >> 57]); + + b -= u < u32(1e9); + u64_t f2 = (f0 & mask57) * 100; + COPY(b + 2, digits_dd[f2 >> 57]); + + u64_t f4 = (f2 & mask57) * 100; + COPY(b + 4, digits_dd[f4 >> 57]); + + u64_t f6 = (f4 & mask57) * 100; + COPY(b + 6, digits_dd[f6 >> 57]); + + u64_t f8 = (f6 & mask57) * 100; + COPY(b + 8, digits_dd[f8 >> 57]); + b += 10; + } + else { + u32_t y = u % u32(1e8); + u /= u32(1e8); + + // u is 2, 3, or 4 digits (if u < 10 it would have been handled above) + if (u < u32(1e2)) { + COPY(b, digits_dd[u]); + b += 2; + } + else { + u32_t f0 = u32((10 * (1 << 24) / 1e3 + 1) * u); + COPY(b, digits_fd[f0 >> 24]); + + b -= u < u32(1e3); + u32_t f2 = (f0 & mask24) * 100; + COPY(b + 2, digits_dd[f2 >> 24]); + + b += 4; + } + // do 8 digits + u64_t f0 = (u64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1; + COPY(b, digits_dd[f0 >> 32]); + + u64_t f2 = (f0 & mask32) * 100; + COPY(b + 2, digits_dd[f2 >> 32]); + + u64_t f4 = (f2 & mask32) * 100; + COPY(b + 4, digits_dd[f4 >> 32]); + + u64_t f6 = (f4 & mask32) * 100; + COPY(b + 6, digits_dd[f6 >> 32]); + b += 8; + } + + // do 8 digits + u64_t f0 = (u64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1; + COPY(b, digits_dd[f0 >> 32]); + + u64_t f2 = (f0 & mask32) * 100; + COPY(b + 2, digits_dd[f2 >> 32]); + + u64_t f4 
= (f2 & mask32) * 100; + COPY(b + 4, digits_dd[f4 >> 32]); + + u64_t f6 = (f4 & mask32) * 100; + COPY(b + 6, digits_dd[f6 >> 32]); + + return b + 8; +} + +#undef u32 +#undef u64 +#undef COPY + +#endif // JEAIII_TO_TEXT_H_ diff --git a/ext/json/vendor/ryu.h b/ext/json/vendor/ryu.h new file mode 100644 index 0000000000..f06ec814b4 --- /dev/null +++ b/ext/json/vendor/ryu.h @@ -0,0 +1,819 @@ +// Copyright 2018 Ulf Adams +// +// The contents of this file may be used under the terms of the Apache License, +// Version 2.0. +// +// Alternatively, the contents of this file may be used under the terms of +// the Boost Software License, Version 1.0. +// +// Unless required by applicable law or agreed to in writing, this software +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. +// +// --- +// +// Apache License +// Version 2.0, January 2004 +// http://www.apache.org/licenses/ +// +// TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +// +// 1. Definitions. +// +// "License" shall mean the terms and conditions for use, reproduction, +// and distribution as defined by Sections 1 through 9 of this document. +// +// "Licensor" shall mean the copyright owner or entity authorized by +// the copyright owner that is granting the License. +// +// "Legal Entity" shall mean the union of the acting entity and all +// other entities that control, are controlled by, or are under common +// control with that entity. For the purposes of this definition, +// "control" means (i) the power, direct or indirect, to cause the +// direction or management of such entity, whether by contract or +// otherwise, or (ii) ownership of fifty percent (50%) or more of the +// outstanding shares, or (iii) beneficial ownership of such entity. +// +// "You" (or "Your") shall mean an individual or Legal Entity +// exercising permissions granted by this License. 
+// +// "Source" form shall mean the preferred form for making modifications, +// including but not limited to software source code, documentation +// source, and configuration files. +// +// "Object" form shall mean any form resulting from mechanical +// transformation or translation of a Source form, including but +// not limited to compiled object code, generated documentation, +// and conversions to other media types. +// +// "Work" shall mean the work of authorship, whether in Source or +// Object form, made available under the License, as indicated by a +// copyright notice that is included in or attached to the work +// (an example is provided in the Appendix below). +// +// "Derivative Works" shall mean any work, whether in Source or Object +// form, that is based on (or derived from) the Work and for which the +// editorial revisions, annotations, elaborations, or other modifications +// represent, as a whole, an original work of authorship. For the purposes +// of this License, Derivative Works shall not include works that remain +// separable from, or merely link (or bind by name) to the interfaces of, +// the Work and Derivative Works thereof. +// +// "Contribution" shall mean any work of authorship, including +// the original version of the Work and any modifications or additions +// to that Work or Derivative Works thereof, that is intentionally +// submitted to Licensor for inclusion in the Work by the copyright owner +// or by an individual or Legal Entity authorized to submit on behalf of +// the copyright owner. 
For the purposes of this definition, "submitted" +// means any form of electronic, verbal, or written communication sent +// to the Licensor or its representatives, including but not limited to +// communication on electronic mailing lists, source code control systems, +// and issue tracking systems that are managed by, or on behalf of, the +// Licensor for the purpose of discussing and improving the Work, but +// excluding communication that is conspicuously marked or otherwise +// designated in writing by the copyright owner as "Not a Contribution." +// +// "Contributor" shall mean Licensor and any individual or Legal Entity +// on behalf of whom a Contribution has been received by Licensor and +// subsequently incorporated within the Work. +// +// 2. Grant of Copyright License. Subject to the terms and conditions of +// this License, each Contributor hereby grants to You a perpetual, +// worldwide, non-exclusive, no-charge, royalty-free, irrevocable +// copyright license to reproduce, prepare Derivative Works of, +// publicly display, publicly perform, sublicense, and distribute the +// Work and such Derivative Works in Source or Object form. +// +// 3. Grant of Patent License. Subject to the terms and conditions of +// this License, each Contributor hereby grants to You a perpetual, +// worldwide, non-exclusive, no-charge, royalty-free, irrevocable +// (except as stated in this section) patent license to make, have made, +// use, offer to sell, sell, import, and otherwise transfer the Work, +// where such license applies only to those patent claims licensable +// by such Contributor that are necessarily infringed by their +// Contribution(s) alone or by combination of their Contribution(s) +// with the Work to which such Contribution(s) was submitted. 
If You +// institute patent litigation against any entity (including a +// cross-claim or counterclaim in a lawsuit) alleging that the Work +// or a Contribution incorporated within the Work constitutes direct +// or contributory patent infringement, then any patent licenses +// granted to You under this License for that Work shall terminate +// as of the date such litigation is filed. +// +// 4. Redistribution. You may reproduce and distribute copies of the +// Work or Derivative Works thereof in any medium, with or without +// modifications, and in Source or Object form, provided that You +// meet the following conditions: +// +// (a) You must give any other recipients of the Work or +// Derivative Works a copy of this License; and +// +// (b) You must cause any modified files to carry prominent notices +// stating that You changed the files; and +// +// (c) You must retain, in the Source form of any Derivative Works +// that You distribute, all copyright, patent, trademark, and +// attribution notices from the Source form of the Work, +// excluding those notices that do not pertain to any part of +// the Derivative Works; and +// +// (d) If the Work includes a "NOTICE" text file as part of its +// distribution, then any Derivative Works that You distribute must +// include a readable copy of the attribution notices contained +// within such NOTICE file, excluding those notices that do not +// pertain to any part of the Derivative Works, in at least one +// of the following places: within a NOTICE text file distributed +// as part of the Derivative Works; within the Source form or +// documentation, if provided along with the Derivative Works; or, +// within a display generated by the Derivative Works, if and +// wherever such third-party notices normally appear. The contents +// of the NOTICE file are for informational purposes only and +// do not modify the License. 
You may add Your own attribution +// notices within Derivative Works that You distribute, alongside +// or as an addendum to the NOTICE text from the Work, provided +// that such additional attribution notices cannot be construed +// as modifying the License. +// +// You may add Your own copyright statement to Your modifications and +// may provide additional or different license terms and conditions +// for use, reproduction, or distribution of Your modifications, or +// for any such Derivative Works as a whole, provided Your use, +// reproduction, and distribution of the Work otherwise complies with +// the conditions stated in this License. +// +// 5. Submission of Contributions. Unless You explicitly state otherwise, +// any Contribution intentionally submitted for inclusion in the Work +// by You to the Licensor shall be under the terms and conditions of +// this License, without any additional terms or conditions. +// Notwithstanding the above, nothing herein shall supersede or modify +// the terms of any separate license agreement you may have executed +// with Licensor regarding such Contributions. +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor, +// except as required for reasonable and customary use in describing the +// origin of the Work and reproducing the content of the NOTICE file. +// +// 7. Disclaimer of Warranty. Unless required by applicable law or +// agreed to in writing, Licensor provides the Work (and each +// Contributor provides its Contributions) on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied, including, without limitation, any warranties or conditions +// of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +// PARTICULAR PURPOSE. 
You are solely responsible for determining the +// appropriateness of using or redistributing the Work and assume any +// risks associated with Your exercise of permissions under this License. +// +// 8. Limitation of Liability. In no event and under no legal theory, +// whether in tort (including negligence), contract, or otherwise, +// unless required by applicable law (such as deliberate and grossly +// negligent acts) or agreed to in writing, shall any Contributor be +// liable to You for damages, including any direct, indirect, special, +// incidental, or consequential damages of any character arising as a +// result of this License or out of the use or inability to use the +// Work (including but not limited to damages for loss of goodwill, +// work stoppage, computer failure or malfunction, or any and all +// other commercial damages or losses), even if such Contributor +// has been advised of the possibility of such damages. +// +// 9. Accepting Warranty or Additional Liability. While redistributing +// the Work or Derivative Works thereof, You may choose to offer, +// and charge a fee for, acceptance of support, warranty, indemnity, +// or other liability obligations and/or rights consistent with this +// License. However, in accepting such obligations, You may act only +// on Your own behalf and on Your sole responsibility, not on behalf +// of any other Contributor, and only if You agree to indemnify, +// defend, and hold each Contributor harmless for any liability +// incurred by, or claims asserted against, such Contributor by reason +// of your accepting any such warranty or additional liability. +// +// END OF TERMS AND CONDITIONS +// +// APPENDIX: How to apply the Apache License to your work. +// +// To apply the Apache License to your work, attach the following +// boilerplate notice, with the fields enclosed by brackets "[]" +// replaced with your own identifying information. (Don't include +// the brackets!) 
The text should be enclosed in the appropriate +// comment syntax for the file format. We also recommend that a +// file or class name and description of purpose be included on the +// same "printed page" as the copyright notice for easier +// identification within third-party archives. +// +// Copyright [yyyy] [name of copyright owner] +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// --- +// +// Boost Software License - Version 1.0 - August 17th, 2003 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// --- +// Minimal Ryu implementation adapted for Ruby JSON gem by Josef Šimánek +// Optimized for pre-extracted mantissa/exponent from JSON parsing +// This is a stripped-down version containing only what's needed for +// converting decimal mantissa+exponent to IEEE 754 double precision. + +#ifndef RYU_H +#define RYU_H + +#include <stdint.h> +#include <stdbool.h> +#include <string.h> + +// Detect __builtin_clzll availability (for floor_log2) +// Note: MSVC doesn't have __builtin_clzll, so we provide a fallback +#ifdef __clang__ + #if __has_builtin(__builtin_clzll) + #define RYU_HAVE_BUILTIN_CLZLL 1 + #else + #define RYU_HAVE_BUILTIN_CLZLL 0 + #endif +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define RYU_HAVE_BUILTIN_CLZLL 1 +#else + #define RYU_HAVE_BUILTIN_CLZLL 0 +#endif + +// Count leading zeros (for floor_log2) +static inline uint32_t ryu_leading_zeros64(uint64_t input) +{ +#if RYU_HAVE_BUILTIN_CLZLL + return __builtin_clzll(input); +#else + // Fallback: binary search for the highest set bit + // This works on MSVC and other compilers without __builtin_clzll + if (input == 0) return 64; + uint32_t n = 0; + if (input <= 0x00000000FFFFFFFFULL) { n += 32; input <<= 32; } + if (input <= 0x0000FFFFFFFFFFFFULL) { n += 16; input <<= 16; } + if (input <= 0x00FFFFFFFFFFFFFFULL) { n += 8; input <<= 8; } + if (input <= 0x0FFFFFFFFFFFFFFFULL) { n += 4; input <<= 4; } + if (input <= 0x3FFFFFFFFFFFFFFFULL) { n += 2; input <<= 2; 
} + if (input <= 0x7FFFFFFFFFFFFFFFULL) { n += 1; } + return n; +#endif +} + +// These tables are generated by PrintDoubleLookupTable. +#define DOUBLE_POW5_INV_BITCOUNT 125 +#define DOUBLE_POW5_BITCOUNT 125 + +#define DOUBLE_POW5_INV_TABLE_SIZE 342 +#define DOUBLE_POW5_TABLE_SIZE 326 + +static const uint64_t DOUBLE_POW5_INV_SPLIT[DOUBLE_POW5_INV_TABLE_SIZE][2] = { + { 1u, 2305843009213693952u }, { 11068046444225730970u, 1844674407370955161u }, + { 5165088340638674453u, 1475739525896764129u }, { 7821419487252849886u, 1180591620717411303u }, + { 8824922364862649494u, 1888946593147858085u }, { 7059937891890119595u, 1511157274518286468u }, + { 13026647942995916322u, 1208925819614629174u }, { 9774590264567735146u, 1934281311383406679u }, + { 11509021026396098440u, 1547425049106725343u }, { 16585914450600699399u, 1237940039285380274u }, + { 15469416676735388068u, 1980704062856608439u }, { 16064882156130220778u, 1584563250285286751u }, + { 9162556910162266299u, 1267650600228229401u }, { 7281393426775805432u, 2028240960365167042u }, + { 16893161185646375315u, 1622592768292133633u }, { 2446482504291369283u, 1298074214633706907u }, + { 7603720821608101175u, 2076918743413931051u }, { 2393627842544570617u, 1661534994731144841u }, + { 16672297533003297786u, 1329227995784915872u }, { 11918280793837635165u, 2126764793255865396u }, + { 5845275820328197809u, 1701411834604692317u }, { 15744267100488289217u, 1361129467683753853u }, + { 3054734472329800808u, 2177807148294006166u }, { 17201182836831481939u, 1742245718635204932u }, + { 6382248639981364905u, 1393796574908163946u }, { 2832900194486363201u, 2230074519853062314u }, + { 5955668970331000884u, 1784059615882449851u }, { 1075186361522890384u, 1427247692705959881u }, + { 12788344622662355584u, 2283596308329535809u }, { 13920024512871794791u, 1826877046663628647u }, + { 3757321980813615186u, 1461501637330902918u }, { 10384555214134712795u, 1169201309864722334u }, + { 5547241898389809503u, 1870722095783555735u }, { 
4437793518711847602u, 1496577676626844588u }, + { 10928932444453298728u, 1197262141301475670u }, { 17486291911125277965u, 1915619426082361072u }, + { 6610335899416401726u, 1532495540865888858u }, { 12666966349016942027u, 1225996432692711086u }, + { 12888448528943286597u, 1961594292308337738u }, { 17689456452638449924u, 1569275433846670190u }, + { 14151565162110759939u, 1255420347077336152u }, { 7885109000409574610u, 2008672555323737844u }, + { 9997436015069570011u, 1606938044258990275u }, { 7997948812055656009u, 1285550435407192220u }, + { 12796718099289049614u, 2056880696651507552u }, { 2858676849947419045u, 1645504557321206042u }, + { 13354987924183666206u, 1316403645856964833u }, { 17678631863951955605u, 2106245833371143733u }, + { 3074859046935833515u, 1684996666696914987u }, { 13527933681774397782u, 1347997333357531989u }, + { 10576647446613305481u, 2156795733372051183u }, { 15840015586774465031u, 1725436586697640946u }, + { 8982663654677661702u, 1380349269358112757u }, { 18061610662226169046u, 2208558830972980411u }, + { 10759939715039024913u, 1766847064778384329u }, { 12297300586773130254u, 1413477651822707463u }, + { 15986332124095098083u, 2261564242916331941u }, { 9099716884534168143u, 1809251394333065553u }, + { 14658471137111155161u, 1447401115466452442u }, { 4348079280205103483u, 1157920892373161954u }, + { 14335624477811986218u, 1852673427797059126u }, { 7779150767507678651u, 1482138742237647301u }, + { 2533971799264232598u, 1185710993790117841u }, { 15122401323048503126u, 1897137590064188545u }, + { 12097921058438802501u, 1517710072051350836u }, { 5988988032009131678u, 1214168057641080669u }, + { 16961078480698431330u, 1942668892225729070u }, { 13568862784558745064u, 1554135113780583256u }, + { 7165741412905085728u, 1243308091024466605u }, { 11465186260648137165u, 1989292945639146568u }, + { 16550846638002330379u, 1591434356511317254u }, { 16930026125143774626u, 1273147485209053803u }, + { 4951948911778577463u, 2037035976334486086u }, { 
272210314680951647u, 1629628781067588869u }, + { 3907117066486671641u, 1303703024854071095u }, { 6251387306378674625u, 2085924839766513752u }, + { 16069156289328670670u, 1668739871813211001u }, { 9165976216721026213u, 1334991897450568801u }, + { 7286864317269821294u, 2135987035920910082u }, { 16897537898041588005u, 1708789628736728065u }, + { 13518030318433270404u, 1367031702989382452u }, { 6871453250525591353u, 2187250724783011924u }, + { 9186511415162383406u, 1749800579826409539u }, { 11038557946871817048u, 1399840463861127631u }, + { 10282995085511086630u, 2239744742177804210u }, { 8226396068408869304u, 1791795793742243368u }, + { 13959814484210916090u, 1433436634993794694u }, { 11267656730511734774u, 2293498615990071511u }, + { 5324776569667477496u, 1834798892792057209u }, { 7949170070475892320u, 1467839114233645767u }, + { 17427382500606444826u, 1174271291386916613u }, { 5747719112518849781u, 1878834066219066582u }, + { 15666221734240810795u, 1503067252975253265u }, { 12532977387392648636u, 1202453802380202612u }, + { 5295368560860596524u, 1923926083808324180u }, { 4236294848688477220u, 1539140867046659344u }, + { 7078384693692692099u, 1231312693637327475u }, { 11325415509908307358u, 1970100309819723960u }, + { 9060332407926645887u, 1576080247855779168u }, { 14626963555825137356u, 1260864198284623334u }, + { 12335095245094488799u, 2017382717255397335u }, { 9868076196075591040u, 1613906173804317868u }, + { 15273158586344293478u, 1291124939043454294u }, { 13369007293925138595u, 2065799902469526871u }, + { 7005857020398200553u, 1652639921975621497u }, { 16672732060544291412u, 1322111937580497197u }, + { 11918976037903224966u, 2115379100128795516u }, { 5845832015580669650u, 1692303280103036413u }, + { 12055363241948356366u, 1353842624082429130u }, { 841837113407818570u, 2166148198531886609u }, + { 4362818505468165179u, 1732918558825509287u }, { 14558301248600263113u, 1386334847060407429u }, + { 12225235553534690011u, 2218135755296651887u }, { 2401490813343931363u, 
1774508604237321510u }, + { 1921192650675145090u, 1419606883389857208u }, { 17831303500047873437u, 2271371013423771532u }, + { 6886345170554478103u, 1817096810739017226u }, { 1819727321701672159u, 1453677448591213781u }, + { 16213177116328979020u, 1162941958872971024u }, { 14873036941900635463u, 1860707134196753639u }, + { 15587778368262418694u, 1488565707357402911u }, { 8780873879868024632u, 1190852565885922329u }, + { 2981351763563108441u, 1905364105417475727u }, { 13453127855076217722u, 1524291284333980581u }, + { 7073153469319063855u, 1219433027467184465u }, { 11317045550910502167u, 1951092843947495144u }, + { 12742985255470312057u, 1560874275157996115u }, { 10194388204376249646u, 1248699420126396892u }, + { 1553625868034358140u, 1997919072202235028u }, { 8621598323911307159u, 1598335257761788022u }, + { 17965325103354776697u, 1278668206209430417u }, { 13987124906400001422u, 2045869129935088668u }, + { 121653480894270168u, 1636695303948070935u }, { 97322784715416134u, 1309356243158456748u }, + { 14913111714512307107u, 2094969989053530796u }, { 8241140556867935363u, 1675975991242824637u }, + { 17660958889720079260u, 1340780792994259709u }, { 17189487779326395846u, 2145249268790815535u }, + { 13751590223461116677u, 1716199415032652428u }, { 18379969808252713988u, 1372959532026121942u }, + { 14650556434236701088u, 2196735251241795108u }, { 652398703163629901u, 1757388200993436087u }, + { 11589965406756634890u, 1405910560794748869u }, { 7475898206584884855u, 2249456897271598191u }, + { 2291369750525997561u, 1799565517817278553u }, { 9211793429904618695u, 1439652414253822842u }, + { 18428218302589300235u, 2303443862806116547u }, { 7363877012587619542u, 1842755090244893238u }, + { 13269799239553916280u, 1474204072195914590u }, { 10615839391643133024u, 1179363257756731672u }, + { 2227947767661371545u, 1886981212410770676u }, { 16539753473096738529u, 1509584969928616540u }, + { 13231802778477390823u, 1207667975942893232u }, { 6413489186596184024u, 1932268761508629172u 
}, + { 16198837793502678189u, 1545815009206903337u }, { 5580372605318321905u, 1236652007365522670u }, + { 8928596168509315048u, 1978643211784836272u }, { 18210923379033183008u, 1582914569427869017u }, + { 7190041073742725760u, 1266331655542295214u }, { 436019273762630246u, 2026130648867672343u }, + { 7727513048493924843u, 1620904519094137874u }, { 9871359253537050198u, 1296723615275310299u }, + { 4726128361433549347u, 2074757784440496479u }, { 7470251503888749801u, 1659806227552397183u }, + { 13354898832594820487u, 1327844982041917746u }, { 13989140502667892133u, 2124551971267068394u }, + { 14880661216876224029u, 1699641577013654715u }, { 11904528973500979224u, 1359713261610923772u }, + { 4289851098633925465u, 2175541218577478036u }, { 18189276137874781665u, 1740432974861982428u }, + { 3483374466074094362u, 1392346379889585943u }, { 1884050330976640656u, 2227754207823337509u }, + { 5196589079523222848u, 1782203366258670007u }, { 15225317707844309248u, 1425762693006936005u }, + { 5913764258841343181u, 2281220308811097609u }, { 8420360221814984868u, 1824976247048878087u }, + { 17804334621677718864u, 1459980997639102469u }, { 17932816512084085415u, 1167984798111281975u }, + { 10245762345624985047u, 1868775676978051161u }, { 4507261061758077715u, 1495020541582440929u }, + { 7295157664148372495u, 1196016433265952743u }, { 7982903447895485668u, 1913626293225524389u }, + { 10075671573058298858u, 1530901034580419511u }, { 4371188443704728763u, 1224720827664335609u }, + { 14372599139411386667u, 1959553324262936974u }, { 15187428126271019657u, 1567642659410349579u }, + { 15839291315758726049u, 1254114127528279663u }, { 3206773216762499739u, 2006582604045247462u }, + { 13633465017635730761u, 1605266083236197969u }, { 14596120828850494932u, 1284212866588958375u }, + { 4907049252451240275u, 2054740586542333401u }, { 236290587219081897u, 1643792469233866721u }, + { 14946427728742906810u, 1315033975387093376u }, { 16535586736504830250u, 2104054360619349402u }, + { 
5849771759720043554u, 1683243488495479522u }, { 15747863852001765813u, 1346594790796383617u }, + { 10439186904235184007u, 2154551665274213788u }, { 15730047152871967852u, 1723641332219371030u }, + { 12584037722297574282u, 1378913065775496824u }, { 9066413911450387881u, 2206260905240794919u }, + { 10942479943902220628u, 1765008724192635935u }, { 8753983955121776503u, 1412006979354108748u }, + { 10317025513452932081u, 2259211166966573997u }, { 874922781278525018u, 1807368933573259198u }, + { 8078635854506640661u, 1445895146858607358u }, { 13841606313089133175u, 1156716117486885886u }, + { 14767872471458792434u, 1850745787979017418u }, { 746251532941302978u, 1480596630383213935u }, + { 597001226353042382u, 1184477304306571148u }, { 15712597221132509104u, 1895163686890513836u }, + { 8880728962164096960u, 1516130949512411069u }, { 10793931984473187891u, 1212904759609928855u }, + { 17270291175157100626u, 1940647615375886168u }, { 2748186495899949531u, 1552518092300708935u }, + { 2198549196719959625u, 1242014473840567148u }, { 18275073973719576693u, 1987223158144907436u }, + { 10930710364233751031u, 1589778526515925949u }, { 12433917106128911148u, 1271822821212740759u }, + { 8826220925580526867u, 2034916513940385215u }, { 7060976740464421494u, 1627933211152308172u }, + { 16716827836597268165u, 1302346568921846537u }, { 11989529279587987770u, 2083754510274954460u }, + { 9591623423670390216u, 1667003608219963568u }, { 15051996368420132820u, 1333602886575970854u }, + { 13015147745246481542u, 2133764618521553367u }, { 3033420566713364587u, 1707011694817242694u }, + { 6116085268112601993u, 1365609355853794155u }, { 9785736428980163188u, 2184974969366070648u }, + { 15207286772667951197u, 1747979975492856518u }, { 1097782973908629988u, 1398383980394285215u }, + { 1756452758253807981u, 2237414368630856344u }, { 5094511021344956708u, 1789931494904685075u }, + { 4075608817075965366u, 1431945195923748060u }, { 6520974107321544586u, 2291112313477996896u }, + { 1527430471115325346u, 
1832889850782397517u }, { 12289990821117991246u, 1466311880625918013u }, + { 17210690286378213644u, 1173049504500734410u }, { 9090360384495590213u, 1876879207201175057u }, + { 18340334751822203140u, 1501503365760940045u }, { 14672267801457762512u, 1201202692608752036u }, + { 16096930852848599373u, 1921924308174003258u }, { 1809498238053148529u, 1537539446539202607u }, + { 12515645034668249793u, 1230031557231362085u }, { 1578287981759648052u, 1968050491570179337u }, + { 12330676829633449412u, 1574440393256143469u }, { 13553890278448669853u, 1259552314604914775u }, + { 3239480371808320148u, 2015283703367863641u }, { 17348979556414297411u, 1612226962694290912u }, + { 6500486015647617283u, 1289781570155432730u }, { 10400777625036187652u, 2063650512248692368u }, + { 15699319729512770768u, 1650920409798953894u }, { 16248804598352126938u, 1320736327839163115u }, + { 7551343283653851484u, 2113178124542660985u }, { 6041074626923081187u, 1690542499634128788u }, + { 12211557331022285596u, 1352433999707303030u }, { 1091747655926105338u, 2163894399531684849u }, + { 4562746939482794594u, 1731115519625347879u }, { 7339546366328145998u, 1384892415700278303u }, + { 8053925371383123274u, 2215827865120445285u }, { 6443140297106498619u, 1772662292096356228u }, + { 12533209867169019542u, 1418129833677084982u }, { 5295740528502789974u, 2269007733883335972u }, + { 15304638867027962949u, 1815206187106668777u }, { 4865013464138549713u, 1452164949685335022u }, + { 14960057215536570740u, 1161731959748268017u }, { 9178696285890871890u, 1858771135597228828u }, + { 14721654658196518159u, 1487016908477783062u }, { 4398626097073393881u, 1189613526782226450u }, + { 7037801755317430209u, 1903381642851562320u }, { 5630241404253944167u, 1522705314281249856u }, + { 814844308661245011u, 1218164251424999885u }, { 1303750893857992017u, 1949062802279999816u }, + { 15800395974054034906u, 1559250241823999852u }, { 5261619149759407279u, 1247400193459199882u }, + { 12107939454356961969u, 1995840309534719811u 
}, { 5997002748743659252u, 1596672247627775849u }, + { 8486951013736837725u, 1277337798102220679u }, { 2511075177753209390u, 2043740476963553087u }, + { 13076906586428298482u, 1634992381570842469u }, { 14150874083884549109u, 1307993905256673975u }, + { 4194654460505726958u, 2092790248410678361u }, { 18113118827372222859u, 1674232198728542688u }, + { 3422448617672047318u, 1339385758982834151u }, { 16543964232501006678u, 2143017214372534641u }, + { 9545822571258895019u, 1714413771498027713u }, { 15015355686490936662u, 1371531017198422170u }, + { 5577825024675947042u, 2194449627517475473u }, { 11840957649224578280u, 1755559702013980378u }, + { 16851463748863483271u, 1404447761611184302u }, { 12204946739213931940u, 2247116418577894884u }, + { 13453306206113055875u, 1797693134862315907u }, { 3383947335406624054u, 1438154507889852726u }, + { 16482362180876329456u, 2301047212623764361u }, { 9496540929959153242u, 1840837770099011489u }, + { 11286581558709232917u, 1472670216079209191u }, { 5339916432225476010u, 1178136172863367353u }, + { 4854517476818851293u, 1885017876581387765u }, { 3883613981455081034u, 1508014301265110212u }, + { 14174937629389795797u, 1206411441012088169u }, { 11611853762797942306u, 1930258305619341071u }, + { 5600134195496443521u, 1544206644495472857u }, { 15548153800622885787u, 1235365315596378285u }, + { 6430302007287065643u, 1976584504954205257u }, { 16212288050055383484u, 1581267603963364205u }, + { 12969830440044306787u, 1265014083170691364u }, { 9683682259845159889u, 2024022533073106183u }, + { 15125643437359948558u, 1619218026458484946u }, { 8411165935146048523u, 1295374421166787957u }, + { 17147214310975587960u, 2072599073866860731u }, { 10028422634038560045u, 1658079259093488585u }, + { 8022738107230848036u, 1326463407274790868u }, { 9147032156827446534u, 2122341451639665389u }, + { 11006974540203867551u, 1697873161311732311u }, { 5116230817421183718u, 1358298529049385849u }, + { 15564666937357714594u, 2173277646479017358u }, { 
1383687105660440706u, 1738622117183213887u }, + { 12174996128754083534u, 1390897693746571109u }, { 8411947361780802685u, 2225436309994513775u }, + { 6729557889424642148u, 1780349047995611020u }, { 5383646311539713719u, 1424279238396488816u }, + { 1235136468979721303u, 2278846781434382106u }, { 15745504434151418335u, 1823077425147505684u }, + { 16285752362063044992u, 1458461940118004547u }, { 5649904260166615347u, 1166769552094403638u }, + { 5350498001524674232u, 1866831283351045821u }, { 591049586477829062u, 1493465026680836657u }, + { 11540886113407994219u, 1194772021344669325u }, { 18673707743239135u, 1911635234151470921u }, + { 14772334225162232601u, 1529308187321176736u }, { 8128518565387875758u, 1223446549856941389u }, + { 1937583260394870242u, 1957514479771106223u }, { 8928764237799716840u, 1566011583816884978u }, + { 14521709019723594119u, 1252809267053507982u }, { 8477339172590109297u, 2004494827285612772u }, + { 17849917782297818407u, 1603595861828490217u }, { 6901236596354434079u, 1282876689462792174u }, + { 18420676183650915173u, 2052602703140467478u }, { 3668494502695001169u, 1642082162512373983u }, + { 10313493231639821582u, 1313665730009899186u }, { 9122891541139893884u, 2101865168015838698u }, + { 14677010862395735754u, 1681492134412670958u }, { 673562245690857633u, 1345193707530136767u } +}; + +static const uint64_t DOUBLE_POW5_SPLIT[DOUBLE_POW5_TABLE_SIZE][2] = { + { 0u, 1152921504606846976u }, { 0u, 1441151880758558720u }, + { 0u, 1801439850948198400u }, { 0u, 2251799813685248000u }, + { 0u, 1407374883553280000u }, { 0u, 1759218604441600000u }, + { 0u, 2199023255552000000u }, { 0u, 1374389534720000000u }, + { 0u, 1717986918400000000u }, { 0u, 2147483648000000000u }, + { 0u, 1342177280000000000u }, { 0u, 1677721600000000000u }, + { 0u, 2097152000000000000u }, { 0u, 1310720000000000000u }, + { 0u, 1638400000000000000u }, { 0u, 2048000000000000000u }, + { 0u, 1280000000000000000u }, { 0u, 1600000000000000000u }, + { 0u, 2000000000000000000u }, { 0u, 
1250000000000000000u }, + { 0u, 1562500000000000000u }, { 0u, 1953125000000000000u }, + { 0u, 1220703125000000000u }, { 0u, 1525878906250000000u }, + { 0u, 1907348632812500000u }, { 0u, 1192092895507812500u }, + { 0u, 1490116119384765625u }, { 4611686018427387904u, 1862645149230957031u }, + { 9799832789158199296u, 1164153218269348144u }, { 12249790986447749120u, 1455191522836685180u }, + { 15312238733059686400u, 1818989403545856475u }, { 14528612397897220096u, 2273736754432320594u }, + { 13692068767113150464u, 1421085471520200371u }, { 12503399940464050176u, 1776356839400250464u }, + { 15629249925580062720u, 2220446049250313080u }, { 9768281203487539200u, 1387778780781445675u }, + { 7598665485932036096u, 1734723475976807094u }, { 274959820560269312u, 2168404344971008868u }, + { 9395221924704944128u, 1355252715606880542u }, { 2520655369026404352u, 1694065894508600678u }, + { 12374191248137781248u, 2117582368135750847u }, { 14651398557727195136u, 1323488980084844279u }, + { 13702562178731606016u, 1654361225106055349u }, { 3293144668132343808u, 2067951531382569187u }, + { 18199116482078572544u, 1292469707114105741u }, { 8913837547316051968u, 1615587133892632177u }, + { 15753982952572452864u, 2019483917365790221u }, { 12152082354571476992u, 1262177448353618888u }, + { 15190102943214346240u, 1577721810442023610u }, { 9764256642163156992u, 1972152263052529513u }, + { 17631875447420442880u, 1232595164407830945u }, { 8204786253993389888u, 1540743955509788682u }, + { 1032610780636961552u, 1925929944387235853u }, { 2951224747111794922u, 1203706215242022408u }, + { 3689030933889743652u, 1504632769052528010u }, { 13834660704216955373u, 1880790961315660012u }, + { 17870034976990372916u, 1175494350822287507u }, { 17725857702810578241u, 1469367938527859384u }, + { 3710578054803671186u, 1836709923159824231u }, { 26536550077201078u, 2295887403949780289u }, + { 11545800389866720434u, 1434929627468612680u }, { 14432250487333400542u, 1793662034335765850u }, + { 8816941072311974870u, 
2242077542919707313u }, { 17039803216263454053u, 1401298464324817070u }, + { 12076381983474541759u, 1751623080406021338u }, { 5872105442488401391u, 2189528850507526673u }, + { 15199280947623720629u, 1368455531567204170u }, { 9775729147674874978u, 1710569414459005213u }, + { 16831347453020981627u, 2138211768073756516u }, { 1296220121283337709u, 1336382355046097823u }, + { 15455333206886335848u, 1670477943807622278u }, { 10095794471753144002u, 2088097429759527848u }, + { 6309871544845715001u, 1305060893599704905u }, { 12499025449484531656u, 1631326116999631131u }, + { 11012095793428276666u, 2039157646249538914u }, { 11494245889320060820u, 1274473528905961821u }, + { 532749306367912313u, 1593091911132452277u }, { 5277622651387278295u, 1991364888915565346u }, + { 7910200175544436838u, 1244603055572228341u }, { 14499436237857933952u, 1555753819465285426u }, + { 8900923260467641632u, 1944692274331606783u }, { 12480606065433357876u, 1215432671457254239u }, + { 10989071563364309441u, 1519290839321567799u }, { 9124653435777998898u, 1899113549151959749u }, + { 8008751406574943263u, 1186945968219974843u }, { 5399253239791291175u, 1483682460274968554u }, + { 15972438586593889776u, 1854603075343710692u }, { 759402079766405302u, 1159126922089819183u }, + { 14784310654990170340u, 1448908652612273978u }, { 9257016281882937117u, 1811135815765342473u }, + { 16182956370781059300u, 2263919769706678091u }, { 7808504722524468110u, 1414949856066673807u }, + { 5148944884728197234u, 1768687320083342259u }, { 1824495087482858639u, 2210859150104177824u }, + { 1140309429676786649u, 1381786968815111140u }, { 1425386787095983311u, 1727233711018888925u }, + { 6393419502297367043u, 2159042138773611156u }, { 13219259225790630210u, 1349401336733506972u }, + { 16524074032238287762u, 1686751670916883715u }, { 16043406521870471799u, 2108439588646104644u }, + { 803757039314269066u, 1317774742903815403u }, { 14839754354425000045u, 1647218428629769253u }, + { 4714634887749086344u, 2059023035787211567u }, 
{ 9864175832484260821u, 1286889397367007229u }, + { 16941905809032713930u, 1608611746708759036u }, { 2730638187581340797u, 2010764683385948796u }, + { 10930020904093113806u, 1256727927116217997u }, { 18274212148543780162u, 1570909908895272496u }, + { 4396021111970173586u, 1963637386119090621u }, { 5053356204195052443u, 1227273366324431638u }, + { 15540067292098591362u, 1534091707905539547u }, { 14813398096695851299u, 1917614634881924434u }, + { 13870059828862294966u, 1198509146801202771u }, { 12725888767650480803u, 1498136433501503464u }, + { 15907360959563101004u, 1872670541876879330u }, { 14553786618154326031u, 1170419088673049581u }, + { 4357175217410743827u, 1463023860841311977u }, { 10058155040190817688u, 1828779826051639971u }, + { 7961007781811134206u, 2285974782564549964u }, { 14199001900486734687u, 1428734239102843727u }, + { 13137066357181030455u, 1785917798878554659u }, { 11809646928048900164u, 2232397248598193324u }, + { 16604401366885338411u, 1395248280373870827u }, { 16143815690179285109u, 1744060350467338534u }, + { 10956397575869330579u, 2180075438084173168u }, { 6847748484918331612u, 1362547148802608230u }, + { 17783057643002690323u, 1703183936003260287u }, { 17617136035325974999u, 2128979920004075359u }, + { 17928239049719816230u, 1330612450002547099u }, { 17798612793722382384u, 1663265562503183874u }, + { 13024893955298202172u, 2079081953128979843u }, { 5834715712847682405u, 1299426220705612402u }, + { 16516766677914378815u, 1624282775882015502u }, { 11422586310538197711u, 2030353469852519378u }, + { 11750802462513761473u, 1268970918657824611u }, { 10076817059714813937u, 1586213648322280764u }, + { 12596021324643517422u, 1982767060402850955u }, { 5566670318688504437u, 1239229412751781847u }, + { 2346651879933242642u, 1549036765939727309u }, { 7545000868343941206u, 1936295957424659136u }, + { 4715625542714963254u, 1210184973390411960u }, { 5894531928393704067u, 1512731216738014950u }, + { 16591536947346905892u, 1890914020922518687u }, { 
17287239619732898039u, 1181821263076574179u }, + { 16997363506238734644u, 1477276578845717724u }, { 2799960309088866689u, 1846595723557147156u }, + { 10973347230035317489u, 1154122327223216972u }, { 13716684037544146861u, 1442652909029021215u }, + { 12534169028502795672u, 1803316136286276519u }, { 11056025267201106687u, 2254145170357845649u }, + { 18439230838069161439u, 1408840731473653530u }, { 13825666510731675991u, 1761050914342066913u }, + { 3447025083132431277u, 2201313642927583642u }, { 6766076695385157452u, 1375821026829739776u }, + { 8457595869231446815u, 1719776283537174720u }, { 10571994836539308519u, 2149720354421468400u }, + { 6607496772837067824u, 1343575221513417750u }, { 17482743002901110588u, 1679469026891772187u }, + { 17241742735199000331u, 2099336283614715234u }, { 15387775227926763111u, 1312085177259197021u }, + { 5399660979626290177u, 1640106471573996277u }, { 11361262242960250625u, 2050133089467495346u }, + { 11712474920277544544u, 1281333180917184591u }, { 10028907631919542777u, 1601666476146480739u }, + { 7924448521472040567u, 2002083095183100924u }, { 14176152362774801162u, 1251301934489438077u }, + { 3885132398186337741u, 1564127418111797597u }, { 9468101516160310080u, 1955159272639746996u }, + { 15140935484454969608u, 1221974545399841872u }, { 479425281859160394u, 1527468181749802341u }, + { 5210967620751338397u, 1909335227187252926u }, { 17091912818251750210u, 1193334516992033078u }, + { 12141518985959911954u, 1491668146240041348u }, { 15176898732449889943u, 1864585182800051685u }, + { 11791404716994875166u, 1165365739250032303u }, { 10127569877816206054u, 1456707174062540379u }, + { 8047776328842869663u, 1820883967578175474u }, { 836348374198811271u, 2276104959472719343u }, + { 7440246761515338900u, 1422565599670449589u }, { 13911994470321561530u, 1778206999588061986u }, + { 8166621051047176104u, 2222758749485077483u }, { 2798295147690791113u, 1389224218428173427u }, + { 17332926989895652603u, 1736530273035216783u }, { 
17054472718942177850u, 2170662841294020979u }, + { 8353202440125167204u, 1356664275808763112u }, { 10441503050156459005u, 1695830344760953890u }, + { 3828506775840797949u, 2119787930951192363u }, { 86973725686804766u, 1324867456844495227u }, + { 13943775212390669669u, 1656084321055619033u }, { 3594660960206173375u, 2070105401319523792u }, + { 2246663100128858359u, 1293815875824702370u }, { 12031700912015848757u, 1617269844780877962u }, + { 5816254103165035138u, 2021587305976097453u }, { 5941001823691840913u, 1263492066235060908u }, + { 7426252279614801142u, 1579365082793826135u }, { 4671129331091113523u, 1974206353492282669u }, + { 5225298841145639904u, 1233878970932676668u }, { 6531623551432049880u, 1542348713665845835u }, + { 3552843420862674446u, 1927935892082307294u }, { 16055585193321335241u, 1204959932551442058u }, + { 10846109454796893243u, 1506199915689302573u }, { 18169322836923504458u, 1882749894611628216u }, + { 11355826773077190286u, 1176718684132267635u }, { 9583097447919099954u, 1470898355165334544u }, + { 11978871809898874942u, 1838622943956668180u }, { 14973589762373593678u, 2298278679945835225u }, + { 2440964573842414192u, 1436424174966147016u }, { 3051205717303017741u, 1795530218707683770u }, + { 13037379183483547984u, 2244412773384604712u }, { 8148361989677217490u, 1402757983365377945u }, + { 14797138505523909766u, 1753447479206722431u }, { 13884737113477499304u, 2191809349008403039u }, + { 15595489723564518921u, 1369880843130251899u }, { 14882676136028260747u, 1712351053912814874u }, + { 9379973133180550126u, 2140438817391018593u }, { 17391698254306313589u, 1337774260869386620u }, + { 3292878744173340370u, 1672217826086733276u }, { 4116098430216675462u, 2090272282608416595u }, + { 266718509671728212u, 1306420176630260372u }, { 333398137089660265u, 1633025220787825465u }, + { 5028433689789463235u, 2041281525984781831u }, { 10060300083759496378u, 1275800953740488644u }, + { 12575375104699370472u, 1594751192175610805u }, { 1884160825592049379u, 
1993438990219513507u }, + { 17318501580490888525u, 1245899368887195941u }, { 7813068920331446945u, 1557374211108994927u }, + { 5154650131986920777u, 1946717763886243659u }, { 915813323278131534u, 1216698602428902287u }, + { 14979824709379828129u, 1520873253036127858u }, { 9501408849870009354u, 1901091566295159823u }, + { 12855909558809837702u, 1188182228934474889u }, { 2234828893230133415u, 1485227786168093612u }, + { 2793536116537666769u, 1856534732710117015u }, { 8663489100477123587u, 1160334207943823134u }, + { 1605989338741628675u, 1450417759929778918u }, { 11230858710281811652u, 1813022199912223647u }, + { 9426887369424876662u, 2266277749890279559u }, { 12809333633531629769u, 1416423593681424724u }, + { 16011667041914537212u, 1770529492101780905u }, { 6179525747111007803u, 2213161865127226132u }, + { 13085575628799155685u, 1383226165704516332u }, { 16356969535998944606u, 1729032707130645415u }, + { 15834525901571292854u, 2161290883913306769u }, { 2979049660840976177u, 1350806802445816731u }, + { 17558870131333383934u, 1688508503057270913u }, { 8113529608884566205u, 2110635628821588642u }, + { 9682642023980241782u, 1319147268013492901u }, { 16714988548402690132u, 1648934085016866126u }, + { 11670363648648586857u, 2061167606271082658u }, { 11905663298832754689u, 1288229753919426661u }, + { 1047021068258779650u, 1610287192399283327u }, { 15143834390605638274u, 2012858990499104158u }, + { 4853210475701136017u, 1258036869061940099u }, { 1454827076199032118u, 1572546086327425124u }, + { 1818533845248790147u, 1965682607909281405u }, { 3442426662494187794u, 1228551629943300878u }, + { 13526405364972510550u, 1535689537429126097u }, { 3072948650933474476u, 1919611921786407622u }, + { 15755650962115585259u, 1199757451116504763u }, { 15082877684217093670u, 1499696813895630954u }, + { 9630225068416591280u, 1874621017369538693u }, { 8324733676974063502u, 1171638135855961683u }, + { 5794231077790191473u, 1464547669819952104u }, { 7242788847237739342u, 1830684587274940130u }, 
+ { 18276858095901949986u, 2288355734093675162u }, { 16034722328366106645u, 1430222333808546976u }, + { 1596658836748081690u, 1787777917260683721u }, { 6607509564362490017u, 2234722396575854651u }, + { 1823850468512862308u, 1396701497859909157u }, { 6891499104068465790u, 1745876872324886446u }, + { 17837745916940358045u, 2182346090406108057u }, { 4231062170446641922u, 1363966306503817536u }, + { 5288827713058302403u, 1704957883129771920u }, { 6611034641322878003u, 2131197353912214900u }, + { 13355268687681574560u, 1331998346195134312u }, { 16694085859601968200u, 1664997932743917890u }, + { 11644235287647684442u, 2081247415929897363u }, { 4971804045566108824u, 1300779634956185852u }, + { 6214755056957636030u, 1625974543695232315u }, { 3156757802769657134u, 2032468179619040394u }, + { 6584659645158423613u, 1270292612261900246u }, { 17454196593302805324u, 1587865765327375307u }, + { 17206059723201118751u, 1984832206659219134u }, { 6142101308573311315u, 1240520129162011959u }, + { 3065940617289251240u, 1550650161452514949u }, { 8444111790038951954u, 1938312701815643686u }, + { 665883850346957067u, 1211445438634777304u }, { 832354812933696334u, 1514306798293471630u }, + { 10263815553021896226u, 1892883497866839537u }, { 17944099766707154901u, 1183052186166774710u }, + { 13206752671529167818u, 1478815232708468388u }, { 16508440839411459773u, 1848519040885585485u }, + { 12623618533845856310u, 1155324400553490928u }, { 15779523167307320387u, 1444155500691863660u }, + { 1277659885424598868u, 1805194375864829576u }, { 1597074856780748586u, 2256492969831036970u }, + { 5609857803915355770u, 1410308106144398106u }, { 16235694291748970521u, 1762885132680497632u }, + { 1847873790976661535u, 2203606415850622041u }, { 12684136165428883219u, 1377254009906638775u }, + { 11243484188358716120u, 1721567512383298469u }, { 219297180166231438u, 2151959390479123087u }, + { 7054589765244976505u, 1344974619049451929u }, { 13429923224983608535u, 1681218273811814911u }, + { 
12175718012802122765u, 2101522842264768639u }, { 14527352785642408584u, 1313451776415480399u }, + { 13547504963625622826u, 1641814720519350499u }, { 12322695186104640628u, 2052268400649188124u }, + { 16925056528170176201u, 1282667750405742577u }, { 7321262604930556539u, 1603334688007178222u }, + { 18374950293017971482u, 2004168360008972777u }, { 4566814905495150320u, 1252605225005607986u }, + { 14931890668723713708u, 1565756531257009982u }, { 9441491299049866327u, 1957195664071262478u }, + { 1289246043478778550u, 1223247290044539049u }, { 6223243572775861092u, 1529059112555673811u }, + { 3167368447542438461u, 1911323890694592264u }, { 1979605279714024038u, 1194577431684120165u }, + { 7086192618069917952u, 1493221789605150206u }, { 18081112809442173248u, 1866527237006437757u }, + { 13606538515115052232u, 1166579523129023598u }, { 7784801107039039482u, 1458224403911279498u }, + { 507629346944023544u, 1822780504889099373u }, { 5246222702107417334u, 2278475631111374216u }, + { 3278889188817135834u, 1424047269444608885u }, { 8710297504448807696u, 1780059086805761106u } +}; + +// IEEE 754 double precision constants +#define DOUBLE_MANTISSA_BITS 52 +#define DOUBLE_EXPONENT_BITS 11 +#define DOUBLE_EXPONENT_BIAS 1023 + +// Helper: floor(log2(value)) using ryu_leading_zeros64 +static inline uint32_t floor_log2(const uint64_t value) { + return 63 - ryu_leading_zeros64(value); +} + +// Helper: log2(5^e) approximation +static inline int32_t log2pow5(const int32_t e) { + return (int32_t) ((((uint32_t) e) * 1217359) >> 19); +} + +// Helper: ceil(log2(5^e)) +static inline int32_t ceil_log2pow5(const int32_t e) { + return log2pow5(e) + 1; +} + +// Helper: max of two int32 +static inline int32_t max32(int32_t a, int32_t b) { + return a < b ? 
b : a; +} + +// Helper: convert uint64 bits to double +static inline double int64Bits2Double(uint64_t bits) { + double f; + memcpy(&f, &bits, sizeof(double)); + return f; +} + +// Check if value is multiple of 2^p +static inline bool multipleOfPowerOf2(const uint64_t value, const uint32_t p) { + return (value & ((1ull << p) - 1)) == 0; +} + +// Count how many times value is divisible by 5 +// Uses modular inverse to avoid expensive division +static inline uint32_t pow5Factor(uint64_t value) { + const uint64_t m_inv_5 = 14757395258967641293u; // 5 * m_inv_5 = 1 (mod 2^64) + const uint64_t n_div_5 = 3689348814741910323u; // 2^64 / 5 + uint32_t count = 0; + for (;;) { + value *= m_inv_5; + if (value > n_div_5) + break; + ++count; + } + return count; +} + +// Check if value is multiple of 5^p +// Optimized: uses modular inverse instead of division +static inline bool multipleOfPowerOf5(const uint64_t value, const uint32_t p) { + return pow5Factor(value) >= p; +} + +// 128-bit multiplication with shift +// This is the core operation for converting decimal to binary +#if defined(__SIZEOF_INT128__) +// Use native 128-bit integers if available (GCC/Clang) +static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) { + const unsigned __int128 b0 = ((unsigned __int128) m) * mul[0]; + const unsigned __int128 b2 = ((unsigned __int128) m) * mul[1]; + return (uint64_t) (((b0 >> 64) + b2) >> (j - 64)); +} +#else +// Fallback for systems without 128-bit integers +static inline uint64_t umul128(const uint64_t a, const uint64_t b, uint64_t* const productHi) { + const uint32_t aLo = (uint32_t)a; + const uint32_t aHi = (uint32_t)(a >> 32); + const uint32_t bLo = (uint32_t)b; + const uint32_t bHi = (uint32_t)(b >> 32); + + const uint64_t b00 = (uint64_t)aLo * bLo; + const uint64_t b01 = (uint64_t)aLo * bHi; + const uint64_t b10 = (uint64_t)aHi * bLo; + const uint64_t b11 = (uint64_t)aHi * bHi; + + const uint32_t b00Lo = (uint32_t)b00; + const 
uint32_t b00Hi = (uint32_t)(b00 >> 32); + + const uint64_t mid1 = b10 + b00Hi; + const uint32_t mid1Lo = (uint32_t)(mid1); + const uint32_t mid1Hi = (uint32_t)(mid1 >> 32); + + const uint64_t mid2 = b01 + mid1Lo; + const uint32_t mid2Lo = (uint32_t)(mid2); + const uint32_t mid2Hi = (uint32_t)(mid2 >> 32); + + const uint64_t pHi = b11 + mid1Hi + mid2Hi; + const uint64_t pLo = ((uint64_t)mid2Lo << 32) | b00Lo; + + *productHi = pHi; + return pLo; +} + +static inline uint64_t shiftright128(const uint64_t lo, const uint64_t hi, const uint32_t dist) { + return (hi << (64 - dist)) | (lo >> dist); +} + +static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) { + uint64_t high1; + const uint64_t low1 = umul128(m, mul[1], &high1); + uint64_t high0; + umul128(m, mul[0], &high0); + const uint64_t sum = high0 + low1; + if (sum < high0) { + ++high1; + } + return shiftright128(sum, high1, j - 64); +} +#endif + +// Main conversion function: decimal mantissa+exponent to IEEE 754 double +// Optimized for JSON parsing with fast paths for edge cases +static inline double ryu_s2d_from_parts(uint64_t m10, int m10digits, int32_t e10, bool signedM) { + // Fast path: handle zero explicitly (e.g., "0.0", "0e0") + if (m10 == 0) { + return int64Bits2Double(((uint64_t) signedM) << 63); + } + + // Fast path: handle overflow/underflow early + if (m10digits + e10 <= -324) { + // Underflow to zero + return int64Bits2Double(((uint64_t) signedM) << 63); + } + + if (m10digits + e10 >= 310) { + // Overflow to infinity + return int64Bits2Double((((uint64_t) signedM) << 63) | 0x7ff0000000000000ULL); + } + + // Convert decimal to binary: m10 * 10^e10 = m2 * 2^e2 + int32_t e2; + uint64_t m2; + bool trailingZeros; + + if (e10 >= 0) { + // Positive exponent: multiply by 5^e10 and adjust binary exponent + e2 = floor_log2(m10) + e10 + log2pow5(e10) - (DOUBLE_MANTISSA_BITS + 1); + int j = e2 - e10 - ceil_log2pow5(e10) + DOUBLE_POW5_BITCOUNT; + m2 = mulShift64(m10, 
DOUBLE_POW5_SPLIT[e10], j); + trailingZeros = e2 < e10 || (e2 - e10 < 64 && multipleOfPowerOf2(m10, e2 - e10)); + } else { + // Negative exponent: divide by 5^(-e10) + e2 = floor_log2(m10) + e10 - ceil_log2pow5(-e10) - (DOUBLE_MANTISSA_BITS + 1); + int j = e2 - e10 + ceil_log2pow5(-e10) - 1 + DOUBLE_POW5_INV_BITCOUNT; + m2 = mulShift64(m10, DOUBLE_POW5_INV_SPLIT[-e10], j); + trailingZeros = multipleOfPowerOf5(m10, -e10); + } + + // Compute IEEE 754 exponent + uint32_t ieee_e2 = (uint32_t) max32(0, e2 + DOUBLE_EXPONENT_BIAS + floor_log2(m2)); + + if (ieee_e2 > 0x7fe) { + // Overflow to infinity + return int64Bits2Double((((uint64_t) signedM) << 63) | 0x7ff0000000000000ULL); + } + + // Compute shift amount for rounding + int32_t shift = (ieee_e2 == 0 ? 1 : ieee_e2) - e2 - DOUBLE_EXPONENT_BIAS - DOUBLE_MANTISSA_BITS; + + // IEEE 754 round-to-even (banker's rounding) + trailingZeros &= (m2 & ((1ull << (shift - 1)) - 1)) == 0; + uint64_t lastRemovedBit = (m2 >> (shift - 1)) & 1; + bool roundUp = (lastRemovedBit != 0) && (!trailingZeros || (((m2 >> shift) & 1) != 0)); + + uint64_t ieee_m2 = (m2 >> shift) + roundUp; + ieee_m2 &= (1ull << DOUBLE_MANTISSA_BITS) - 1; + + if (ieee_m2 == 0 && roundUp) { + ieee_e2++; + } + + // Pack sign, exponent, and mantissa into IEEE 754 format + // Match original Ryu: group sign+exponent, then shift and add mantissa + uint64_t ieee = (((((uint64_t) signedM) << DOUBLE_EXPONENT_BITS) | (uint64_t)ieee_e2) << DOUBLE_MANTISSA_BITS) | ieee_m2; + return int64Bits2Double(ieee); +} + +#endif // RYU_H |
