diff options
Diffstat (limited to 'ext/json')
| -rw-r--r-- | ext/json/fbuffer/fbuffer.h | 204 | ||||
| -rw-r--r-- | ext/json/generator/depend | 5 | ||||
| -rw-r--r-- | ext/json/generator/extconf.rb | 9 | ||||
| -rw-r--r-- | ext/json/generator/generator.c | 1860 | ||||
| -rw-r--r-- | ext/json/json.gemspec | 10 | ||||
| -rw-r--r-- | ext/json/json.h | 134 | ||||
| -rw-r--r-- | ext/json/lib/json.rb | 92 | ||||
| -rw-r--r-- | ext/json/lib/json/add/core.rb | 1 | ||||
| -rw-r--r-- | ext/json/lib/json/add/string.rb | 35 | ||||
| -rw-r--r-- | ext/json/lib/json/add/symbol.rb | 9 | ||||
| -rw-r--r-- | ext/json/lib/json/common.rb | 663 | ||||
| -rw-r--r-- | ext/json/lib/json/ext.rb | 34 | ||||
| -rw-r--r-- | ext/json/lib/json/ext/generator/state.rb | 37 | ||||
| -rw-r--r-- | ext/json/lib/json/generic_object.rb | 8 | ||||
| -rw-r--r-- | ext/json/lib/json/version.rb | 2 | ||||
| -rw-r--r-- | ext/json/parser/depend | 5 | ||||
| -rw-r--r-- | ext/json/parser/extconf.rb | 14 | ||||
| -rw-r--r-- | ext/json/parser/parser.c | 3989 | ||||
| -rw-r--r-- | ext/json/parser/parser.rl | 1457 | ||||
| -rw-r--r-- | ext/json/parser/prereq.mk | 13 | ||||
| -rw-r--r-- | ext/json/simd/conf.rb | 24 | ||||
| -rw-r--r-- | ext/json/simd/simd.h | 208 | ||||
| -rw-r--r-- | ext/json/vendor/fpconv.c | 480 | ||||
| -rw-r--r-- | ext/json/vendor/jeaiii-ltoa.h | 267 | ||||
| -rw-r--r-- | ext/json/vendor/ryu.h | 819 |
25 files changed, 5076 insertions, 5303 deletions
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h index 0774c7e464..b4f5266ca5 100644 --- a/ext/json/fbuffer/fbuffer.h +++ b/ext/json/fbuffer/fbuffer.h @@ -1,39 +1,8 @@ #ifndef _FBUFFER_H_ #define _FBUFFER_H_ -#include "ruby.h" -#include "ruby/encoding.h" - -/* shims */ -/* This is the fallback definition from Ruby 3.4 */ - -#ifndef RBIMPL_STDBOOL_H -#if defined(__cplusplus) -# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) -# include <cstdbool> -# endif -#elif defined(HAVE_STDBOOL_H) -# include <stdbool.h> -#elif !defined(HAVE__BOOL) -typedef unsigned char _Bool; -# define bool _Bool -# define true ((_Bool)+1) -# define false ((_Bool)+0) -# define __bool_true_false_are_defined -#endif -#endif - -#ifndef RB_UNLIKELY -#define RB_UNLIKELY(expr) expr -#endif - -#ifndef RB_LIKELY -#define RB_LIKELY(expr) expr -#endif - -#ifndef MAYBE_UNUSED -# define MAYBE_UNUSED(x) x -#endif +#include "../json.h" +#include "../vendor/jeaiii-ltoa.h" enum fbuffer_type { FBUFFER_HEAP_ALLOCATED = 0, @@ -42,9 +11,12 @@ enum fbuffer_type { typedef struct FBufferStruct { enum fbuffer_type type; - unsigned long initial_length; - unsigned long len; - unsigned long capa; + size_t initial_length; + size_t len; + size_t capa; +#if JSON_DEBUG + size_t requested; +#endif char *ptr; VALUE io; } FBuffer; @@ -59,19 +31,13 @@ typedef struct FBufferStruct { #define FBUFFER_PAIR(fb) FBUFFER_PTR(fb), FBUFFER_LEN(fb) static void fbuffer_free(FBuffer *fb); -#ifndef JSON_GENERATOR static void fbuffer_clear(FBuffer *fb); -#endif -static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len); -#ifdef JSON_GENERATOR +static void fbuffer_append(FBuffer *fb, const char *newstr, size_t len); static void fbuffer_append_long(FBuffer *fb, long number); -#endif static inline void fbuffer_append_char(FBuffer *fb, char newchr); -#ifdef JSON_GENERATOR static VALUE fbuffer_finalize(FBuffer *fb); -#endif -static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size) +static void fbuffer_stack_init(FBuffer *fb, size_t initial_length, char *stack_buffer, size_t stack_buffer_size) { fb->initial_length = (initial_length > 0) ? initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT; if (stack_buffer) { @@ -79,12 +45,26 @@ static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char * fb->ptr = stack_buffer; fb->capa = stack_buffer_size; } +#if JSON_DEBUG + fb->requested = 0; +#endif +} + +static inline void fbuffer_consumed(FBuffer *fb, size_t consumed) +{ +#if JSON_DEBUG + if (consumed > fb->requested) { + rb_bug("fbuffer: Out of bound write"); + } + fb->requested = 0; +#endif + fb->len += consumed; } static void fbuffer_free(FBuffer *fb) { if (fb->ptr && fb->type == FBUFFER_HEAP_ALLOCATED) { - ruby_xfree(fb->ptr); + JSON_SIZED_FREE_N(fb->ptr, fb->capa); } } @@ -99,7 +79,7 @@ static void fbuffer_flush(FBuffer *fb) fbuffer_clear(fb); } -static void fbuffer_realloc(FBuffer *fb, unsigned long required) +static void fbuffer_realloc(FBuffer *fb, size_t required) { if (required > fb->capa) { if (fb->type == FBUFFER_STACK_ALLOCATED) { @@ -108,13 +88,13 @@ static void fbuffer_realloc(FBuffer *fb, unsigned long required) fb->type = FBUFFER_HEAP_ALLOCATED; MEMCPY(fb->ptr, old_buffer, char, fb->len); } else { - REALLOC_N(fb->ptr, char, required); + JSON_SIZED_REALLOC_N(fb->ptr, char, required, fb->capa); } fb->capa = required; } } -static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) +static void fbuffer_do_inc_capa(FBuffer *fb, size_t requested) { if (RB_UNLIKELY(fb->io)) { if (fb->capa < FBUFFER_IO_BUFFER_SIZE) { @@ -128,7 +108,7 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) } } - unsigned long required; + size_t required; if (RB_UNLIKELY(!fb->ptr)) { fb->ptr = ALLOC_N(char, fb->initial_length); @@ -140,75 +120,141 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) fbuffer_realloc(fb, required); } -static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested) +static inline void fbuffer_inc_capa(FBuffer *fb, size_t requested) { +#if JSON_DEBUG + fb->requested = requested; +#endif + if (RB_UNLIKELY(requested > fb->capa - fb->len)) { fbuffer_do_inc_capa(fb, requested); } } -static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len) +static inline size_t fbuffer_size_mul_or_raise(size_t a, size_t b) +{ + size_t result = a * b; + if (RB_UNLIKELY(a != 0 && (result / a) != b)) { + rb_raise(rb_eArgError, "Buffer overflow, the resulting document is too large to be generated"); + } + return result; +} + +static inline void fbuffer_append_reserved(FBuffer *fb, const char *newstr, size_t len) +{ + MEMCPY(fb->ptr + fb->len, newstr, char, len); + fbuffer_consumed(fb, len); +} + +static inline void fbuffer_append(FBuffer *fb, const char *newstr, size_t len) { if (len > 0) { fbuffer_inc_capa(fb, len); - MEMCPY(fb->ptr + fb->len, newstr, char, len); - fb->len += len; + fbuffer_append_reserved(fb, newstr, len); + } +} + +/* Appends a character into a buffer. The buffer needs to have sufficient capacity, via fbuffer_inc_capa(...). */ +static inline void fbuffer_append_reserved_char(FBuffer *fb, char chr) +{ +#if JSON_DEBUG + if (fb->requested < 1) { + rb_bug("fbuffer: unreserved write"); } + fb->requested--; +#endif + + fb->ptr[fb->len] = chr; + fb->len++; } -#ifdef JSON_GENERATOR static void fbuffer_append_str(FBuffer *fb, VALUE str) { - const char *newstr = StringValuePtr(str); - unsigned long len = RSTRING_LEN(str); + const char *ptr; + size_t len; + RSTRING_GETMEM(str, ptr, len); + fbuffer_append(fb, ptr, len); RB_GC_GUARD(str); - - fbuffer_append(fb, newstr, len); } + +static void fbuffer_append_str_repeat(FBuffer *fb, VALUE str, size_t repeat) +{ + const char *ptr; + size_t len; + RSTRING_GETMEM(str, ptr, len); + + fbuffer_inc_capa(fb, fbuffer_size_mul_or_raise(repeat, len)); + while (repeat) { +#if JSON_DEBUG + fb->requested = len; #endif + fbuffer_append_reserved(fb, ptr, len); + repeat--; + } + RB_GC_GUARD(str); +} static inline void fbuffer_append_char(FBuffer *fb, char newchr) { fbuffer_inc_capa(fb, 1); *(fb->ptr + fb->len) = newchr; - fb->len++; + fbuffer_consumed(fb, 1); } -#ifdef JSON_GENERATOR -static long fltoa(long number, char *buf) +static inline char *fbuffer_cursor(FBuffer *fb) { - static const char digits[] = "0123456789"; - long sign = number; - char* tmp = buf; + return fb->ptr + fb->len; +} - if (sign < 0) number = -number; - do *tmp-- = digits[number % 10]; while (number /= 10); - if (sign < 0) *tmp-- = '-'; - return buf - tmp; +static inline void fbuffer_advance_to(FBuffer *fb, char *end) +{ + fbuffer_consumed(fb, (end - fb->ptr) - fb->len); } -#define LONG_BUFFER_SIZE 20 +/* + * Appends the decimal string representation of \a number into the buffer. + */ static void fbuffer_append_long(FBuffer *fb, long number) { - char buf[LONG_BUFFER_SIZE]; - char *buffer_end = buf + LONG_BUFFER_SIZE; - long len = fltoa(number, buffer_end - 1); - fbuffer_append(fb, buffer_end - len, len); + /* + * The jeaiii_ultoa() function produces digits left-to-right, + * allowing us to write directly into the buffer, but we don't know + * the number of resulting characters. + * + * We do know, however, that the `number` argument is always in the + * range 0xc000000000000000 to 0x3fffffffffffffff, or, in decimal, + * -4611686018427387904 to 4611686018427387903. The max number of chars + * generated is therefore 20 (including a potential sign character). + */ + + static const int MAX_CHARS_FOR_LONG = 20; + + fbuffer_inc_capa(fb, MAX_CHARS_FOR_LONG); + + if (number < 0) { + fbuffer_append_reserved_char(fb, '-'); + + /* + * Since number is always > LONG_MIN, `-number` will not overflow + * and is always the positive abs() value. + */ + number = -number; + } + + char *end = jeaiii_ultoa(fbuffer_cursor(fb), number); + fbuffer_advance_to(fb, end); } static VALUE fbuffer_finalize(FBuffer *fb) { if (fb->io) { fbuffer_flush(fb); - fbuffer_free(fb); rb_io_flush(fb->io); return fb->io; } else { - VALUE result = rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb)); - fbuffer_free(fb); - return result; + return rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb)); } } -#endif -#endif + +#endif // _FBUFFER_H_ diff --git a/ext/json/generator/depend b/ext/json/generator/depend index 65be7b8665..3ba4acfdd2 100644 --- a/ext/json/generator/depend +++ b/ext/json/generator/depend @@ -142,6 +142,7 @@ generator.o: $(hdrdir)/ruby/internal/intern/re.h generator.o: $(hdrdir)/ruby/internal/intern/ruby.h generator.o: $(hdrdir)/ruby/internal/intern/select.h generator.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +generator.o: $(hdrdir)/ruby/internal/intern/set.h generator.o: $(hdrdir)/ruby/internal/intern/signal.h generator.o: $(hdrdir)/ruby/internal/intern/sprintf.h generator.o: $(hdrdir)/ruby/internal/intern/string.h @@ -177,5 +178,9 @@ generator.o: $(hdrdir)/ruby/ruby.h generator.o: $(hdrdir)/ruby/st.h generator.o: $(hdrdir)/ruby/subst.h generator.o: $(srcdir)/../fbuffer/fbuffer.h +generator.o: $(srcdir)/../json.h +generator.o: $(srcdir)/../simd/simd.h +generator.o: $(srcdir)/../vendor/fpconv.c +generator.o: $(srcdir)/../vendor/jeaiii-ltoa.h generator.o: generator.c # AUTOGENERATED DEPENDENCIES END diff --git a/ext/json/generator/extconf.rb b/ext/json/generator/extconf.rb index 078068cf63..33af03ea30 100644 --- a/ext/json/generator/extconf.rb +++ b/ext/json/generator/extconf.rb @@ -5,6 +5,15 @@ if RUBY_ENGINE == 'truffleruby' File.write('Makefile', dummy_makefile("").join) else append_cflags("-std=c99") + have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3 + have_func("ruby_xfree_sized", "ruby.h") # RUBY_VERSION >= 4.1 + $defs << "-DJSON_GENERATOR" + $defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0" + + if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"]) + load __dir__ + "/../simd/conf.rb" + end + create_makefile 'json/ext/generator' end diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c index d5c8bfd42a..110b5f6b32 100644 --- a/ext/json/generator/generator.c +++ b/ext/json/generator/generator.c @@ -1,37 +1,46 @@ -#include "ruby.h" +#include "../json.h" #include "../fbuffer/fbuffer.h" +#include "../vendor/fpconv.c" #include <math.h> #include <ctype.h> +#include "../simd/simd.h" + /* ruby api and some helpers */ +enum duplicate_key_action { + JSON_DEPRECATED = 0, + JSON_IGNORE, + JSON_RAISE, +}; + typedef struct JSON_Generator_StateStruct { VALUE indent; VALUE space; VALUE space_before; VALUE object_nl; VALUE array_nl; + VALUE as_json; long max_nesting; long depth; long buffer_initial_length; + enum duplicate_key_action on_duplicate_key; + + bool as_json_single_arg; bool allow_nan; bool ascii_only; bool script_safe; bool strict; } JSON_Generator_State; -#ifndef RB_UNLIKELY -#define RB_UNLIKELY(cond) (cond) -#endif +static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8; -static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8; - -static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode; -static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, - sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict; +static ID i_to_s, i_to_json, i_new, i_encode; +static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key, + sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json; #define GET_STATE_TO(self, state) \ @@ -43,7 +52,7 @@ static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, struct generate_json_data; -typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj); struct generate_json_data { FBuffer *buffer; @@ -51,43 +60,39 @@ struct generate_json_data { JSON_Generator_State *state; VALUE obj; generator_func func; + long depth; }; +static SIMD_Implementation simd_impl; + static VALUE cState_from_state_s(VALUE self, VALUE opts); static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io); -static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -#ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -#endif -static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); -static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj); +static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj); static int usascii_encindex, utf8_encindex, binary_encindex; -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_generator_error_str(VALUE invalid_object, VALUE str) +NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str) { + rb_enc_associate_index(str, utf8_encindex); VALUE exc = rb_exc_new_str(eGeneratorError, str); rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object); rb_exc_raise(exc); } -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif #ifdef RBIMPL_ATTR_FORMAT RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) #endif -static void raise_generator_error(VALUE invalid_object, const char *fmt, ...) +NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...) { va_list args; va_start(args, fmt); @@ -96,6 +101,98 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...) raise_generator_error_str(invalid_object, str); } +// 0 - single byte char that don't need to be escaped. +// (x | 8) - char that needs to be escaped. +static const unsigned char CHAR_LENGTH_MASK = 7; +static const unsigned char ESCAPE_MASK = 8; + +typedef struct _search_state { + const char *ptr; + const char *end; + const char *cursor; + FBuffer *buffer; + +#ifdef HAVE_SIMD + const char *chunk_base; + const char *chunk_end; + bool has_matches; + +#if defined(HAVE_SIMD_NEON) + uint64_t matches_mask; +#elif defined(HAVE_SIMD_SSE2) + int matches_mask; +#else +#error "Unknown SIMD Implementation." +#endif /* HAVE_SIMD_NEON */ +#endif /* HAVE_SIMD */ +} search_state; + +ALWAYS_INLINE(static) void search_flush(search_state *search) +{ + // Do not remove this conditional without profiling, specifically escape-heavy text. + // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush). + // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method + // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the + // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if + // nothing needs to be flushed, we can save a few memory references with this conditional. + if (search->ptr > search->cursor) { + fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor); + search->cursor = search->ptr; + } +} + +static const unsigned char escape_table_basic[256] = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static inline unsigned char search_escape_basic(search_state *search) +{ + while (search->ptr < search->end) { + if (RB_UNLIKELY(escape_table_basic[(const unsigned char)*search->ptr])) { + search_flush(search); + return 1; + } else { + search->ptr++; + } + } + search_flush(search); + return 0; +} + +ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search) +{ + const unsigned char ch = (unsigned char)*search->ptr; + switch (ch) { + case '"': fbuffer_append(search->buffer, "\\\"", 2); break; + case '\\': fbuffer_append(search->buffer, "\\\\", 2); break; + case '/': fbuffer_append(search->buffer, "\\/", 2); break; + case '\b': fbuffer_append(search->buffer, "\\b", 2); break; + case '\f': fbuffer_append(search->buffer, "\\f", 2); break; + case '\n': fbuffer_append(search->buffer, "\\n", 2); break; + case '\r': fbuffer_append(search->buffer, "\\r", 2); break; + case '\t': fbuffer_append(search->buffer, "\\t", 2); break; + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + fbuffer_append(search->buffer, scratch, 6); + break; + } + } + search->ptr++; + search->cursor = search->ptr; +} + /* Converts in_string to a JSON string (without the wrapping '"' * characters) in FBuffer out_buffer. * @@ -106,564 +203,501 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...) * * - If out_ascii_only: non-ASCII characters (>0x7F) * - * - If out_script_safe: forwardslash, line separator (U+2028), and + * - If script_safe: forwardslash (/), line separator (U+2028), and * paragraph separator (U+2029) * * Everything else (should be UTF-8) is just passed through and * appended to the result. */ -static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe) -{ - const char *hexdig = "0123456789abcdef"; - char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' }; - - const char *ptr = RSTRING_PTR(str); - unsigned long len = RSTRING_LEN(str); - unsigned long beg = 0, pos = 0; -#define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos; +#if defined(HAVE_SIMD_NEON) +static inline unsigned char search_escape_basic_neon(search_state *search); +#elif defined(HAVE_SIMD_SSE2) +static inline unsigned char search_escape_basic_sse2(search_state *search); +#endif - while (pos < len) { - unsigned char ch = ptr[pos]; - unsigned char ch_len = escape_table[ch]; - /* JSON encoding */ +static inline unsigned char search_escape_basic(search_state *search); - if (RB_UNLIKELY(ch_len)) { - switch (ch_len) { - case 1: { - FLUSH_POS(1); - switch (ch) { - case '"': fbuffer_append(out_buffer, "\\\"", 2); break; - case '\\': fbuffer_append(out_buffer, "\\\\", 2); break; - case '/': fbuffer_append(out_buffer, "\\/", 2); break; - case '\b': fbuffer_append(out_buffer, "\\b", 2); break; - case '\f': fbuffer_append(out_buffer, "\\f", 2); break; - case '\n': fbuffer_append(out_buffer, "\\n", 2); break; - case '\r': fbuffer_append(out_buffer, "\\r", 2); break; - case '\t': fbuffer_append(out_buffer, "\\t", 2); break; - default: { - scratch[2] = '0'; - scratch[3] = '0'; - scratch[4] = hexdig[(ch >> 4) & 0xf]; - scratch[5] = hexdig[ch & 0xf]; - fbuffer_append(out_buffer, scratch, 6); - break; - } - } - break; - } - case 3: { - unsigned char b2 = ptr[pos + 1]; - if (RB_UNLIKELY(out_script_safe && ch == 0xE2 && b2 == 0x80)) { - unsigned char b3 = ptr[pos + 2]; - if (b3 == 0xA8) { - FLUSH_POS(3); - fbuffer_append(out_buffer, "\\u2028", 6); - break; - } else if (b3 == 0xA9) { - FLUSH_POS(3); - fbuffer_append(out_buffer, "\\u2029", 6); - break; - } - } - // fallthrough - } - default: - pos += ch_len; - break; - } - } else { - pos++; +static inline void convert_UTF8_to_JSON(search_state *search) +{ +#ifdef HAVE_SIMD +#if defined(HAVE_SIMD_NEON) + while (search_escape_basic_neon(search)) { + escape_UTF8_char_basic(search); + } +#elif defined(HAVE_SIMD_SSE2) + if (simd_impl == SIMD_SSE2) { + while (search_escape_basic_sse2(search)) { + escape_UTF8_char_basic(search); } + return; } -#undef FLUSH_POS - - if (beg < len) { - fbuffer_append(out_buffer, &ptr[beg], len - beg); + while (search_escape_basic(search)) { + escape_UTF8_char_basic(search); } - - RB_GC_GUARD(str); +#endif +#else + while (search_escape_basic(search)) { + escape_UTF8_char_basic(search); + } +#endif /* HAVE_SIMD */ } -static const char escape_table[256] = { - // ASCII Control Characters - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - // ASCII Characters - 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, // '"' - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\' - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - // Continuation byte - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - // First byte of a 2-byte code point - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - // First byte of a 4-byte code point - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - //First byte of a 4+byte code point - 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1, -}; - -static const char script_safe_escape_table[256] = { - // ASCII Control Characters - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - // ASCII Characters - 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, // '"' and '/' - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\' - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - // Continuation byte - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - // First byte of a 2-byte code point - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - // First byte of a 4-byte code point - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - //First byte of a 4+byte code point - 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1, -}; - -static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256]) +static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) { - const char *hexdig = "0123456789abcdef"; - char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' }; - - const char *ptr = RSTRING_PTR(str); - unsigned long len = RSTRING_LEN(str); - - unsigned long beg = 0, pos; - - for (pos = 0; pos < len;) { - unsigned char ch = ptr[pos]; - /* JSON encoding */ - if (escape_table[ch]) { - if (pos > beg) { - fbuffer_append(out_buffer, &ptr[beg], pos - beg); - } - - beg = pos + 1; + const unsigned char ch = (unsigned char)*search->ptr; + switch (ch_len) { + case 1: { switch (ch) { - case '"': fbuffer_append(out_buffer, "\\\"", 2); break; - case '\\': fbuffer_append(out_buffer, "\\\\", 2); break; - case '/': fbuffer_append(out_buffer, "\\/", 2); break; - case '\b': fbuffer_append(out_buffer, "\\b", 2); break; - case '\f': fbuffer_append(out_buffer, "\\f", 2); break; - case '\n': fbuffer_append(out_buffer, "\\n", 2); break; - case '\r': fbuffer_append(out_buffer, "\\r", 2); break; - case '\t': fbuffer_append(out_buffer, "\\t", 2); break; - default: - scratch[2] = '0'; - scratch[3] = '0'; + case '"': fbuffer_append(search->buffer, "\\\"", 2); break; + case '\\': fbuffer_append(search->buffer, "\\\\", 2); break; + case '/': fbuffer_append(search->buffer, "\\/", 2); break; + case '\b': fbuffer_append(search->buffer, "\\b", 2); break; + case '\f': fbuffer_append(search->buffer, "\\f", 2); break; + case '\n': fbuffer_append(search->buffer, "\\n", 2); break; + case '\r': fbuffer_append(search->buffer, "\\r", 2); break; + case '\t': fbuffer_append(search->buffer, "\\t", 2); break; + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; scratch[4] = hexdig[(ch >> 4) & 0xf]; scratch[5] = hexdig[ch & 0xf]; - fbuffer_append(out_buffer, scratch, 6); + fbuffer_append(search->buffer, scratch, 6); + break; + } } + break; + } + case 3: { + if (search->ptr[2] & 1) { + fbuffer_append(search->buffer, "\\u2029", 6); + } else { + fbuffer_append(search->buffer, "\\u2028", 6); + } + break; } - - pos++; - } - - if (beg < len) { - fbuffer_append(out_buffer, &ptr[beg], len - beg); } - - RB_GC_GUARD(str); + search->cursor = (search->ptr += ch_len); } -static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe) +#ifdef HAVE_SIMD + +ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len) { - const char *hexdig = "0123456789abcdef"; - char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' }; + RBIMPL_ASSERT_OR_ASSUME(len < vec_len); - const char *ptr = RSTRING_PTR(str); - unsigned long len = RSTRING_LEN(str); + // Flush the buffer so everything up until the last 'len' characters are unflushed. + search_flush(search); - unsigned long beg = 0, pos = 0; + FBuffer *buf = search->buffer; + fbuffer_inc_capa(buf, vec_len); -#define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos; + char *s = (buf->ptr + buf->len); - while (pos < len) { - unsigned char ch = ptr[pos]; - unsigned char ch_len = escape_table[ch]; + // Pad the buffer with dummy characters that won't need escaping. + // This seem wasteful at first sight, but memset of vector length is very fast. + // This is a space as it can be directly represented as an immediate on AArch64. + memset(s, ' ', vec_len); - if (RB_UNLIKELY(ch_len)) { - switch (ch_len) { - case 1: { - FLUSH_POS(1); - switch (ch) { - case '"': fbuffer_append(out_buffer, "\\\"", 2); break; - case '\\': fbuffer_append(out_buffer, "\\\\", 2); break; - case '/': fbuffer_append(out_buffer, "\\/", 2); break; - case '\b': fbuffer_append(out_buffer, "\\b", 2); break; - case '\f': fbuffer_append(out_buffer, "\\f", 2); break; - case '\n': fbuffer_append(out_buffer, "\\n", 2); break; - case '\r': fbuffer_append(out_buffer, "\\r", 2); break; - case '\t': fbuffer_append(out_buffer, "\\t", 2); break; - default: { - scratch[2] = '0'; - scratch[3] = '0'; - scratch[4] = hexdig[(ch >> 4) & 0xf]; - scratch[5] = hexdig[ch & 0xf]; - fbuffer_append(out_buffer, scratch, 6); - break; - } - } - break; - } - default: { - uint32_t wchar = 0; - switch(ch_len) { - case 2: - wchar = ptr[pos] & 0x1F; - break; - case 3: - wchar = ptr[pos] & 0x0F; - break; - case 4: - wchar = ptr[pos] & 0x07; - break; - } + // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters + // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage. + if (vec_len == 16) { + RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); + json_fast_memcpy16(s, search->ptr, len); + } else { + MEMCPY(s, search->ptr, char, len); + } - for (short i = 1; i < ch_len; i++) { - wchar = (wchar << 6) | (ptr[pos+i] & 0x3F); - } + return s; +} - FLUSH_POS(ch_len); +#ifdef HAVE_SIMD_NEON - if (wchar <= 0xFFFF) { - scratch[2] = hexdig[wchar >> 12]; - scratch[3] = hexdig[(wchar >> 8) & 0xf]; - scratch[4] = hexdig[(wchar >> 4) & 0xf]; - scratch[5] = hexdig[wchar & 0xf]; - fbuffer_append(out_buffer, scratch, 6); - } else { - uint16_t hi, lo; - wchar -= 0x10000; - hi = 0xD800 + (uint16_t)(wchar >> 10); - lo = 0xDC00 + (uint16_t)(wchar & 0x3FF); - - scratch[2] = hexdig[hi >> 12]; - scratch[3] = hexdig[(hi >> 8) & 0xf]; - scratch[4] = hexdig[(hi >> 4) & 0xf]; - scratch[5] = hexdig[hi & 0xf]; - - scratch[8] = hexdig[lo >> 12]; - scratch[9] = hexdig[(lo >> 8) & 0xf]; - scratch[10] = hexdig[(lo >> 4) & 0xf]; - scratch[11] = hexdig[lo & 0xf]; - - fbuffer_append(out_buffer, scratch, 12); - } +ALWAYS_INLINE(static) unsigned char neon_next_match(search_state *search) +{ + uint64_t mask = search->matches_mask; + uint32_t index = trailing_zeros64(mask) >> 2; - break; - } - } + // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character. + // If we want to use a similar approach for full escaping we'll need to ensure: + // search->chunk_base + index >= search->ptr + // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match + // is one byte after the previous match then: + // search->chunk_base + index == search->ptr + search->ptr = search->chunk_base + index; + mask &= mask - 1; + search->matches_mask = mask; + search_flush(search); + return 1; +} + +static inline unsigned char search_escape_basic_neon(search_state *search) +{ + if (RB_UNLIKELY(search->has_matches)) { + // There are more matches if search->matches_mask > 0. + if (search->matches_mask > 0) { + return neon_next_match(search); } else { - pos++; + // neon_next_match will only advance search->ptr up to the last matching character. + // Skip over any characters in the last chunk that occur after the last match. + search->has_matches = false; + search->ptr = search->chunk_end; } } -#undef FLUSH_POS - if (beg < len) { - fbuffer_append(out_buffer, &ptr[beg], len - beg); + /* + * The code below implements an SIMD-based algorithm to determine if N bytes at a time + * need to be escaped. + * + * Assume the ptr = "Te\sting!" (the double quotes are included in the string) + * + * The explanation will be limited to the first 8 bytes of the string for simplicity. However + * the vector insructions may work on larger vectors. + * + * First, we load three constants 'lower_bound', 'backslash' and 'dblquote" in vector registers. + * + * lower_bound: [20 20 20 20 20 20 20 20] + * backslash: [5C 5C 5C 5C 5C 5C 5C 5C] + * dblquote: [22 22 22 22 22 22 22 22] + * + * Next we load the first chunk of the ptr: + * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n) + * + * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector + * as no bytes are less than 32 (0x20): + * [0 0 0 0 0 0 0 0] + * + * Next, we check if any byte in chunk is equal to a backslash: + * [0 0 0 FF 0 0 0 0] + * + * Finally we check if any byte in chunk is equal to a double quote: + * [FF 0 0 0 0 0 0 0] + * + * Now we have three vectors where each byte indicates if the corresponding byte in chunk + * needs to be escaped. We combine these vectors with a series of logical OR instructions. + * This is the needs_escape vector and it is equal to: + * [FF 0 0 FF 0 0 0 0] + * + * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of + * the values in the vector. This computes how many bytes need to be escaped within this chunk. + * + * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then, + * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we + * have at least one byte that needs to be escaped. + */ + + if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) { + search->has_matches = true; + search->chunk_base = search->ptr; + search->chunk_end = search->ptr + sizeof(uint8x16_t); + return neon_next_match(search); } - RB_GC_GUARD(str); -} + // There are fewer than 16 bytes left. + unsigned long remaining = (search->end - search->ptr); + if (remaining >= SIMD_MINIMUM_THRESHOLD) { + char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining); -/* - * Document-module: JSON::Ext::Generator - * - * This is the JSON generator implemented as a C extension. It can be - * configured to be used by setting - * - * JSON.generator = JSON::Ext::Generator - * - * with the method generator= in JSON. - * - */ + uint64_t mask = compute_chunk_mask_neon(s); -/* Explanation of the following: that's the only way to not pollute - * standard library's docs with GeneratorMethods::<ClassName> which - * are uninformative and take a large place in a list of classes - */ + if (!mask) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. + fbuffer_consumed(search->buffer, remaining); + search->ptr = search->end; + search->cursor = search->end; + return 0; + } -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods - * :nodoc: - */ + search->matches_mask = mask; + search->has_matches = true; + search->chunk_end = search->end; + search->chunk_base = search->ptr; + return neon_next_match(search); + } -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Array - * :nodoc: - */ + if (search->ptr < search->end) { + return search_escape_basic(search); + } -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Bignum - * :nodoc: - */ + search_flush(search); + return 0; +} +#endif /* HAVE_SIMD_NEON */ -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::FalseClass - * :nodoc: - */ +#ifdef HAVE_SIMD_SSE2 -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Fixnum - * :nodoc: - */ +ALWAYS_INLINE(static) unsigned char sse2_next_match(search_state *search) +{ + int mask = search->matches_mask; + int index = trailing_zeros(mask); -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Float - * :nodoc: - */ + // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character. + // If we want to use a similar approach for full escaping we'll need to ensure: + // search->chunk_base + index >= search->ptr + // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match + // is one byte after the previous match then: + // search->chunk_base + index == search->ptr + search->ptr = search->chunk_base + index; + mask &= mask - 1; + search->matches_mask = mask; + search_flush(search); + return 1; +} -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Hash - * :nodoc: - */ +#if defined(__clang__) || defined(__GNUC__) +#define TARGET_SSE2 __attribute__((target("sse2"))) +#else +#define TARGET_SSE2 +#endif -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Integer - * :nodoc: - */ +ALWAYS_INLINE(static) TARGET_SSE2 unsigned char search_escape_basic_sse2(search_state *search) +{ + if (RB_UNLIKELY(search->has_matches)) { + // There are more matches if search->matches_mask > 0. + if (search->matches_mask > 0) { + return sse2_next_match(search); + } else { + // sse2_next_match will only advance search->ptr up to the last matching character. + // Skip over any characters in the last chunk that occur after the last match. + search->has_matches = false; + if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) { + search->ptr = search->end; + } else { + search->ptr = search->chunk_base + sizeof(__m128i); + } + } + } -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::NilClass - * :nodoc: - */ + if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) { + search->has_matches = true; + search->chunk_base = search->ptr; + search->chunk_end = search->ptr + sizeof(__m128i); + return sse2_next_match(search); + } -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Object - * :nodoc: - */ + // There are fewer than 16 bytes left. + unsigned long remaining = (search->end - search->ptr); + if (remaining >= SIMD_MINIMUM_THRESHOLD) { + char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining); -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::String - * :nodoc: - */ + int needs_escape_mask = compute_chunk_mask_sse2(s); -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::String::Extend - * :nodoc: - */ + if (needs_escape_mask == 0) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. + fbuffer_consumed(search->buffer, remaining); + search->ptr = search->end; + search->cursor = search->end; + return 0; + } -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::TrueClass - * :nodoc: - */ + search->has_matches = true; + search->matches_mask = needs_escape_mask; + search->chunk_base = search->ptr; + return sse2_next_match(search); + } -/* - * call-seq: to_json(state = nil) - * - * Returns a JSON string containing a JSON object, that is generated from - * this Hash instance. - * _state_ is a JSON::State object, that can also be used to configure the - * produced JSON string output further. - */ -static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_object, Qfalse); -} + if (search->ptr < search->end) { + return search_escape_basic(search); + } -/* - * call-seq: to_json(state = nil) - * - * Returns a JSON string containing a JSON array, that is generated from - * this Array instance. - * _state_ is a JSON::State object, that can also be used to configure the - * produced JSON string output further. - */ -static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_array, Qfalse); + search_flush(search); + return 0; } -#ifdef RUBY_INTEGER_UNIFICATION -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_integer, Qfalse); -} +#endif /* HAVE_SIMD_SSE2 */ -#else -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_fixnum, Qfalse); -} +#endif /* HAVE_SIMD */ -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_bignum, Qfalse); -} -#endif +static const unsigned char script_safe_escape_table[256] = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Continuation byte + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // First byte of a 2-byte code point + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // First byte of a 3-byte code point + 3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029 + //First byte of a 4+ byte code point + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, +}; -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Float number. - */ -static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) +static inline unsigned char search_script_safe_escape(search_state *search) { - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_float, Qfalse); -} + while (search->ptr < search->end) { + unsigned char ch = (unsigned char)*search->ptr; + unsigned char ch_len = script_safe_escape_table[ch]; -/* - * call-seq: String.included(modul) - * - * Extends _modul_ with the String::Extend module. - */ -static VALUE mString_included_s(VALUE self, VALUE modul) { - VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend); - rb_call_super(1, &modul); - return result; + if (RB_UNLIKELY(ch_len)) { + if (ch_len & ESCAPE_MASK) { + if (RB_UNLIKELY(ch_len == 11)) { + const unsigned char *uptr = (const unsigned char *)search->ptr; + if (!(uptr[1] == 0x80 && (uptr[2] >> 1) == 0x54)) { + search->ptr += 3; + continue; + } + } + search_flush(search); + return ch_len & CHAR_LENGTH_MASK; + } else { + search->ptr += ch_len; + } + } else { + search->ptr++; + } + } + search_flush(search); + return 0; } -/* - * call-seq: to_json(*) - * - * This string should be encoded with UTF-8 A call to this method - * returns a JSON string encoded with UTF16 big endian characters as - * \u????. - */ -static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) +static void convert_UTF8_to_script_safe_JSON(search_state *search) { - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_string, Qfalse); + unsigned char ch_len; + while ((ch_len = search_script_safe_escape(search))) { + escape_UTF8_char(search, ch_len); + } } -/* - * call-seq: to_json_raw_object() - * - * This method creates a raw object hash, that can be nested into - * other data structures and will be generated as a raw string. This - * method should be used, if you want to convert raw strings to JSON - * instead of UTF-8 strings, e. g. binary data. - */ -static VALUE mString_to_json_raw_object(VALUE self) -{ - VALUE ary; - VALUE result = rb_hash_new(); - rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self))); - ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*")); - rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary); - return result; -} +static const unsigned char ascii_only_escape_table[256] = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Continuation byte + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // First byte of a 2-byte code point + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // First byte of a 3-byte code point + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + //First byte of a 4+ byte code point + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, +}; -/* - * call-seq: to_json_raw(*args) - * - * This method creates a JSON text from the result of a call to - * to_json_raw_object of this String. - */ -static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self) +static inline unsigned char search_ascii_only_escape(search_state *search, const unsigned char escape_table[256]) { - VALUE obj = mString_to_json_raw_object(self); - Check_Type(obj, T_HASH); - return mHash_to_json(argc, argv, obj); -} + while (search->ptr < search->end) { + unsigned char ch = (unsigned char)*search->ptr; + unsigned char ch_len = escape_table[ch]; -/* - * call-seq: json_create(o) - * - * Raw Strings are JSON Objects (the raw bytes are stored in an array for the - * key "raw"). The Ruby String can be created by this module method. - */ -static VALUE mString_Extend_json_create(VALUE self, VALUE o) -{ - VALUE ary; - Check_Type(o, T_HASH); - ary = rb_hash_aref(o, rb_str_new2("raw")); - return rb_funcall(ary, i_pack, 1, rb_str_new2("C*")); + if (RB_UNLIKELY(ch_len)) { + search_flush(search); + return ch_len & CHAR_LENGTH_MASK; + } else { + search->ptr++; + } + } + search_flush(search); + return 0; } -/* - * call-seq: to_json(*) - * - * Returns a JSON string for true: 'true'. - */ -static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) +static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) { - rb_check_arity(argc, 0, 1); - return rb_utf8_str_new("true", 4); -} + const unsigned char ch = (unsigned char)*search->ptr; + switch (ch_len) { + case 1: { + switch (ch) { + case '"': fbuffer_append(search->buffer, "\\\"", 2); break; + case '\\': fbuffer_append(search->buffer, "\\\\", 2); break; + case '/': fbuffer_append(search->buffer, "\\/", 2); break; + case '\b': fbuffer_append(search->buffer, "\\b", 2); break; + case '\f': fbuffer_append(search->buffer, "\\f", 2); break; + case '\n': fbuffer_append(search->buffer, "\\n", 2); break; + case '\r': fbuffer_append(search->buffer, "\\r", 2); break; + case '\t': fbuffer_append(search->buffer, "\\t", 2); break; + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[6] = { '\\', 'u', '0', '0', 0, 0 }; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + fbuffer_append(search->buffer, scratch, 6); + break; + } + } + break; + } + default: { + const char *hexdig = "0123456789abcdef"; + char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' }; -/* - * call-seq: to_json(*) - * - * Returns a JSON string for false: 'false'. - */ -static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - return rb_utf8_str_new("false", 5); -} + uint32_t wchar = 0; -/* - * call-seq: to_json(*) - * - * Returns a JSON string for nil: 'null'. - */ -static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - return rb_utf8_str_new("null", 4); + switch (ch_len) { + case 2: + wchar = ch & 0x1F; + break; + case 3: + wchar = ch & 0x0F; + break; + case 4: + wchar = ch & 0x07; + break; + } + + for (short i = 1; i < ch_len; i++) { + wchar = (wchar << 6) | (search->ptr[i] & 0x3F); + } + + if (wchar <= 0xFFFF) { + scratch[2] = hexdig[wchar >> 12]; + scratch[3] = hexdig[(wchar >> 8) & 0xf]; + scratch[4] = hexdig[(wchar >> 4) & 0xf]; + scratch[5] = hexdig[wchar & 0xf]; + fbuffer_append(search->buffer, scratch, 6); + } else { + uint16_t hi, lo; + wchar -= 0x10000; + hi = 0xD800 + (uint16_t)(wchar >> 10); + lo = 0xDC00 + (uint16_t)(wchar & 0x3FF); + + scratch[2] = hexdig[hi >> 12]; + scratch[3] = hexdig[(hi >> 8) & 0xf]; + scratch[4] = hexdig[(hi >> 4) & 0xf]; + scratch[5] = hexdig[hi & 0xf]; + + scratch[8] = hexdig[lo >> 12]; + scratch[9] = hexdig[(lo >> 8) & 0xf]; + scratch[10] = hexdig[(lo >> 4) & 0xf]; + scratch[11] = hexdig[lo & 0xf]; + + fbuffer_append(search->buffer, scratch, 12); + } + + break; + } + } + search->cursor = (search->ptr += ch_len); } -/* - * call-seq: to_json(*) - * - * Converts this object to a string (calling #to_s), converts - * it to a JSON string, and returns the result. This is a fallback, if no - * special method #to_json was defined for some object. - */ -static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) +static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned char escape_table[256]) { - VALUE state; - VALUE string = rb_funcall(self, i_to_s, 0); - rb_scan_args(argc, argv, "01", &state); - Check_Type(string, T_STRING); - state = cState_from_state_s(cState, state); - return cState_partial_generate(state, string, generate_json_string, Qfalse); + unsigned char ch_len; + while ((ch_len = search_ascii_only_escape(search, escape_table))) { + full_escape_UTF8_char(search, ch_len); + } } static void State_mark(void *ptr) @@ -674,6 +708,7 @@ static void State_mark(void *ptr) rb_gc_mark_movable(state->space_before); rb_gc_mark_movable(state->object_nl); rb_gc_mark_movable(state->array_nl); + rb_gc_mark_movable(state->as_json); } static void State_compact(void *ptr) @@ -684,12 +719,7 @@ static void State_compact(void *ptr) state->space_before = rb_gc_location(state->space_before); state->object_nl = rb_gc_location(state->object_nl); state->array_nl = rb_gc_location(state->array_nl); -} - -static void State_free(void *ptr) -{ - JSON_Generator_State *state = ptr; - ruby_xfree(state); + state->as_json = rb_gc_location(state->as_json); } static size_t State_memsize(const void *ptr) @@ -697,21 +727,15 @@ static size_t State_memsize(const void *ptr) return sizeof(JSON_Generator_State); } -#ifndef HAVE_RB_EXT_RACTOR_SAFE -# undef RUBY_TYPED_FROZEN_SHAREABLE -# define RUBY_TYPED_FROZEN_SHAREABLE 0 -#endif - static const rb_data_type_t JSON_Generator_State_type = { - "JSON/Generator/State", - { + .wrap_struct_name = "JSON/Generator/State", + .function = { .dmark = State_mark, - .dfree = State_free, + .dfree = RUBY_DEFAULT_FREE, .dsize = State_memsize, .dcompact = State_compact, }, - 0, 0, - RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE, + .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE, }; static void state_init(JSON_Generator_State *state) @@ -740,21 +764,196 @@ static void vstate_spill(struct generate_json_data *data) RB_OBJ_WRITTEN(vstate, Qundef, state->space_before); RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl); RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl); + RB_OBJ_WRITTEN(vstate, Qundef, state->as_json); } -static inline VALUE vstate_get(struct generate_json_data *data) +static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj) { if (RB_UNLIKELY(!data->vstate)) { vstate_spill(data); } - return data->vstate; + GET_STATE(data->vstate); + state->depth = data->depth; + VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate); + // no need to restore state->depth, vstate is just a temporary State + return tmp; +} + +static VALUE +json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key) +{ + VALUE proc_args[2] = {object, is_key}; + return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil); +} + +static VALUE +convert_string_subclass(VALUE key) +{ + VALUE key_to_s = rb_funcall(key, i_to_s, 0); + + if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) { + VALUE cname = rb_obj_class(key); + rb_raise(rb_eTypeError, + "can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")", + cname, "String", cname, "to_s", rb_obj_class(key_to_s)); + } + + return key_to_s; +} + +static bool enc_utf8_compatible_p(int enc_idx) +{ + if (enc_idx == usascii_encindex) return true; + if (enc_idx == utf8_encindex) return true; + return false; +} + +static VALUE encode_json_string_try(VALUE str) +{ + return rb_funcall(str, i_encode, 1, Encoding_UTF_8); +} + +static VALUE encode_json_string_rescue(VALUE str, VALUE exception) +{ + raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0)); + return Qundef; +} + +static inline int json_str_coderange(VALUE str) { + int coderange = RB_ENC_CODERANGE(str); + if (coderange == RUBY_ENC_CODERANGE_UNKNOWN) { + coderange = rb_enc_str_coderange(str); + } + return coderange; +} + +static inline bool valid_json_string_p(VALUE str) +{ + int coderange = json_str_coderange(str); + + if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) { + return true; + } + + if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) { + return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str)); + } + + return false; +} + +NOINLINE(static) VALUE convert_invalid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key) +{ + if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) { + VALUE coerced_str = json_call_as_json(data->state, str, Qfalse); + if (coerced_str != str) { + if (RB_TYPE_P(coerced_str, T_STRING)) { + if (!valid_json_string_p(coerced_str)) { + raise_generator_error(str, "source sequence is illegal/malformed utf-8"); + } + } else { + // as_json could return another type than T_STRING + if (is_key) { + raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str)); + } + } + + return coerced_str; + } + } + + if (RB_ENCODING_GET_INLINED(str) == binary_encindex) { + VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); + switch (rb_enc_str_coderange(utf8_string)) { + case ENC_CODERANGE_7BIT: + return utf8_string; + case ENC_CODERANGE_VALID: + // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. + // TODO: Raise in 3.0.0 + rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); + return utf8_string; + break; + } + } + + return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str); +} + +ALWAYS_INLINE(static) VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key) +{ + if (RB_LIKELY(valid_json_string_p(str))) { + return str; + } + else { + return convert_invalid_encoding(data, str, as_json_called, is_key); + } +} + +static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + fbuffer_append_char(buffer, '"'); + + long len; + search_state search; + search.buffer = buffer; + RSTRING_GETMEM(obj, search.ptr, len); + search.cursor = search.ptr; + search.end = search.ptr + len; + +#ifdef HAVE_SIMD + search.matches_mask = 0; + search.has_matches = false; + search.chunk_base = NULL; + search.chunk_end = NULL; +#endif /* HAVE_SIMD */ + + switch (json_str_coderange(obj)) { + case ENC_CODERANGE_7BIT: + case ENC_CODERANGE_VALID: + if (RB_UNLIKELY(data->state->ascii_only)) { + convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table); + } else if (RB_UNLIKELY(data->state->script_safe)) { + convert_UTF8_to_script_safe_JSON(&search); + } else { + convert_UTF8_to_JSON(&search); + } + break; + default: + raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); + break; + } + fbuffer_append_char(buffer, '"'); +} + +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + obj = ensure_valid_encoding(data, obj, false, false); + raw_generate_json_string(buffer, data, obj); } struct hash_foreach_arg { + VALUE hash; struct generate_json_data *data; - int iter; + int first_key_type; + bool first; + bool mixed_keys_encountered; }; +NOINLINE(static) void +json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg) +{ + if (arg->mixed_keys_encountered) { + return; + } + arg->mixed_keys_encountered = true; + + JSON_Generator_State *state = arg->data->state; + if (state->on_duplicate_key != JSON_IGNORE) { + VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse; + rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise); + } +} + static int json_object_i(VALUE key, VALUE val, VALUE _arg) { @@ -764,256 +963,257 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) FBuffer *buffer = data->buffer; JSON_Generator_State *state = data->state; - long depth = state->depth; - int j; + long depth = data->depth; + int key_type = rb_type(key); - if (arg->iter > 0) fbuffer_append_char(buffer, ','); - if (RB_UNLIKELY(state->object_nl)) { - fbuffer_append_str(buffer, state->object_nl); + if (arg->first) { + arg->first = false; + arg->first_key_type = key_type; } - if (RB_UNLIKELY(state->indent)) { - for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, state->indent); - } + else { + fbuffer_append_char(buffer, ','); + } + + if (RB_UNLIKELY(data->state->object_nl)) { + fbuffer_append_str(buffer, data->state->object_nl); + } + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } VALUE key_to_s; - switch(rb_type(key)) { + bool as_json_called = false; + + start: + switch (key_type) { case T_STRING: + if (RB_UNLIKELY(arg->first_key_type != T_STRING)) { + json_inspect_hash_with_mixed_keys(arg); + } + if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) { key_to_s = key; } else { - key_to_s = rb_funcall(key, i_to_s, 0); + key_to_s = convert_string_subclass(key); } break; case T_SYMBOL: + if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) { + json_inspect_hash_with_mixed_keys(arg); + } + key_to_s = rb_sym2str(key); break; default: + if (data->state->strict) { + if (RTEST(data->state->as_json) && !as_json_called) { + key = json_call_as_json(data->state, key, Qtrue); + key_type = rb_type(key); + as_json_called = true; + goto start; + } else { + raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key)); + } + } key_to_s = rb_convert_type(key, T_STRING, "String", "to_s"); break; } + key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true); + if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) { - generate_json_string(buffer, data, state, key_to_s); + raw_generate_json_string(buffer, data, key_to_s); } else { - generate_json(buffer, data, state, key_to_s); + generate_json(buffer, data, key_to_s); } - if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before); + if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before); fbuffer_append_char(buffer, ':'); - if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space); - generate_json(buffer, data, state, val); + if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space); + generate_json(buffer, data, val); - arg->iter++; return ST_CONTINUE; } -static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static inline long increase_depth(struct generate_json_data *data) { - long max_nesting = state->max_nesting; - long depth = ++state->depth; - int j; - - if (max_nesting != 0 && depth > max_nesting) { - rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); + JSON_Generator_State *state = data->state; + long depth = ++data->depth; + if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) { + rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --data->depth); } + return depth; +} + +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + long depth = increase_depth(data); if (RHASH_SIZE(obj) == 0) { fbuffer_append(buffer, "{}", 2); - --state->depth; + --data->depth; return; } fbuffer_append_char(buffer, '{'); struct hash_foreach_arg arg = { + .hash = obj, .data = data, - .iter = 0, + .first = true, }; rb_hash_foreach(obj, json_object_i, (VALUE)&arg); - depth = --state->depth; - if (RB_UNLIKELY(state->object_nl)) { - fbuffer_append_str(buffer, state->object_nl); - if (RB_UNLIKELY(state->indent)) { - for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, state->indent); - } + depth = --data->depth; + if (RB_UNLIKELY(data->state->object_nl)) { + fbuffer_append_str(buffer, data->state->object_nl); + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } } fbuffer_append_char(buffer, '}'); } -static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - long max_nesting = state->max_nesting; - long depth = ++state->depth; - int i, j; - if (max_nesting != 0 && depth > max_nesting) { - rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); - } + long depth = increase_depth(data); if (RARRAY_LEN(obj) == 0) { fbuffer_append(buffer, "[]", 2); - --state->depth; + --data->depth; return; } fbuffer_append_char(buffer, '['); - if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl); - for(i = 0; i < RARRAY_LEN(obj); i++) { + if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl); + for (int i = 0; i < RARRAY_LEN(obj); i++) { if (i > 0) { fbuffer_append_char(buffer, ','); - if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl); + if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl); } - if (RB_UNLIKELY(state->indent)) { - for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, state->indent); - } + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } - generate_json(buffer, data, state, RARRAY_AREF(obj, i)); + generate_json(buffer, data, RARRAY_AREF(obj, i)); } - state->depth = --depth; - if (RB_UNLIKELY(state->array_nl)) { - fbuffer_append_str(buffer, state->array_nl); - if (RB_UNLIKELY(state->indent)) { - for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, state->indent); - } + data->depth = --depth; + if (RB_UNLIKELY(data->state->array_nl)) { + fbuffer_append_str(buffer, data->state->array_nl); + if (RB_UNLIKELY(data->state->indent)) { + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } } fbuffer_append_char(buffer, ']'); } -static inline int enc_utf8_compatible_p(int enc_idx) -{ - if (enc_idx == usascii_encindex) return 1; - if (enc_idx == utf8_encindex) return 1; - return 0; -} - -static VALUE encode_json_string_try(VALUE str) -{ - return rb_funcall(str, i_encode, 1, Encoding_UTF_8); -} - -static VALUE encode_json_string_rescue(VALUE str, VALUE exception) +static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0)); - return Qundef; -} - -static inline VALUE ensure_valid_encoding(VALUE str) -{ - int encindex = RB_ENCODING_GET(str); - VALUE utf8_string; - if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) { - if (encindex == binary_encindex) { - utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); - switch (rb_enc_str_coderange(utf8_string)) { - case ENC_CODERANGE_7BIT: - return utf8_string; - case ENC_CODERANGE_VALID: - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Raise in 3.0.0 - rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); - return utf8_string; - break; - } - } - - str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str); + VALUE tmp; + if (rb_respond_to(obj, i_to_json)) { + tmp = json_call_to_json(data, obj); + Check_Type(tmp, T_STRING); + fbuffer_append_str(buffer, tmp); + } else { + tmp = rb_funcall(obj, i_to_s, 0); + Check_Type(tmp, T_STRING); + generate_json_string(buffer, data, tmp); } - return str; } -static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - obj = ensure_valid_encoding(obj); - - fbuffer_append_char(buffer, '"'); - - switch(rb_enc_str_coderange(obj)) { - case ENC_CODERANGE_7BIT: - convert_ASCII_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table); - break; - case ENC_CODERANGE_VALID: - if (RB_UNLIKELY(state->ascii_only)) { - convert_UTF8_to_ASCII_only_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe); - } else { - convert_UTF8_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe); - } - break; - default: - raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); - break; + if (data->state->strict) { + generate_json_string(buffer, data, rb_sym2str(obj)); + } else { + generate_json_fallback(buffer, data, obj); } - fbuffer_append_char(buffer, '"'); } -static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "null", 4); } -static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "false", 5); } -static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append(buffer, "true", 4); } -static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { fbuffer_append_long(buffer, FIX2LONG(obj)); } -static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { VALUE tmp = rb_funcall(obj, i_to_s, 0); - fbuffer_append_str(buffer, tmp); -} - -#ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) -{ - if (FIXNUM_P(obj)) - generate_json_fixnum(buffer, data, state, obj); - else - generate_json_bignum(buffer, data, state, obj); + fbuffer_append_str(buffer, StringValue(tmp)); } -#endif -static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { double value = RFLOAT_VALUE(obj); - char allow_nan = state->allow_nan; - VALUE tmp = rb_funcall(obj, i_to_s, 0); - if (!allow_nan) { - if (isinf(value) || isnan(value)) { - raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", tmp); + char allow_nan = data->state->allow_nan; + if (isinf(value) || isnan(value)) { + /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */ + if (!allow_nan) { + if (data->state->strict && data->state->as_json) { + VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse); + if (casted_obj != obj) { + increase_depth(data); + generate_json(buffer, data, casted_obj); + data->depth--; + return; + } + } + raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0)); } + + VALUE tmp = rb_funcall(obj, i_to_s, 0); + fbuffer_append_str(buffer, tmp); + return; } - fbuffer_append_str(buffer, tmp); + + /* This implementation writes directly into the buffer. We reserve + * the 32 characters that fpconv_dtoa states as its maximum. + */ + fbuffer_inc_capa(buffer, 32); + char* d = buffer->ptr + buffer->len; + int len = fpconv_dtoa(value, d); + /* fpconv_dtoa converts a float to its shortest string representation, + * but it adds a ".0" if this is a plain integer. + */ + fbuffer_consumed(buffer, len); } -static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) +static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - VALUE tmp; + VALUE fragment = RSTRUCT_GET(obj, 0); + Check_Type(fragment, T_STRING); + fbuffer_append_str(buffer, fragment); +} + +static inline void generate_json_general(FBuffer *buffer, struct generate_json_data *data, VALUE obj, bool fallback) +{ + bool as_json_called = false; +start: if (obj == Qnil) { - generate_json_null(buffer, data, state, obj); + generate_json_null(buffer, data, obj); } else if (obj == Qfalse) { - generate_json_false(buffer, data, state, obj); + generate_json_false(buffer, data, obj); } else if (obj == Qtrue) { - generate_json_true(buffer, data, state, obj); + generate_json_true(buffer, data, obj); } else if (RB_SPECIAL_CONST_P(obj)) { if (RB_FIXNUM_P(obj)) { - generate_json_fixnum(buffer, data, state, obj); + generate_json_fixnum(buffer, data, obj); } else if (RB_FLONUM_P(obj)) { - generate_json_float(buffer, data, state, obj); + generate_json_float(buffer, data, obj); + } else if (RB_STATIC_SYM_P(obj)) { + generate_json_symbol(buffer, data, obj); } else { goto general; } @@ -1021,61 +1221,85 @@ static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON VALUE klass = RBASIC_CLASS(obj); switch (RB_BUILTIN_TYPE(obj)) { case T_BIGNUM: - generate_json_bignum(buffer, data, state, obj); + generate_json_bignum(buffer, data, obj); break; case T_HASH: - if (klass != rb_cHash) goto general; - generate_json_object(buffer, data, state, obj); + if (fallback && klass != rb_cHash) goto general; + generate_json_object(buffer, data, obj); break; case T_ARRAY: - if (klass != rb_cArray) goto general; - generate_json_array(buffer, data, state, obj); + if (fallback && klass != rb_cArray) goto general; + generate_json_array(buffer, data, obj); break; case T_STRING: - if (klass != rb_cString) goto general; - generate_json_string(buffer, data, state, obj); + if (fallback && klass != rb_cString) goto general; + + if (RB_LIKELY(valid_json_string_p(obj))) { + raw_generate_json_string(buffer, data, obj); + } else if (as_json_called) { + raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); + } else { + obj = ensure_valid_encoding(data, obj, false, false); + as_json_called = true; + goto start; + } + break; + case T_SYMBOL: + generate_json_symbol(buffer, data, obj); break; case T_FLOAT: - if (klass != rb_cFloat) goto general; - generate_json_float(buffer, data, state, obj); + if (fallback && klass != rb_cFloat) goto general; + generate_json_float(buffer, data, obj); + break; + case T_STRUCT: + if (klass != cFragment) goto general; + generate_json_fragment(buffer, data, obj); break; default: general: - if (state->strict) { - raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj)); - } else if (rb_respond_to(obj, i_to_json)) { - tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data)); - Check_Type(tmp, T_STRING); - fbuffer_append_str(buffer, tmp); + if (data->state->strict) { + if (RTEST(data->state->as_json) && !as_json_called) { + obj = json_call_as_json(data->state, obj, Qfalse); + as_json_called = true; + goto start; + } else { + raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj)); + } } else { - tmp = rb_funcall(obj, i_to_s, 0); - Check_Type(tmp, T_STRING); - generate_json_string(buffer, data, state, tmp); + generate_json_fallback(buffer, data, obj); } } } } +static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + generate_json_general(buffer, data, obj, true); +} + +static void generate_json_no_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + generate_json_general(buffer, data, obj, false); +} + static VALUE generate_json_try(VALUE d) { struct generate_json_data *data = (struct generate_json_data *)d; - data->func(data->buffer, data, data->state, data->obj); + data->func(data->buffer, data, data->obj); - return Qnil; + return fbuffer_finalize(data->buffer); } -static VALUE generate_json_rescue(VALUE d, VALUE exc) +static VALUE generate_json_ensure(VALUE d) { struct generate_json_data *data = (struct generate_json_data *)d; fbuffer_free(data->buffer); - rb_exc_raise(exc); - return Qundef; } -static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io) +static inline VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io) { GET_STATE(self); @@ -1087,22 +1311,38 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, struct generate_json_data data = { .buffer = &buffer, - .vstate = self, + .vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json .state = state, + .depth = state->depth, .obj = obj, .func = func }; - rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); + return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data); +} - return fbuffer_finalize(&buffer); +/* call-seq: + * generate(obj) -> String + * generate(obj, anIO) -> anIO + * + * Generates a valid JSON document from object +obj+ and returns the + * result. If no valid JSON document can be created this method raises a + * GeneratorError exception. + */ +static VALUE cState_generate(int argc, VALUE *argv, VALUE self) +{ + rb_check_arity(argc, 1, 2); + VALUE obj = argv[0]; + VALUE io = argc > 1 ? argv[1] : Qnil; + return cState_partial_generate(self, obj, generate_json, io); } -static VALUE cState_generate(VALUE self, VALUE obj, VALUE io) +/* :nodoc: */ +static VALUE cState_generate_no_fallback(int argc, VALUE *argv, VALUE self) { - VALUE result = cState_partial_generate(self, obj, generate_json, io); - GET_STATE(self); - (void)state; - return result; + rb_check_arity(argc, 1, 2); + VALUE obj = argv[0]; + VALUE io = argc > 1 ? argv[1] : Qnil; + return cState_partial_generate(self, obj, generate_json_no_fallback, io); } static VALUE cState_initialize(int argc, VALUE *argv, VALUE self) @@ -1127,11 +1367,14 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig) if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State"); MEMCPY(objState, origState, JSON_Generator_State, 1); - objState->indent = origState->indent; - objState->space = origState->space; - objState->space_before = origState->space_before; - objState->object_nl = origState->object_nl; - objState->array_nl = origState->array_nl; + + RB_OBJ_WRITTEN(obj, Qundef, objState->indent); + RB_OBJ_WRITTEN(obj, Qundef, objState->space); + RB_OBJ_WRITTEN(obj, Qundef, objState->space_before); + RB_OBJ_WRITTEN(obj, Qundef, objState->object_nl); + RB_OBJ_WRITTEN(obj, Qundef, objState->array_nl); + RB_OBJ_WRITTEN(obj, Qundef, objState->as_json); + return obj; } @@ -1182,6 +1425,7 @@ static VALUE string_config(VALUE config) */ static VALUE cState_indent_set(VALUE self, VALUE indent) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->indent, string_config(indent)); return Qnil; @@ -1207,6 +1451,7 @@ static VALUE cState_space(VALUE self) */ static VALUE cState_space_set(VALUE self, VALUE space) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->space, string_config(space)); return Qnil; @@ -1230,6 +1475,7 @@ static VALUE cState_space_before(VALUE self) */ static VALUE cState_space_before_set(VALUE self, VALUE space_before) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->space_before, string_config(space_before)); return Qnil; @@ -1255,6 +1501,7 @@ static VALUE cState_object_nl(VALUE self) */ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl)); return Qnil; @@ -1278,11 +1525,35 @@ static VALUE cState_array_nl(VALUE self) */ static VALUE cState_array_nl_set(VALUE self, VALUE array_nl) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl)); return Qnil; } +/* + * call-seq: as_json() + * + * This string is put at the end of a line that holds a JSON array. + */ +static VALUE cState_as_json(VALUE self) +{ + GET_STATE(self); + return state->as_json; +} + +/* + * call-seq: as_json=(as_json) + * + * This string is put at the end of a line that holds a JSON array. + */ +static VALUE cState_as_json_set(VALUE self, VALUE as_json) +{ + rb_check_frozen(self); + GET_STATE(self); + RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc")); + return Qnil; +} /* * call-seq: check_circular? @@ -1310,7 +1581,21 @@ static VALUE cState_max_nesting(VALUE self) static long long_config(VALUE num) { - return RTEST(num) ? FIX2LONG(num) : 0; + return RTEST(num) ? NUM2LONG(num) : 0; +} + +// depth must never be negative; reject early with a clear error. +static long depth_config(VALUE num) +{ + if (!RTEST(num)) return 0; + long d = NUM2LONG(num); + if (RB_UNLIKELY(d < 0)) { + rb_raise(rb_eArgError, "depth must be >= 0 (got %ld)", d); + } + if (RB_UNLIKELY(d > INT_MAX)) { + rb_raise(rb_eArgError, "depth is too large (got %ld)", d); + } + return d; } /* @@ -1321,6 +1606,7 @@ static long long_config(VALUE num) */ static VALUE cState_max_nesting_set(VALUE self, VALUE depth) { + rb_check_frozen(self); GET_STATE(self); state->max_nesting = long_config(depth); return Qnil; @@ -1346,6 +1632,7 @@ static VALUE cState_script_safe(VALUE self) */ static VALUE cState_script_safe_set(VALUE self, VALUE enable) { + rb_check_frozen(self); GET_STATE(self); state->script_safe = RTEST(enable); return Qnil; @@ -1377,6 +1664,7 @@ static VALUE cState_strict(VALUE self) */ static VALUE cState_strict_set(VALUE self, VALUE enable) { + rb_check_frozen(self); GET_STATE(self); state->strict = RTEST(enable); return Qnil; @@ -1401,6 +1689,7 @@ static VALUE cState_allow_nan_p(VALUE self) */ static VALUE cState_allow_nan_set(VALUE self, VALUE enable) { + rb_check_frozen(self); GET_STATE(self); state->allow_nan = RTEST(enable); return Qnil; @@ -1425,11 +1714,25 @@ static VALUE cState_ascii_only_p(VALUE self) */ static VALUE cState_ascii_only_set(VALUE self, VALUE enable) { + rb_check_frozen(self); GET_STATE(self); state->ascii_only = RTEST(enable); return Qnil; } +static VALUE cState_allow_duplicate_key_p(VALUE self) +{ + GET_STATE(self); + switch (state->on_duplicate_key) { + case JSON_IGNORE: + return Qtrue; + case JSON_DEPRECATED: + return Qnil; + default: + return Qfalse; + } +} + /* * call-seq: depth * @@ -1449,8 +1752,9 @@ static VALUE cState_depth(VALUE self) */ static VALUE cState_depth_set(VALUE self, VALUE depth) { + rb_check_frozen(self); GET_STATE(self); - state->depth = long_config(depth); + state->depth = depth_config(depth); return Qnil; } @@ -1482,32 +1786,54 @@ static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_ */ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length) { + rb_check_frozen(self); GET_STATE(self); buffer_initial_length_set(state, buffer_initial_length); return Qnil; } +struct configure_state_data { + JSON_Generator_State *state; + VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated +}; + +static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value) +{ + if (RTEST(data->vstate)) { + RB_OBJ_WRITE(data->vstate, field, value); + } else { + *field = value; + } +} + static int configure_state_i(VALUE key, VALUE val, VALUE _arg) { - JSON_Generator_State *state = (JSON_Generator_State *)_arg; + struct configure_state_data *data = (struct configure_state_data *)_arg; + JSON_Generator_State *state = data->state; - if (key == sym_indent) { state->indent = string_config(val); } - else if (key == sym_space) { state->space = string_config(val); } - else if (key == sym_space_before) { state->space_before = string_config(val); } - else if (key == sym_object_nl) { state->object_nl = string_config(val); } - else if (key == sym_array_nl) { state->array_nl = string_config(val); } + if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); } + else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); } + else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); } + else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); } + else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); } else if (key == sym_max_nesting) { state->max_nesting = long_config(val); } else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); } else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); } - else if (key == sym_depth) { state->depth = long_config(val); } + else if (key == sym_depth) { state->depth = depth_config(val); } else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); } else if (key == sym_script_safe) { state->script_safe = RTEST(val); } else if (key == sym_escape_slash) { state->script_safe = RTEST(val); } else if (key == sym_strict) { state->strict = RTEST(val); } + else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } + else if (key == sym_as_json) { + VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; + state->as_json_single_arg = proc && rb_proc_arity(proc) == 1; + state_write_value(data, &state->as_json, proc); + } return ST_CONTINUE; } -static void configure_state(JSON_Generator_State *state, VALUE config) +static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config) { if (!RTEST(config)) return; @@ -1515,23 +1841,29 @@ static void configure_state(JSON_Generator_State *state, VALUE config) if (!RHASH_SIZE(config)) return; + struct configure_state_data data = { + .state = state, + .vstate = vstate + }; + // We assume in most cases few keys are set so it's faster to go over // the provided keys than to check all possible keys. - rb_hash_foreach(config, configure_state_i, (VALUE)state); + rb_hash_foreach(config, configure_state_i, (VALUE)&data); } static VALUE cState_configure(VALUE self, VALUE opts) { + rb_check_frozen(self); GET_STATE(self); - configure_state(state, opts); + configure_state(state, self, opts); return self; } -static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) +static VALUE cState_m_do_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io, generator_func func) { JSON_Generator_State state = {0}; state_init(&state); - configure_state(&state, opts); + configure_state(&state, Qfalse, opts); char stack_buffer[FBUFFER_STACK_SIZE]; FBuffer buffer = { @@ -1543,17 +1875,23 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) .buffer = &buffer, .vstate = Qfalse, .state = &state, + .depth = state.depth, .obj = obj, - .func = generate_json, + .func = func, }; - rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); + return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data); +} + +static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) +{ + return cState_m_do_generate(klass, obj, opts, io, generate_json); +} - return fbuffer_finalize(&buffer); +static VALUE cState_m_generate_no_fallback(VALUE klass, VALUE obj, VALUE opts, VALUE io) +{ + return cState_m_do_generate(klass, obj, opts, io, generate_json_no_fallback); } -/* - * - */ void Init_generator(void) { #ifdef HAVE_RB_EXT_RACTOR_SAFE @@ -1564,6 +1902,10 @@ void Init_generator(void) rb_require("json/common"); mJSON = rb_define_module("JSON"); + + rb_global_variable(&cFragment); + cFragment = rb_const_get(mJSON, rb_intern("Fragment")); + VALUE mExt = rb_define_module_under(mJSON, "Ext"); VALUE mGenerator = rb_define_module_under(mExt, "Generator"); @@ -1591,6 +1933,8 @@ void Init_generator(void) rb_define_method(cState, "object_nl=", cState_object_nl_set, 1); rb_define_method(cState, "array_nl", cState_array_nl, 0); rb_define_method(cState, "array_nl=", cState_array_nl_set, 1); + rb_define_method(cState, "as_json", cState_as_json, 0); + rb_define_method(cState, "as_json=", cState_as_json_set, 1); rb_define_method(cState, "max_nesting", cState_max_nesting, 0); rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1); rb_define_method(cState, "script_safe", cState_script_safe, 0); @@ -1611,51 +1955,13 @@ void Init_generator(void) rb_define_method(cState, "depth=", cState_depth_set, 1); rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0); rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1); - rb_define_private_method(cState, "_generate", cState_generate, 2); - - rb_define_singleton_method(cState, "generate", cState_m_generate, 3); - - VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); - - VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object"); - rb_define_method(mObject, "to_json", mObject_to_json, -1); - - VALUE mHash = rb_define_module_under(mGeneratorMethods, "Hash"); - rb_define_method(mHash, "to_json", mHash_to_json, -1); - - VALUE mArray = rb_define_module_under(mGeneratorMethods, "Array"); - rb_define_method(mArray, "to_json", mArray_to_json, -1); - -#ifdef RUBY_INTEGER_UNIFICATION - VALUE mInteger = rb_define_module_under(mGeneratorMethods, "Integer"); - rb_define_method(mInteger, "to_json", mInteger_to_json, -1); -#else - VALUE mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum"); - rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1); - - VALUE mBignum = rb_define_module_under(mGeneratorMethods, "Bignum"); - rb_define_method(mBignum, "to_json", mBignum_to_json, -1); -#endif - VALUE mFloat = rb_define_module_under(mGeneratorMethods, "Float"); - rb_define_method(mFloat, "to_json", mFloat_to_json, -1); - - VALUE mString = rb_define_module_under(mGeneratorMethods, "String"); - rb_define_singleton_method(mString, "included", mString_included_s, 1); - rb_define_method(mString, "to_json", mString_to_json, -1); - rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1); - rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0); + rb_define_method(cState, "generate", cState_generate, -1); + rb_define_method(cState, "_generate_no_fallback", cState_generate_no_fallback, -1); - mString_Extend = rb_define_module_under(mString, "Extend"); - rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1); + rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0); - VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass"); - rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1); - - VALUE mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass"); - rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1); - - VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass"); - rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1); + rb_define_singleton_method(cState, "generate", cState_m_generate, 3); + rb_define_singleton_method(cState, "_generate_no_fallback", cState_m_generate_no_fallback, 3); rb_global_variable(&Encoding_UTF_8); Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); @@ -1663,10 +1969,6 @@ void Init_generator(void) i_to_s = rb_intern("to_s"); i_to_json = rb_intern("to_json"); i_new = rb_intern("new"); - i_pack = rb_intern("pack"); - i_unpack = rb_intern("unpack"); - i_create_id = rb_intern("create_id"); - i_extend = rb_intern("extend"); i_encode = rb_intern("encode"); sym_indent = ID2SYM(rb_intern("indent")); @@ -1682,10 +1984,14 @@ void Init_generator(void) sym_script_safe = ID2SYM(rb_intern("script_safe")); sym_escape_slash = ID2SYM(rb_intern("escape_slash")); sym_strict = ID2SYM(rb_intern("strict")); + sym_as_json = ID2SYM(rb_intern("as_json")); + sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key")); usascii_encindex = rb_usascii_encindex(); utf8_encindex = rb_utf8_encindex(); binary_encindex = rb_ascii8bit_encindex(); rb_require("json/ext/generator/state"); + + simd_impl = find_simd_implementation(); } diff --git a/ext/json/json.gemspec b/ext/json/json.gemspec index 321a85fcf9..5575731025 100644 --- a/ext/json/json.gemspec +++ b/ext/json/json.gemspec @@ -11,14 +11,13 @@ spec = Gem::Specification.new do |s| s.version = version s.summary = "JSON Implementation for Ruby" - s.homepage = "https://ruby.github.io/json" + s.homepage = "https://github.com/ruby/json" s.metadata = { 'bug_tracker_uri' => 'https://github.com/ruby/json/issues', 'changelog_uri' => 'https://github.com/ruby/json/blob/master/CHANGES.md', - 'documentation_uri' => 'https://ruby.github.io/json/doc/index.html', + 'documentation_uri' => 'https://docs.ruby-lang.org/en/master/JSON.html', 'homepage_uri' => s.homepage, 'source_code_uri' => 'https://github.com/ruby/json', - 'wiki_uri' => 'https://github.com/ruby/json/wiki' } s.required_ruby_version = Gem::Requirement.new(">= 2.7") @@ -45,15 +44,14 @@ spec = Gem::Specification.new do |s| "LEGAL", "README.md", "json.gemspec", - *Dir["lib/**/*.rb"], - ] + ] + Dir.glob("lib/**/*.rb", base: File.expand_path("..", __FILE__)) if java_ext s.platform = 'java' s.files += Dir["lib/json/ext/**/*.jar"] else s.extensions = Dir["ext/json/**/extconf.rb"] - s.files += Dir["ext/json/**/*.{c,h,rl}"] + s.files += Dir["ext/json/**/*.{c,h,rb}"] end end diff --git a/ext/json/json.h b/ext/json/json.h new file mode 100644 index 0000000000..cf9420d4dd --- /dev/null +++ b/ext/json/json.h @@ -0,0 +1,134 @@ +#ifndef _JSON_H_ +#define _JSON_H_ + +#include "ruby.h" +#include "ruby/encoding.h" +#include <stdint.h> + +#ifndef RBIMPL_ASSERT_OR_ASSUME +# define RBIMPL_ASSERT_OR_ASSUME(x) +#endif + +#if defined(RUBY_DEBUG) && RUBY_DEBUG +# define JSON_ASSERT RUBY_ASSERT +#else +# ifdef JSON_DEBUG +# include <assert.h> +# define JSON_ASSERT(x) assert(x) +# else +# define JSON_ASSERT(x) +# endif +#endif + +/* shims */ + +#if SIZEOF_UINT64_T == SIZEOF_LONG_LONG +# define INT64T2NUM(x) LL2NUM(x) +# define UINT64T2NUM(x) ULL2NUM(x) +#elif SIZEOF_UINT64_T == SIZEOF_LONG +# define INT64T2NUM(x) LONG2NUM(x) +# define UINT64T2NUM(x) ULONG2NUM(x) +#else +# error No uint64_t conversion +#endif + +/* This is the fallback definition from Ruby 3.4 */ +#ifndef RBIMPL_STDBOOL_H +#if defined(__cplusplus) +# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) +# include <cstdbool> +# endif +#elif defined(HAVE_STDBOOL_H) +# include <stdbool.h> +#elif !defined(HAVE__BOOL) +typedef unsigned char _Bool; +# define bool _Bool +# define true ((_Bool)+1) +# define false ((_Bool)+0) +# define __bool_true_false_are_defined +#endif +#endif + +#ifndef HAVE_RUBY_XFREE_SIZED +static inline void ruby_xfree_sized(void *ptr, size_t oldsize) +{ + ruby_xfree(ptr); +} + +static inline void *ruby_xrealloc2_sized(void *ptr, size_t new_elems, size_t elem_size, size_t old_elems) +{ + return ruby_xrealloc2(ptr, new_elems, elem_size); +} +#endif + +# define JSON_SIZED_REALLOC_N(v, T, m, n) \ + ((v) = (T *)ruby_xrealloc2_sized((void *)(v), (m), sizeof(T), (n))) + +# define JSON_SIZED_FREE(v) ruby_xfree_sized((void *)(v), sizeof(*(v))) +# define JSON_SIZED_FREE_N(v, n) ruby_xfree_sized((void *)(v), sizeof(*(v)) * (n)) + +#ifndef HAVE_RB_EXT_RACTOR_SAFE +# undef RUBY_TYPED_FROZEN_SHAREABLE +# define RUBY_TYPED_FROZEN_SHAREABLE 0 +#endif + +#ifdef RUBY_TYPED_EMBEDDABLE +# define HAVE_RUBY_TYPED_EMBEDDABLE 1 +#else +# ifdef HAVE_CONST_RUBY_TYPED_EMBEDDABLE +# define RUBY_TYPED_EMBEDDABLE RUBY_TYPED_EMBEDDABLE +# define HAVE_RUBY_TYPED_EMBEDDABLE 1 +# else +# define RUBY_TYPED_EMBEDDABLE 0 +# endif +#endif + +#ifndef NORETURN +#if defined(__has_attribute) && __has_attribute(noreturn) +#define NORETURN(x) __attribute__((noreturn)) x +#else +#define NORETURN(x) x +#endif +#endif + +#ifndef NOINLINE +#if defined(__has_attribute) && __has_attribute(noinline) +#define NOINLINE(x) __attribute__((noinline)) x +#else +#define NOINLINE(x) x +#endif +#endif + +#ifndef ALWAYS_INLINE +#if defined(__has_attribute) && __has_attribute(always_inline) +#define ALWAYS_INLINE(x) inline __attribute__((always_inline)) x +#else +#define ALWAYS_INLINE(x) inline x +#endif +#endif + +#ifndef RB_UNLIKELY +#define RB_UNLIKELY(expr) expr +#endif + +#ifndef RB_LIKELY +#define RB_LIKELY(expr) expr +#endif + +#ifndef MAYBE_UNUSED +# define MAYBE_UNUSED(x) x +#endif + +#ifdef RUBY_DEBUG +#ifndef JSON_DEBUG +#define JSON_DEBUG RUBY_DEBUG +#endif +#endif + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && INTPTR_MAX == INT64_MAX +#define JSON_CPU_LITTLE_ENDIAN_64BITS 1 +#else +#define JSON_CPU_LITTLE_ENDIAN_64BITS 0 +#endif + +#endif // _JSON_H_ diff --git a/ext/json/lib/json.rb b/ext/json/lib/json.rb index dfd9b7dfc2..26d601926f 100644 --- a/ext/json/lib/json.rb +++ b/ext/json/lib/json.rb @@ -6,6 +6,15 @@ require 'json/common' # # \JSON is a lightweight data-interchange format. # +# \JSON is easy for us humans to read and write, +# and equally simple for machines to read (parse) and write (generate). +# +# \JSON is language-independent, making it an ideal interchange format +# for applications in differing programming languages +# and on differing operating systems. +# +# == \JSON Values +# # A \JSON value is one of the following: # - Double-quoted text: <tt>"foo"</tt>. # - Number: +1+, +1.0+, +2.0e2+. @@ -127,6 +136,24 @@ require 'json/common' # # --- # +# Option +allow_duplicate_key+ specifies whether duplicate keys in objects +# should be ignored or cause an error to be raised: +# +# When not specified: +# # The last value is used and a deprecation warning emitted. +# JSON.parse('{"a": 1, "a":2}') => {"a" => 2} +# # warning: detected duplicate keys in JSON object. +# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true` +# +# When set to `+true+` +# # The last value is used. +# JSON.parse('{"a": 1, "a":2}') => {"a" => 2} +# +# When set to `+false+`, the future default: +# JSON.parse('{"a": 1, "a":2}') => duplicate key at line 1 column 1 (JSON::ParserError) +# +# --- +# # Option +allow_nan+ (boolean) specifies whether to allow # NaN, Infinity, and MinusInfinity in +source+; # defaults to +false+. @@ -143,8 +170,47 @@ require 'json/common' # ruby = JSON.parse(source, {allow_nan: true}) # ruby # => [NaN, Infinity, -Infinity] # +# --- +# +# Option +allow_trailing_comma+ (boolean) specifies whether to allow +# trailing commas in objects and arrays; +# defaults to +false+. +# +# With the default, +false+: +# JSON.parse('[1,]') # unexpected character: ']' at line 1 column 4 (JSON::ParserError) +# +# When enabled: +# JSON.parse('[1,]', allow_trailing_comma: true) # => [1] +# +# --- +# +# Option +allow_control_characters+ (boolean) specifies whether to allow +# unescaped ASCII control characters, such as newlines, in strings; +# defaults to +false+. +# +# With the default, +false+: +# JSON.parse(%{"Hello\nWorld"}) # invalid ASCII control character in string (JSON::ParserError) +# +# When enabled: +# JSON.parse(%{"Hello\nWorld"}, allow_control_characters: true) # => "Hello\nWorld" +# +# --- +# +# Option +allow_invalid_escape+ (boolean) specifies whether to ignore backslahes that are followed +# by an invalid escape character in strings; +# defaults to +false+. +# +# With the default, +false+: +# JSON.parse('"Hell\o"') # invalid escape character in string (JSON::ParserError) +# +# When enabled: +# JSON.parse('"Hell\o"', allow_invalid_escape: true) # => "Hello" +# # ====== Output Options # +# Option +freeze+ (boolean) specifies whether the returned objects will be frozen; +# defaults to +false+. +# # Option +symbolize_names+ (boolean) specifies whether returned \Hash keys # should be Symbols; # defaults to +false+ (use Strings). @@ -269,8 +335,27 @@ require 'json/common' # JSON.generate(JSON::MinusInfinity) # # Allow: -# ruby = [Float::NaN, Float::Infinity, Float::MinusInfinity] -# JSON.generate(ruby, allow_nan: true) # => '[NaN,Infinity,-Infinity]' +# ruby = [Float::NAN, Float::INFINITY, JSON::NaN, JSON::Infinity, JSON::MinusInfinity] +# JSON.generate(ruby, allow_nan: true) # => '[NaN,Infinity,NaN,Infinity,-Infinity]' +# +# --- +# +# Option +allow_duplicate_key+ (boolean) specifies whether +# hashes with duplicate keys should be allowed or produce an error. +# defaults to emit a deprecation warning. +# +# With the default, (not set): +# Warning[:deprecated] = true +# JSON.generate({ foo: 1, "foo" => 2 }) +# # warning: detected duplicate key "foo" in {foo: 1, "foo" => 2}. +# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true` +# # => '{"foo":1,"foo":2}' +# +# With <tt>false</tt> +# JSON.generate({ foo: 1, "foo" => 2 }, allow_duplicate_key: false) +# # detected duplicate key "foo" in {foo: 1, "foo" => 2} (JSON::GeneratorError) +# +# In version 3.0, <tt>false</tt> will become the default. # # --- # @@ -351,6 +436,9 @@ require 'json/common' # # == \JSON Additions # +# Note that JSON Additions must only be used with trusted data, and is +# deprecated. +# # When you "round trip" a non-\String object from Ruby to \JSON and back, # you have a new \String, instead of the object you began with: # ruby0 = Range.new(0, 2) diff --git a/ext/json/lib/json/add/core.rb b/ext/json/lib/json/add/core.rb index 485f097fff..61ff454212 100644 --- a/ext/json/lib/json/add/core.rb +++ b/ext/json/lib/json/add/core.rb @@ -7,6 +7,7 @@ require 'json/add/date_time' require 'json/add/exception' require 'json/add/range' require 'json/add/regexp' +require 'json/add/string' require 'json/add/struct' require 'json/add/symbol' require 'json/add/time' diff --git a/ext/json/lib/json/add/string.rb b/ext/json/lib/json/add/string.rb new file mode 100644 index 0000000000..9c3bde27fb --- /dev/null +++ b/ext/json/lib/json/add/string.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true +unless defined?(::JSON::JSON_LOADED) and ::JSON::JSON_LOADED + require 'json' +end + +class String + # call-seq: json_create(o) + # + # Raw Strings are JSON Objects (the raw bytes are stored in an array for the + # key "raw"). The Ruby String can be created by this class method. + def self.json_create(object) + object["raw"].pack("C*") + end + + # call-seq: to_json_raw_object() + # + # This method creates a raw object hash, that can be nested into + # other data structures and will be generated as a raw string. This + # method should be used, if you want to convert raw strings to JSON + # instead of UTF-8 strings, e. g. binary data. + def to_json_raw_object + { + JSON.create_id => self.class.name, + "raw" => unpack("C*"), + } + end + + # call-seq: to_json_raw(*args) + # + # This method creates a JSON text from the result of a call to + # to_json_raw_object of this String. + def to_json_raw(...) + to_json_raw_object.to_json(...) + end +end diff --git a/ext/json/lib/json/add/symbol.rb b/ext/json/lib/json/add/symbol.rb index 1566ebc121..806be4f025 100644 --- a/ext/json/lib/json/add/symbol.rb +++ b/ext/json/lib/json/add/symbol.rb @@ -36,8 +36,13 @@ class Symbol # # # {"json_class":"Symbol","s":"foo"} # - def to_json(*a) - as_json.to_json(*a) + def to_json(state = nil, *a) + state = ::JSON::State.from_state(state) + if state.strict? + super + else + as_json.to_json(state, *a) + end end # See #as_json. diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb index 197ae11f6a..230bf08012 100644 --- a/ext/json/lib/json/common.rb +++ b/ext/json/lib/json/common.rb @@ -5,10 +5,119 @@ require 'json/version' module JSON autoload :GenericObject, 'json/generic_object' - NOT_SET = Object.new.freeze - private_constant :NOT_SET + module ParserOptions # :nodoc: + class << self + def prepare(opts) + if opts[:object_class] || opts[:array_class] + opts = opts.dup + on_load = opts[:on_load] + + on_load = object_class_proc(opts[:object_class], on_load) if opts[:object_class] + on_load = array_class_proc(opts[:array_class], on_load) if opts[:array_class] + opts[:on_load] = on_load + end + + if opts.fetch(:create_additions, false) != false + opts = create_additions_proc(opts) + end + + opts + end + + private + + def object_class_proc(object_class, on_load) + ->(obj) do + if Hash === obj + object = object_class.new + obj.each { |k, v| object[k] = v } + obj = object + end + on_load.nil? ? obj : on_load.call(obj) + end + end + + def array_class_proc(array_class, on_load) + ->(obj) do + if Array === obj + array = array_class.new + obj.each { |v| array << v } + obj = array + end + on_load.nil? ? obj : on_load.call(obj) + end + end + + # TODO: extract :create_additions support to another gem for version 3.0 + def create_additions_proc(opts) + if opts[:symbolize_names] + raise ArgumentError, "options :symbolize_names and :create_additions cannot be used in conjunction" + end + + opts = opts.dup + create_additions = opts.fetch(:create_additions, false) + on_load = opts[:on_load] + object_class = opts[:object_class] || Hash + + opts[:on_load] = ->(object) do + case object + when String + opts[:match_string]&.each do |pattern, klass| + if match = pattern.match(object) + create_additions_warning if create_additions.nil? + object = klass.json_create(object) + break + end + end + when object_class + if opts[:create_additions] != false + if class_path = object[JSON.create_id] + klass = begin + Object.const_get(class_path) + rescue NameError => e + raise ArgumentError, "can't get const #{class_path}: #{e}" + end + + if klass.respond_to?(:json_creatable?) ? klass.json_creatable? : klass.respond_to?(:json_create) + create_additions_warning if create_additions.nil? + object = klass.json_create(object) + end + end + end + end + + on_load.nil? ? object : on_load.call(object) + end + + opts + end + + def create_additions_warning + JSON.deprecation_warning "JSON.load implicit support for `create_additions: true` is deprecated " \ + "and will be removed in 3.0, use JSON.unsafe_load or explicitly " \ + "pass `create_additions: true`" + end + end + end class << self + def deprecation_warning(message, uplevel = 3) # :nodoc: + gem_root = File.expand_path("..", __dir__) + "/" + caller_locations(uplevel, 10).each do |frame| + if frame.path.nil? || frame.path.start_with?(gem_root) || frame.path.end_with?("/truffle/cext_ruby.rb", ".c") + uplevel += 1 + else + break + end + end + + if RUBY_VERSION >= "3.0" + warn(message, uplevel: uplevel, category: :deprecated) + else + warn(message, uplevel: uplevel) + end + end + # :call-seq: # JSON[object] -> new_array or new_string # @@ -20,7 +129,7 @@ module JSON # Otherwise, calls JSON.generate with +object+ and +opts+ (see method #generate): # ruby = [0, 1, nil] # JSON[ruby] # => '[0,1,null]' - def [](object, opts = {}) + def [](object, opts = nil) if object.is_a?(String) return JSON.parse(object, opts) elsif object.respond_to?(:to_str) @@ -43,64 +152,76 @@ module JSON const_set :Parser, parser end - # Return the constant located at _path_. The format of _path_ has to be - # either ::A::B::C or A::B::C. In any case, A has to be located at the top - # level (absolute namespace path?). If there doesn't exist a constant at - # the given path, an ArgumentError is raised. - def deep_const_get(path) # :nodoc: - Object.const_get(path) - rescue NameError => e - raise ArgumentError, "can't get const #{path}: #{e}" - end - # Set the module _generator_ to be used by JSON. def generator=(generator) # :nodoc: old, $VERBOSE = $VERBOSE, nil @generator = generator - generator_methods = generator::GeneratorMethods - for const in generator_methods.constants - klass = const_get(const) - modul = generator_methods.const_get(const) - klass.class_eval do - instance_methods(false).each do |m| - m.to_s == 'to_json' and remove_method m + if generator.const_defined?(:GeneratorMethods) + generator_methods = generator::GeneratorMethods + for const in generator_methods.constants + klass = const_get(const) + modul = generator_methods.const_get(const) + klass.class_eval do + instance_methods(false).each do |m| + m.to_s == 'to_json' and remove_method m + end + include modul end - include modul end end self.state = generator::State - const_set :State, self.state - const_set :SAFE_STATE_PROTOTYPE, State.new # for JRuby - const_set :FAST_STATE_PROTOTYPE, create_fast_state - const_set :PRETTY_STATE_PROTOTYPE, create_pretty_state + const_set :State, state ensure $VERBOSE = old end - def create_fast_state - State.new( - :indent => '', - :space => '', - :object_nl => "", - :array_nl => "", - :max_nesting => false - ) - end - - def create_pretty_state - State.new( - :indent => ' ', - :space => ' ', - :object_nl => "\n", - :array_nl => "\n" - ) - end - # Returns the JSON generator module that is used by JSON. attr_reader :generator # Sets or Returns the JSON generator state class that is used by JSON. attr_accessor :state + + private + + # Called from the extension when a hash has both string and symbol keys + def on_mixed_keys_hash(hash, do_raise) + set = {} + hash.each_key do |key| + key_str = key.to_s + + if set[key_str] + message = "detected duplicate key #{key_str.inspect} in #{hash.inspect}" + if do_raise + raise GeneratorError, message + else + deprecation_warning("#{message}.\nThis will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`") + end + else + set[key_str] = true + end + end + end + + def deprecated_singleton_attr_accessor(*attrs) + args = RUBY_VERSION >= "3.0" ? ", category: :deprecated" : "" + attrs.each do |attr| + singleton_class.class_eval <<~RUBY + def #{attr} + warn "JSON.#{attr} is deprecated and will be removed in json 3.0.0", uplevel: 1 #{args} + @#{attr} + end + + def #{attr}=(val) + warn "JSON.#{attr}= is deprecated and will be removed in json 3.0.0", uplevel: 1 #{args} + @#{attr} = val + end + + def _#{attr} + @#{attr} + end + RUBY + end + end end # Sets create identifier, which is used to decide if the _json_create_ @@ -116,32 +237,24 @@ module JSON Thread.current[:"JSON.create_id"] || 'json_class' end - NaN = 0.0/0 + NaN = Float::NAN - Infinity = 1.0/0 + Infinity = Float::INFINITY MinusInfinity = -Infinity # The base exception for JSON errors. - class JSONError < StandardError - def self.wrap(exception) - obj = new("Wrapped(#{exception.class}): #{exception.message.inspect}") - obj.set_backtrace exception.backtrace - obj - end - end + class JSONError < StandardError; end # This exception is raised if a parser error occurs. - class ParserError < JSONError; end + class ParserError < JSONError + attr_reader :line, :column + end # This exception is raised if the nesting of parsed data structures is too # deep. class NestingError < ParserError; end - # :stopdoc: - class CircularDatastructure < NestingError; end - # :startdoc: - # This exception is raised if a generator or unparser error occurs. class GeneratorError < JSONError attr_reader :invalid_object @@ -152,20 +265,40 @@ module JSON end def detailed_message(...) + # Exception#detailed_message doesn't exist until Ruby 3.2 + super_message = defined?(super) ? super : message + if @invalid_object.nil? - super + super_message else - "#{super}\nInvalid object: #{@invalid_object.inspect}" + "#{super_message}\nInvalid object: #{@invalid_object.inspect}" end end end - # For backwards compatibility - UnparserError = GeneratorError # :nodoc: + # Fragment of JSON document that is to be included as is: + # fragment = JSON::Fragment.new("[1, 2, 3]") + # JSON.generate({ count: 3, items: fragments }) + # + # This allows to easily assemble multiple JSON fragments that have + # been persisted somewhere without having to parse them nor resorting + # to string interpolation. + # + # Note: no validation is performed on the provided string. It is the + # responsibility of the caller to ensure the string contains valid JSON. + Fragment = Struct.new(:json) do + def initialize(json) + unless string = String.try_convert(json) + raise TypeError, " no implicit conversion of #{json.class} into String" + end - # This exception is raised if the required unicode support is missing on the - # system. Usually this means that the iconv library is not installed. - class MissingUnicodeSupport < JSONError; end + super(string) + end + + def to_json(state = nil, *) + json + end + end module_function @@ -218,9 +351,16 @@ module JSON # JSON.parse('') # def parse(source, opts = nil) + opts = ParserOptions.prepare(opts) unless opts.nil? Parser.parse(source, opts) end + PARSE_L_OPTIONS = { + max_nesting: false, + allow_nan: true, + }.freeze + private_constant :PARSE_L_OPTIONS + # :call-seq: # JSON.parse!(source, opts) -> object # @@ -232,12 +372,12 @@ module JSON # - Option +max_nesting+, if not provided, defaults to +false+, # which disables checking for nesting depth. # - Option +allow_nan+, if not provided, defaults to +true+. - def parse!(source, opts = {}) - opts = { - :max_nesting => false, - :allow_nan => true - }.merge(opts) - Parser.new(source, **(opts||{})).parse + def parse!(source, opts = nil) + if opts.nil? + parse(source, PARSE_L_OPTIONS) + else + parse(source, PARSE_L_OPTIONS.merge(opts)) + end end # :call-seq: @@ -258,7 +398,7 @@ module JSON # JSON.parse!(File.read(path, opts)) # # See method #parse! - def load_file!(filespec, opts = {}) + def load_file!(filespec, opts = nil) parse!(File.read(filespec, encoding: Encoding::UTF_8), opts) end @@ -267,7 +407,7 @@ module JSON # # Returns a \String containing the generated \JSON data. # - # See also JSON.fast_generate, JSON.pretty_generate. + # See also JSON.pretty_generate. # # Argument +obj+ is the Ruby object to be converted to \JSON. # @@ -306,13 +446,6 @@ module JSON end end - # :stopdoc: - # I want to deprecate these later, so I'll first be silent about them, and - # later delete them. - alias unparse generate - module_function :unparse - # :startdoc: - # :call-seq: # JSON.fast_generate(obj, opts) -> new_string # @@ -327,19 +460,21 @@ module JSON # # Raises SystemStackError (stack level too deep): # JSON.fast_generate(a) def fast_generate(obj, opts = nil) - if State === opts - state = opts + if RUBY_VERSION >= "3.0" + warn "JSON.fast_generate is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1, category: :deprecated else - state = JSON.create_fast_state.configure(opts) + warn "JSON.fast_generate is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1 end - state.generate(obj) + generate(obj, opts) end - # :stopdoc: - # I want to deprecate these later, so I'll first be silent about them, and later delete them. - alias fast_unparse fast_generate - module_function :fast_unparse - # :startdoc: + PRETTY_GENERATE_OPTIONS = { + indent: ' ', + space: ' ', + object_nl: "\n", + array_nl: "\n", + }.freeze + private_constant :PRETTY_GENERATE_OPTIONS # :call-seq: # JSON.pretty_generate(obj, opts = nil) -> new_string @@ -372,57 +507,52 @@ module JSON # } # def pretty_generate(obj, opts = nil) - if State === opts - state, opts = opts, nil - else - state = JSON.create_pretty_state - end + return opts.generate(obj) if State === opts + + options = PRETTY_GENERATE_OPTIONS + if opts - if opts.respond_to? :to_hash - opts = opts.to_hash - elsif opts.respond_to? :to_h - opts = opts.to_h - else - raise TypeError, "can't convert #{opts.class} into Hash" + unless opts.is_a?(Hash) + if opts.respond_to? :to_hash + opts = opts.to_hash + elsif opts.respond_to? :to_h + opts = opts.to_h + else + raise TypeError, "can't convert #{opts.class} into Hash" + end end - state.configure(opts) + options = options.merge(opts) end - state.generate(obj) + + State.generate(obj, options, nil) end - # :stopdoc: - # I want to deprecate these later, so I'll first be silent about them, and later delete them. - alias pretty_unparse pretty_generate - module_function :pretty_unparse - # :startdoc: + # Sets or returns default options for the JSON.unsafe_load method. + # Initially: + # opts = JSON.load_default_options + # opts # => {:max_nesting=>false, :allow_nan=>true, :allow_blank=>true, :create_additions=>true} + deprecated_singleton_attr_accessor :unsafe_load_default_options - class << self - # Sets or returns default options for the JSON.unsafe_load method. - # Initially: - # opts = JSON.load_default_options - # opts # => {:max_nesting=>false, :allow_nan=>true, :allow_blank=>true, :create_additions=>true} - attr_accessor :unsafe_load_default_options - end - self.unsafe_load_default_options = { + @unsafe_load_default_options = { :max_nesting => false, :allow_nan => true, :allow_blank => true, :create_additions => true, } - class << self - # Sets or returns default options for the JSON.load method. - # Initially: - # opts = JSON.load_default_options - # opts # => {:max_nesting=>false, :allow_nan=>true, :allow_blank=>true, :create_additions=>true} - attr_accessor :load_default_options - end - self.load_default_options = { + # Sets or returns default options for the JSON.load method. + # Initially: + # opts = JSON.load_default_options + # opts # => {:max_nesting=>false, :allow_nan=>true, :allow_blank=>true, :create_additions=>true} + deprecated_singleton_attr_accessor :load_default_options + + @load_default_options = { :allow_nan => true, :allow_blank => true, :create_additions => nil, } # :call-seq: + # JSON.unsafe_load(source, options = {}) -> object # JSON.unsafe_load(source, proc = nil, options = {}) -> object # # Returns the Ruby objects created by parsing the given +source+. @@ -530,6 +660,7 @@ module JSON # when Array # obj.map! {|v| deserialize_obj v } # end + # obj # }) # pp ruby # Output: @@ -553,9 +684,14 @@ module JSON # def unsafe_load(source, proc = nil, options = nil) opts = if options.nil? - unsafe_load_default_options + if proc && proc.is_a?(Hash) + options, proc = proc, nil + options + else + _unsafe_load_default_options + end else - unsafe_load_default_options.merge(options) + _unsafe_load_default_options.merge(options) end unless source.is_a?(String) @@ -571,12 +707,17 @@ module JSON if opts[:allow_blank] && (source.nil? || source.empty?) source = 'null' end - result = parse(source, opts) - recurse_proc(result, &proc) if proc - result + + if proc + opts = opts.dup + opts[:on_load] = proc.to_proc + end + + parse(source, opts) end # :call-seq: + # JSON.load(source, options = {}) -> object # JSON.load(source, proc = nil, options = {}) -> object # # Returns the Ruby objects created by parsing the given +source+. @@ -690,6 +831,7 @@ module JSON # when Array # obj.map! {|v| deserialize_obj v } # end + # obj # }) # pp ruby # Output: @@ -712,10 +854,20 @@ module JSON # @attributes={"type"=>"Admin", "password"=>"0wn3d"}>} # def load(source, proc = nil, options = nil) + if proc && options.nil? && proc.is_a?(Hash) + options = proc + proc = nil + end + opts = if options.nil? - load_default_options + if proc && proc.is_a?(Hash) + options, proc = proc, nil + options + else + _load_default_options + end else - load_default_options.merge(options) + _load_default_options.merge(options) end unless source.is_a?(String) @@ -728,39 +880,24 @@ module JSON end end - if opts[:allow_blank] && (source.nil? || source.empty?) + if opts[:allow_blank] && (source.nil? || (String === source && source.empty?)) source = 'null' end - result = parse(source, opts) - recurse_proc(result, &proc) if proc - result - end - # Recursively calls passed _Proc_ if the parsed data structure is an _Array_ or _Hash_ - def recurse_proc(result, &proc) # :nodoc: - case result - when Array - result.each { |x| recurse_proc x, &proc } - proc.call result - when Hash - result.each { |x, y| recurse_proc x, &proc; recurse_proc y, &proc } - proc.call result - else - proc.call result + if proc + opts = opts.dup + opts[:on_load] = proc.to_proc end - end - alias restore load - module_function :restore - - class << self - # Sets or returns the default options for the JSON.dump method. - # Initially: - # opts = JSON.dump_default_options - # opts # => {:max_nesting=>false, :allow_nan=>true} - attr_accessor :dump_default_options + parse(source, opts) end - self.dump_default_options = { + + # Sets or returns the default options for the JSON.dump method. + # Initially: + # opts = JSON.dump_default_options + # opts # => {:max_nesting=>false, :allow_nan=>true} + deprecated_singleton_attr_accessor :dump_default_options + @dump_default_options = { :max_nesting => false, :allow_nan => true, } @@ -813,33 +950,177 @@ module JSON end end - opts = JSON.dump_default_options + opts = JSON._dump_default_options opts = opts.merge(:max_nesting => limit) if limit - opts = merge_dump_options(opts, **kwargs) if kwargs + opts = opts.merge(kwargs) if kwargs begin - if State === opts - opts.generate(obj, anIO) - else - State.generate(obj, opts, anIO) - end + State.generate(obj, opts, anIO) rescue JSON::NestingError raise ArgumentError, "exceed depth limit" end end - # Encodes string using String.encode. - def self.iconv(to, from, string) - string.encode(to, from) + # :stopdoc: + # All these were meant to be deprecated circa 2009, but were just set as undocumented + # so usage still exist in the wild. + def unparse(...) + if RUBY_VERSION >= "3.0" + warn "JSON.unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1, category: :deprecated + else + warn "JSON.unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1 + end + generate(...) + end + module_function :unparse + + def fast_unparse(...) + if RUBY_VERSION >= "3.0" + warn "JSON.fast_unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1, category: :deprecated + else + warn "JSON.fast_unparse is deprecated and will be removed in json 3.0.0, just use JSON.generate", uplevel: 1 + end + generate(...) end + module_function :fast_unparse - def merge_dump_options(opts, strict: NOT_SET) - opts = opts.merge(strict: strict) if NOT_SET != strict - opts + def pretty_unparse(...) + if RUBY_VERSION >= "3.0" + warn "JSON.pretty_unparse is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1, category: :deprecated + else + warn "JSON.pretty_unparse is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1 + end + pretty_generate(...) end + module_function :fast_unparse + + def restore(...) + if RUBY_VERSION >= "3.0" + warn "JSON.restore is deprecated and will be removed in json 3.0.0, just use JSON.load", uplevel: 1, category: :deprecated + else + warn "JSON.restore is deprecated and will be removed in json 3.0.0, just use JSON.load", uplevel: 1 + end + load(...) + end + module_function :restore class << self - private :merge_dump_options + private + + def const_missing(const_name) + case const_name + when :PRETTY_STATE_PROTOTYPE + if RUBY_VERSION >= "3.0" + warn "JSON::PRETTY_STATE_PROTOTYPE is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1, category: :deprecated + else + warn "JSON::PRETTY_STATE_PROTOTYPE is deprecated and will be removed in json 3.0.0, just use JSON.pretty_generate", uplevel: 1 + end + state.new(PRETTY_GENERATE_OPTIONS) + else + super + end + end + end + # :startdoc: + + # JSON::Coder holds a parser and generator configuration. + # + # module MyApp + # JSONC_CODER = JSON::Coder.new( + # allow_trailing_comma: true + # ) + # end + # + # MyApp::JSONC_CODER.load(document) + # + class Coder + # :call-seq: + # JSON.new(options = nil, &block) + # + # Argument +options+, if given, contains a \Hash of options for both parsing and generating. + # See {Parsing Options}[rdoc-ref:JSON@Parsing+Options], + # and {Generating Options}[rdoc-ref:JSON@Generating+Options]. + # + # For generation, the <tt>strict: true</tt> option is always set. When a Ruby object with no native \JSON counterpart is + # encountered, the block provided to the initialize method is invoked, and must return a Ruby object that has a native + # \JSON counterpart: + # + # module MyApp + # API_JSON_CODER = JSON::Coder.new do |object| + # case object + # when Time + # object.iso8601(3) + # else + # object # Unknown type, will raise + # end + # end + # end + # + # puts MyApp::API_JSON_CODER.dump(Time.now.utc) # => "2025-01-21T08:41:44.286Z" + # + def initialize(options = nil, &as_json) + if options.nil? + options = { strict: true } + else + options = options.dup + options[:strict] = true + end + options[:as_json] = as_json if as_json + + @state = State.new(options).freeze + @parser_config = Ext::Parser::Config.new(ParserOptions.prepare(options)).freeze + end + + # call-seq: + # dump(object) -> String + # dump(object, io) -> io + # + # Serialize the given object into a \JSON document. + def dump(object, io = nil) + @state.generate(object, io) + end + alias_method :generate, :dump + + # call-seq: + # load(string) -> Object + # + # Parse the given \JSON document and return an equivalent Ruby object. + def load(source) + @parser_config.parse(source) + end + alias_method :parse, :load + + # call-seq: + # load(path) -> Object + # + # Parse the given \JSON document and return an equivalent Ruby object. + def load_file(path) + load(File.read(path, encoding: Encoding::UTF_8)) + end + end + + module GeneratorMethods + # call-seq: to_json(*) + # + # Converts this object into a JSON string. + # If this object doesn't directly maps to a JSON native type, + # first convert it to a string (calling #to_s), then converts + # it to a JSON string, and returns the result. + # This is a fallback, if no special method #to_json was defined for some object. + def to_json(state = nil, *) + obj = case self + when nil, false, true, Integer, Float, Array, Hash + self + else + "#{self}" + end + + if state.nil? + JSON::State._generate_no_fallback(obj, nil, nil) + else + JSON::State.from_state(state)._generate_no_fallback(obj) + end + end end end @@ -849,8 +1130,14 @@ module ::Kernel # Outputs _objs_ to STDOUT as JSON strings in the shortest form, that is in # one line. def j(*objs) + if RUBY_VERSION >= "3.0" + warn "Kernel#j is deprecated and will be removed in json 3.0.0", uplevel: 1, category: :deprecated + else + warn "Kernel#j is deprecated and will be removed in json 3.0.0", uplevel: 1 + end + objs.each do |obj| - puts JSON::generate(obj, :allow_nan => true, :max_nesting => false) + puts JSON.generate(obj, :allow_nan => true, :max_nesting => false) end nil end @@ -858,8 +1145,14 @@ module ::Kernel # Outputs _objs_ to STDOUT as JSON strings in a pretty format, with # indentation and over many lines. def jj(*objs) + if RUBY_VERSION >= "3.0" + warn "Kernel#jj is deprecated and will be removed in json 3.0.0", uplevel: 1, category: :deprecated + else + warn "Kernel#jj is deprecated and will be removed in json 3.0.0", uplevel: 1 + end + objs.each do |obj| - puts JSON::pretty_generate(obj, :allow_nan => true, :max_nesting => false) + puts JSON.pretty_generate(obj, :allow_nan => true, :max_nesting => false) end nil end @@ -870,27 +1163,11 @@ module ::Kernel # # The _opts_ argument is passed through to generate/parse respectively. See # generate and parse for their documentation. - def JSON(object, *args) - if object.is_a?(String) - return JSON.parse(object, args.first) - elsif object.respond_to?(:to_str) - str = object.to_str - if str.is_a?(String) - return JSON.parse(object.to_str, args.first) - end - end - - JSON.generate(object, args.first) + def JSON(object, opts = nil) + JSON[object, opts] end end -# Extends any Class to include _json_creatable?_ method. -class ::Class - # Returns true if this class can be used to create an instance - # from a serialised JSON string. The class has to implement a class - # method _json_create_ that expects a hash as first parameter. The hash - # should include the required data. - def json_creatable? - respond_to?(:json_create) - end +class Object + include JSON::GeneratorMethods end diff --git a/ext/json/lib/json/ext.rb b/ext/json/lib/json/ext.rb index 2082cae68f..5bacc5e371 100644 --- a/ext/json/lib/json/ext.rb +++ b/ext/json/lib/json/ext.rb @@ -6,18 +6,40 @@ module JSON # This module holds all the modules/classes that implement JSON's # functionality as C extensions. module Ext + class Parser + class << self + def parse(...) + new(...).parse + end + alias_method :parse, :parse # Allow redefinition by extensions + end + + def initialize(source, opts = nil) + @source = source + @config = Config.new(opts) + end + + def source + @source.dup + end + + def parse + @config.parse(@source) + end + end + + require 'json/ext/parser' + Ext::Parser::Config = Ext::ParserConfig + JSON.parser = Ext::Parser + if RUBY_ENGINE == 'truffleruby' - require 'json/ext/parser' require 'json/truffle_ruby/generator' - JSON.parser = Parser - JSON.generator = ::JSON::TruffleRuby::Generator + JSON.generator = JSON::TruffleRuby::Generator else - require 'json/ext/parser' require 'json/ext/generator' - JSON.parser = Parser JSON.generator = Generator end end - JSON_LOADED = true unless defined?(::JSON::JSON_LOADED) + JSON_LOADED = true unless defined?(JSON::JSON_LOADED) end diff --git a/ext/json/lib/json/ext/generator/state.rb b/ext/json/lib/json/ext/generator/state.rb index 1e0d5245b1..e4f425af6a 100644 --- a/ext/json/lib/json/ext/generator/state.rb +++ b/ext/json/lib/json/ext/generator/state.rb @@ -8,20 +8,8 @@ module JSON # # Instantiates a new State object, configured by _opts_. # - # _opts_ can have the following keys: - # - # * *indent*: a string used to indent levels (default: ''), - # * *space*: a string that is put after, a : or , delimiter (default: ''), - # * *space_before*: a string that is put before a : pair delimiter (default: ''), - # * *object_nl*: a string that is put at the end of a JSON object (default: ''), - # * *array_nl*: a string that is put at the end of a JSON array (default: ''), - # * *allow_nan*: true if NaN, Infinity, and -Infinity should be - # generated, otherwise an exception is thrown, if these values are - # encountered. This options defaults to false. - # * *ascii_only*: true if only ASCII characters should be generated. This - # option defaults to false. - # * *buffer_initial_length*: sets the initial length of the generator's - # internal buffer. + # Argument +opts+, if given, contains a \Hash of options for the generation. + # See {Generating Options}[rdoc-ref:JSON@Generating+Options]. def initialize(opts = nil) if opts && !opts.empty? configure(opts) @@ -47,17 +35,6 @@ module JSON alias_method :merge, :configure - # call-seq: - # generate(obj) -> String - # generate(obj, anIO) -> anIO - # - # Generates a valid JSON document from object +obj+ and returns the - # result. If no valid JSON document can be created this method raises a - # GeneratorError exception. - def generate(obj, io = nil) - _generate(obj, io) - end - # call-seq: to_h # # Returns the configuration instance variables as a hash, that can be @@ -69,6 +46,7 @@ module JSON space_before: space_before, object_nl: object_nl, array_nl: array_nl, + as_json: as_json, allow_nan: allow_nan?, ascii_only: ascii_only?, max_nesting: max_nesting, @@ -78,6 +56,11 @@ module JSON buffer_initial_length: buffer_initial_length, } + allow_duplicate_key = allow_duplicate_key? + unless allow_duplicate_key.nil? + result[:allow_duplicate_key] = allow_duplicate_key + end + instance_variables.each do |iv| iv = iv.to_s[1..-1] result[iv.to_sym] = self[iv] @@ -92,6 +75,8 @@ module JSON # # Returns the value returned by method +name+. def [](name) + ::JSON.deprecation_warning("JSON::State#[] is deprecated and will be removed in json 3.0.0") + if respond_to?(name) __send__(name) else @@ -104,6 +89,8 @@ module JSON # # Sets the attribute name to value. def []=(name, value) + ::JSON.deprecation_warning("JSON::State#[]= is deprecated and will be removed in json 3.0.0") + if respond_to?(name_writer = "#{name}=") __send__ name_writer, value else diff --git a/ext/json/lib/json/generic_object.rb b/ext/json/lib/json/generic_object.rb index ec5aa9dcb2..5c8ace354b 100644 --- a/ext/json/lib/json/generic_object.rb +++ b/ext/json/lib/json/generic_object.rb @@ -52,14 +52,6 @@ module JSON table end - def [](name) - __send__(name) - end unless method_defined?(:[]) - - def []=(name, value) - __send__("#{name}=", value) - end unless method_defined?(:[]=) - def |(other) self.class[other.to_hash.merge(to_hash)] end diff --git a/ext/json/lib/json/version.rb b/ext/json/lib/json/version.rb index 95854a4327..30c0a71d2f 100644 --- a/ext/json/lib/json/version.rb +++ b/ext/json/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.9.0' + VERSION = '2.19.8' end diff --git a/ext/json/parser/depend b/ext/json/parser/depend index 4cdf69a749..d4737b1dfb 100644 --- a/ext/json/parser/depend +++ b/ext/json/parser/depend @@ -141,6 +141,7 @@ parser.o: $(hdrdir)/ruby/internal/intern/re.h parser.o: $(hdrdir)/ruby/internal/intern/ruby.h parser.o: $(hdrdir)/ruby/internal/intern/select.h parser.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +parser.o: $(hdrdir)/ruby/internal/intern/set.h parser.o: $(hdrdir)/ruby/internal/intern/signal.h parser.o: $(hdrdir)/ruby/internal/intern/sprintf.h parser.o: $(hdrdir)/ruby/internal/intern/string.h @@ -174,6 +175,8 @@ parser.o: $(hdrdir)/ruby/ruby.h parser.o: $(hdrdir)/ruby/st.h parser.o: $(hdrdir)/ruby/subst.h parser.o: $(srcdir)/../fbuffer/fbuffer.h +parser.o: $(srcdir)/../json.h +parser.o: $(srcdir)/../simd/simd.h +parser.o: $(srcdir)/../vendor/ryu.h parser.o: parser.c -parser.o: parser.rl # AUTOGENERATED DEPENDENCIES END diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb index 7bbcc33fff..a9d740c755 100644 --- a/ext/json/parser/extconf.rb +++ b/ext/json/parser/extconf.rb @@ -1,11 +1,21 @@ # frozen_string_literal: true require 'mkmf' -have_func("rb_enc_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 +$defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0" +have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0 +have_func("rb_str_to_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby -have_func("rb_category_warn", "ruby.h") # Missing on TruffleRuby +have_func("ruby_xfree_sized", "ruby.h") # RUBY_VERSION >= 4.1 + +if RUBY_ENGINE == "ruby" + have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3 +end append_cflags("-std=c99") +if enable_config('parser-use-simd', default=!ENV["JSON_DISABLE_SIMD"]) + load __dir__ + "/../simd/conf.rb" +end + create_makefile 'json/ext/parser' diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index dff021bd18..d0482f6861 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1,36 +1,23 @@ -/* This file is automatically generated from parser.rl by using ragel */ -#line 1 "parser.rl" -#include "ruby.h" -#include "../fbuffer/fbuffer.h" +#include "../json.h" +#include "../vendor/ryu.h" +#include "../simd/simd.h" -static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; +static VALUE mJSON, eNestingError, Encoding_UTF_8; static VALUE CNaN, CInfinity, CMinusInfinity; -static ID i_json_creatable_p, i_json_create, i_create_id, - i_chr, i_deep_const_get, i_match, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_uminus, i_encode; +static ID i_new, i_try_convert, i_uminus, i_encode; -static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, - sym_create_additions, sym_create_id, sym_object_class, sym_array_class, - sym_decimal_class, sym_match_string; +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, + sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load, + sym_allow_duplicate_key; static int binary_encindex; static int utf8_encindex; -#ifdef HAVE_RB_CATEGORY_WARN -# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) -#else -# define json_deprecated(message) rb_warn(message) -#endif - -static const char deprecated_create_additions_warning[] = - "JSON.load implicit support for `create_additions: true` is deprecated " - "and will be removed in 3.0, use JSON.unsafe_load or explicitly " - "pass `create_additions: true`"; - #ifndef HAVE_RB_HASH_BULK_INSERT // For TruffleRuby -void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) +static void +rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) { long index = 0; while (index < count) { @@ -42,6 +29,17 @@ void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) } #endif +#ifndef HAVE_RB_HASH_NEW_CAPA +#define rb_hash_new_capa(n) rb_hash_new() +#endif + +#ifndef HAVE_RB_STR_TO_INTERNED_STR +static VALUE rb_str_to_interned_str(VALUE str) +{ + return rb_funcall(rb_str_freeze(str), i_uminus, 0); +} +#endif + /* name cache */ #include <string.h> @@ -86,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring cache->entries[index] = rstring; } -static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +#define rstring_cache_memcmp memcmp + +#if JSON_CPU_LITTLE_ENDIAN_64BITS +#if __has_builtin(__builtin_bswap64) +#undef rstring_cache_memcmp +ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length) { - long rstring_length = RSTRING_LEN(rstring); - if (length == rstring_length) { - return memcmp(str, RSTRING_PTR(rstring), length); - } else { - return (int)(length - rstring_length); + // The libc memcmp has numerous complex optimizations, but in this particular case, + // we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to + // inline a simpler memcmp outperforms calling the libc version. + long i = 0; + + for (; i + 8 <= length; i += 8) { + uint64_t a, b; + memcpy(&a, str + i, 8); + memcpy(&b, rptr + i, 8); + if (a != b) { + a = __builtin_bswap64(a); + b = __builtin_bswap64(b); + return (a < b) ? -1 : 1; + } + } + + for (; i < length; i++) { + if (str[i] != rptr[i]) { + return (str[i] < rptr[i]) ? -1 : 1; + } } + + return 0; } +#endif +#endif -static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring) { - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } + const char *rstring_ptr; + long rstring_length; + + RSTRING_GETMEM(rstring, rstring_ptr, rstring_length); - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; + if (length == rstring_length) { + return rstring_cache_memcmp(str, rstring_ptr, length); + } else { + return (int)(length - rstring_length); } +} +ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ int low = 0; int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; while (low <= high) { - mid = (high + low) >> 1; + int mid = (high + low) >> 1; VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, entry); + int cmp = rstring_cache_cmp(str, length, entry); - if (last_cmp == 0) { + if (cmp == 0) { return entry; - } else if (last_cmp > 0) { + } else if (cmp > 0) { low = mid + 1; } else { high = mid - 1; } } - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - VALUE rstring = build_interned_string(str, length); if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rstring); + rvalue_cache_insert_at(cache, low, rstring); } return rstring; } static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) { - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - int low = 0; int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; while (low <= high) { - mid = (high + low) >> 1; + int mid = (high + low) >> 1; VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); - if (last_cmp == 0) { + if (cmp == 0) { return entry; - } else if (last_cmp > 0) { + } else if (cmp > 0) { low = mid + 1; } else { high = mid - 1; } } - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - VALUE rsymbol = build_symbol(str, length); if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rsymbol); + rvalue_cache_insert_at(cache, low, rsymbol); } return rsymbol; } @@ -225,19 +211,20 @@ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalu if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { stack = rvalue_stack_spill(stack, handle, stack_ref); } else { - REALLOC_N(stack->ptr, VALUE, required); + JSON_SIZED_REALLOC_N(stack->ptr, VALUE, required, stack->capa); stack->capa = required; } return stack; } -static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) +static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) { if (RB_UNLIKELY(stack->head >= stack->capa)) { stack = rvalue_stack_grow(stack, handle, stack_ref); } stack->ptr[stack->head] = value; stack->head++; + return value; } static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) @@ -254,17 +241,27 @@ static void rvalue_stack_mark(void *ptr) { rvalue_stack *stack = (rvalue_stack *)ptr; long index; - for (index = 0; index < stack->head; index++) { - rb_gc_mark(stack->ptr[index]); + if (stack && stack->ptr) { + for (index = 0; index < stack->head; index++) { + rb_gc_mark(stack->ptr[index]); + } } } +static void rvalue_stack_free_buffer(rvalue_stack *stack) +{ + JSON_SIZED_FREE_N(stack->ptr, stack->capa); + stack->ptr = NULL; +} + static void rvalue_stack_free(void *ptr) { rvalue_stack *stack = (rvalue_stack *)ptr; if (stack) { - ruby_xfree(stack->ptr); - ruby_xfree(stack); + rvalue_stack_free_buffer(stack); +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + JSON_SIZED_FREE(stack); +#endif } } @@ -275,14 +272,13 @@ static size_t rvalue_stack_memsize(const void *ptr) } static const rb_data_type_t JSON_Parser_rvalue_stack_type = { - "JSON::Ext::Parser/rvalue_stack", - { + .wrap_struct_name = "JSON::Ext::Parser/rvalue_stack", + .function = { .dmark = rvalue_stack_mark, .dfree = rvalue_stack_free, .dsize = rvalue_stack_memsize, }, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE, }; static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) @@ -301,50 +297,16 @@ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, static void rvalue_stack_eagerly_release(VALUE handle) { - rvalue_stack *stack; - TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - RTYPEDDATA_DATA(handle) = NULL; - rvalue_stack_free(stack); -} - -/* unicode */ - -static const signed char digit_values[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, - -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1 -}; - -static uint32_t unescape_unicode(const unsigned char *p) -{ - const uint32_t replacement_char = 0xFFFD; - - signed char b; - uint32_t result = 0; - b = digit_values[p[0]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[1]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[2]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[3]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - return result; + if (handle) { + rvalue_stack *stack; + TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); +#ifdef HAVE_RUBY_TYPED_EMBEDDABLE + rvalue_stack_free_buffer(stack); +#else + rvalue_stack_free(stack); + RTYPEDDATA_DATA(handle) = NULL; +#endif + } } static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) @@ -373,1861 +335,277 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) return len; } +enum duplicate_key_action { + JSON_DEPRECATED = 0, + JSON_IGNORE, + JSON_RAISE, +}; + typedef struct JSON_ParserStruct { - VALUE Vsource; - char *source; - long len; - char *memo; - VALUE create_id; - VALUE object_class; - VALUE array_class; + VALUE on_load_proc; VALUE decimal_class; - VALUE match_string; - FBuffer fbuffer; - int in_array; + ID decimal_method_id; + enum duplicate_key_action on_duplicate_key; int max_nesting; bool allow_nan; bool allow_trailing_comma; - bool parsing_name; + bool allow_control_characters; + bool allow_invalid_escape; bool symbolize_names; bool freeze; - bool create_additions; - bool deprecated_create_additions; - rvalue_cache name_cache; +} JSON_ParserConfig; + +typedef struct JSON_ParserStateStruct { + VALUE *stack_handle; + const char *start; + const char *cursor; + const char *end; rvalue_stack *stack; - VALUE stack_handle; -} JSON_Parser; + rvalue_cache name_cache; + int in_array; + int current_nesting; + unsigned int emitted_deprecations; +} JSON_ParserState; -#define GET_PARSER \ - GET_PARSER_INIT; \ - if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") +static inline size_t rest(JSON_ParserState *state) { + return state->end - state->cursor; +} -#define GET_PARSER_INIT \ - JSON_Parser *json; \ - TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) +static inline bool eos(JSON_ParserState *state) { + return state->cursor >= state->end; +} -#define MinusInfinity "-Infinity" -#define EVIL 0x666 +static inline char peek(JSON_ParserState *state) +{ + if (RB_UNLIKELY(eos(state))) { + return 0; + } + return *state->cursor; +} -static const rb_data_type_t JSON_Parser_type; -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out) +{ + JSON_ASSERT(state->cursor <= state->end); + // Redundant but helpful for hardening + if (RB_UNLIKELY(state->cursor > state->end)) { + state->cursor = state->end; + } -#define PARSE_ERROR_FRAGMENT_LEN 32 -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error(const char *format, const char *start) -{ - char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; + const char *cursor = state->cursor; + long column = 0; + long line = 1; - size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN); - const char *ptr = start; + while (cursor >= state->start) { + if (*cursor-- == '\n') { + break; + } + column++; + } - if (len == PARSE_ERROR_FRAGMENT_LEN) { - MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); - buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0'; - ptr = buffer; - } - - rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); -} - - - -#line 465 "parser.rl" - - - -#line 447 "parser.c" -enum {JSON_object_start = 1}; -enum {JSON_object_first_final = 32}; -enum {JSON_object_error = 0}; - -enum {JSON_object_en_main = 1}; - - -#line 505 "parser.rl" - - -#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) - -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - - long stack_head = json->stack->head; - - -#line 471 "parser.c" - { - cs = JSON_object_start; - } - -#line 520 "parser.rl" - -#line 478 "parser.c" - { - short _widec; - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 123 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 13: goto st2; - case 32: goto st2; - case 34: goto tr2; - case 47: goto st28; - case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st2; - goto st0; -tr2: -#line 484 "parser.rl" - { - char *np; - json->parsing_name = true; - np = JSON_parse_string(json, p, pe, result); - json->parsing_name = false; - if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { - PUSH(*result); - {p = (( np))-1;} - } - } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 523 "parser.c" - switch( (*p) ) { - case 13: goto st3; - case 32: goto st3; - case 47: goto st4; - case 58: goto st8; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st3; - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st5; - case 47: goto st7; - } - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 42 ) - goto st6; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st3; - } - goto st5; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 10 ) - goto st3; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 13: goto st8; - case 32: goto st8; - case 34: goto tr11; - case 45: goto tr11; - case 47: goto st24; - case 73: goto tr11; - case 78: goto tr11; - case 91: goto tr11; - case 102: goto tr11; - case 110: goto tr11; - case 116: goto tr11; - case 123: goto tr11; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr11; - } else if ( (*p) >= 9 ) - goto st8; - goto st0; -tr11: -#line 473 "parser.rl" - { - char *np = JSON_parse_value(json, p, pe, result, current_nesting); - if (np == NULL) { - p--; {p++; cs = 9; goto _out;} - } else { - {p = (( np))-1;} + while (cursor >= state->start) { + if (*cursor-- == '\n') { + line++; } } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 604 "parser.c" - _widec = (*p); - if ( (*p) < 13 ) { - if ( (*p) > 9 ) { - if ( 10 <= (*p) && (*p) <= 10 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 13 ) { - if ( (*p) < 44 ) { - if ( 32 <= (*p) && (*p) <= 32 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 44 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 125: goto tr4; - case 269: goto st10; - case 288: goto st10; - case 300: goto st11; - case 303: goto st16; - case 525: goto st9; - case 544: goto st9; - case 556: goto st2; - case 559: goto st20; - } - if ( _widec > 266 ) { - if ( 521 <= _widec && _widec <= 522 ) - goto st9; - } else if ( _widec >= 265 ) - goto st10; - goto st0; -tr4: -#line 495 "parser.rl" - { p--; {p++; cs = 32; goto _out;} } - goto st32; -st32: - if ( ++p == pe ) - goto _test_eof32; -case 32: -#line 672 "parser.c" - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 44: goto st11; - case 47: goto st16; - case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - switch( (*p) ) { - case 13: goto st11; - case 32: goto st11; - case 34: goto tr2; - case 47: goto st12; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st11; - goto st0; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - switch( (*p) ) { - case 42: goto st13; - case 47: goto st15; - } - goto st0; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - if ( (*p) == 42 ) - goto st14; - goto st13; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - switch( (*p) ) { - case 42: goto st14; - case 47: goto st11; - } - goto st13; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - if ( (*p) == 10 ) - goto st11; - goto st15; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - switch( (*p) ) { - case 42: goto st17; - case 47: goto st19; - } - goto st0; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - if ( (*p) == 42 ) - goto st18; - goto st17; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - switch( (*p) ) { - case 42: goto st18; - case 47: goto st10; - } - goto st17; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - if ( (*p) == 10 ) - goto st10; - goto st19; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - _widec = (*p); - if ( (*p) > 42 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st17; - case 303: goto st19; - case 554: goto st21; - case 559: goto st23; - } - goto st0; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: - _widec = (*p); - if ( (*p) < 42 ) { - if ( (*p) <= 41 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 42 ) { - if ( 43 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st18; - case 554: goto st22; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st21; - } else if ( _widec >= 128 ) - goto st17; - goto st0; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: - _widec = (*p); - if ( (*p) < 43 ) { - if ( (*p) > 41 ) { - if ( 42 <= (*p) && (*p) <= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 46 ) { - if ( (*p) > 47 ) { - if ( 48 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st18; - case 303: goto st10; - case 554: goto st22; - case 559: goto st9; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st21; - } else if ( _widec >= 128 ) - goto st17; - goto st0; -st23: - if ( ++p == pe ) - goto _test_eof23; -case 23: - _widec = (*p); - if ( (*p) < 10 ) { - if ( (*p) <= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 10 ) { - if ( 11 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 482 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 266: goto st10; - case 522: goto st9; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st23; - } else if ( _widec >= 128 ) - goto st19; - goto st0; -st24: - if ( ++p == pe ) - goto _test_eof24; -case 24: - switch( (*p) ) { - case 42: goto st25; - case 47: goto st27; - } - goto st0; -st25: - if ( ++p == pe ) - goto _test_eof25; -case 25: - if ( (*p) == 42 ) - goto st26; - goto st25; -st26: - if ( ++p == pe ) - goto _test_eof26; -case 26: - switch( (*p) ) { - case 42: goto st26; - case 47: goto st8; - } - goto st25; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: - if ( (*p) == 10 ) - goto st8; - goto st27; -st28: - if ( ++p == pe ) - goto _test_eof28; -case 28: - switch( (*p) ) { - case 42: goto st29; - case 47: goto st31; - } - goto st0; -st29: - if ( ++p == pe ) - goto _test_eof29; -case 29: - if ( (*p) == 42 ) - goto st30; - goto st29; -st30: - if ( ++p == pe ) - goto _test_eof30; -case 30: - switch( (*p) ) { - case 42: goto st30; - case 47: goto st2; - } - goto st29; -st31: - if ( ++p == pe ) - goto _test_eof31; -case 31: - if ( (*p) == 10 ) - goto st2; - goto st31; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof32: cs = 32; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; - _test_eof23: cs = 23; goto _test_eof; - _test_eof24: cs = 24; goto _test_eof; - _test_eof25: cs = 25; goto _test_eof; - _test_eof26: cs = 26; goto _test_eof; - _test_eof27: cs = 27; goto _test_eof; - _test_eof28: cs = 28; goto _test_eof; - _test_eof29: cs = 29; goto _test_eof; - _test_eof30: cs = 30; goto _test_eof; - _test_eof31: cs = 31; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 521 "parser.rl" - - if (cs >= JSON_object_first_final) { - long count = json->stack->head - stack_head; - - if (RB_UNLIKELY(json->object_class)) { - VALUE object = rb_class_new_instance(0, 0, json->object_class); - long index = 0; - VALUE *items = rvalue_stack_peek(json->stack, count); - while (index < count) { - VALUE name = items[index++]; - VALUE value = items[index++]; - rb_funcall(object, i_aset, 2, name, value); + *line_out = line; + *column_out = column; +} + +static void emit_parse_warning(const char *message, JSON_ParserState *state) +{ + long line, column; + cursor_position(state, &line, &column); + + VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column); + rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning); +} + +#define PARSE_ERROR_FRAGMENT_LEN 32 + +static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column) +{ + unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3]; + + const char *ptr = "EOF"; + if (state->cursor && state->cursor < state->end) { + ptr = state->cursor; + size_t len = 0; + while (len < PARSE_ERROR_FRAGMENT_LEN) { + char ch = ptr[len]; + if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') { + break; } - *result = object; - } else { - VALUE hash; -#ifdef HAVE_RB_HASH_NEW_CAPA - hash = rb_hash_new_capa(count >> 1); -#else - hash = rb_hash_new(); -#endif - rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash); - *result = hash; + len++; } - rvalue_stack_pop(json->stack, count); - if (RB_UNLIKELY(json->create_additions)) { - VALUE klassname; - if (json->object_class) { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); - } else { - klassname = rb_hash_aref(*result, json->create_id); - } - if (!NIL_P(klassname)) { - VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { - if (json->deprecated_create_additions) { - json_deprecated(deprecated_create_additions_warning); - } - *result = rb_funcall(klass, i_json_create, 1, *result); - } + if (len) { + buffer[0] = '\''; + MEMCPY(buffer + 1, ptr, char, len); + + while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte + len--; } - } - return p + 1; - } else { - return NULL; - } -} - - -#line 1063 "parser.c" -enum {JSON_value_start = 1}; -enum {JSON_value_first_final = 29}; -enum {JSON_value_error = 0}; - -enum {JSON_value_en_main = 1}; - - -#line 654 "parser.rl" - - -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - -#line 1079 "parser.c" - { - cs = JSON_value_start; - } - -#line 661 "parser.rl" - -#line 1086 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 13: goto st1; - case 32: goto st1; - case 34: goto tr2; - case 45: goto tr3; - case 47: goto st6; - case 73: goto st10; - case 78: goto st17; - case 91: goto tr7; - case 102: goto st19; - case 110: goto st23; - case 116: goto st26; - case 123: goto tr11; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr3; - } else if ( (*p) >= 9 ) - goto st1; - goto st0; -st0: -cs = 0; - goto _out; -tr2: -#line 599 "parser.rl" - { - char *np = JSON_parse_string(json, p, pe, result); - if (np == NULL) { - p--; - {p++; cs = 29; goto _out;} - } else { - {p = (( np))-1;} - } - } - goto st29; -tr3: -#line 609 "parser.rl" - { - char *np; - if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { - if (json->allow_nan) { - *result = CMinusInfinity; - {p = (( p + 10))-1;} - p--; {p++; cs = 29; goto _out;} - } else { - raise_parse_error("unexpected token at '%s'", p); + + if (buffer[len] >= 0xC0) { // multibyte character start + len--; } - } - np = JSON_parse_number(json, p, pe, result); - if (np != NULL) { - {p = (( np))-1;} - } - p--; {p++; cs = 29; goto _out;} - } - goto st29; -tr7: -#line 627 "parser.rl" - { - char *np; - json->in_array++; - np = JSON_parse_array(json, p, pe, result, current_nesting + 1); - json->in_array--; - if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} - } - goto st29; -tr11: -#line 635 "parser.rl" - { - char *np; - np = JSON_parse_object(json, p, pe, result, current_nesting + 1); - if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} - } - goto st29; -tr25: -#line 592 "parser.rl" - { - if (json->allow_nan) { - *result = CInfinity; - } else { - raise_parse_error("unexpected token at '%s'", p - 7); - } - } - goto st29; -tr27: -#line 585 "parser.rl" - { - if (json->allow_nan) { - *result = CNaN; - } else { - raise_parse_error("unexpected token at '%s'", p - 2); + + buffer[len + 1] = '\''; + buffer[len + 2] = '\0'; + ptr = (const char *)buffer; } } - goto st29; -tr31: -#line 579 "parser.rl" - { - *result = Qfalse; - } - goto st29; -tr34: -#line 576 "parser.rl" - { - *result = Qnil; - } - goto st29; -tr37: -#line 582 "parser.rl" - { - *result = Qtrue; - } - goto st29; -st29: - if ( ++p == pe ) - goto _test_eof29; -case 29: -#line 641 "parser.rl" - { p--; {p++; cs = 29; goto _out;} } -#line 1213 "parser.c" - switch( (*p) ) { - case 13: goto st29; - case 32: goto st29; - case 47: goto st2; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st29; - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 42: goto st3; - case 47: goto st5; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 42 ) - goto st4; - goto st3; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st4; - case 47: goto st29; - } - goto st3; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 10 ) - goto st29; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st9; - } - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 42 ) - goto st8; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 42: goto st8; - case 47: goto st1; - } - goto st7; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 10 ) - goto st1; - goto st9; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - if ( (*p) == 110 ) - goto st11; - goto st0; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - if ( (*p) == 102 ) - goto st12; - goto st0; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - if ( (*p) == 105 ) - goto st13; - goto st0; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - if ( (*p) == 110 ) - goto st14; - goto st0; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - if ( (*p) == 105 ) - goto st15; - goto st0; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - if ( (*p) == 116 ) - goto st16; - goto st0; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - if ( (*p) == 121 ) - goto tr25; - goto st0; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - if ( (*p) == 97 ) - goto st18; - goto st0; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - if ( (*p) == 78 ) - goto tr27; - goto st0; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - if ( (*p) == 97 ) - goto st20; - goto st0; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - if ( (*p) == 108 ) - goto st21; - goto st0; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: - if ( (*p) == 115 ) - goto st22; - goto st0; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: - if ( (*p) == 101 ) - goto tr31; - goto st0; -st23: - if ( ++p == pe ) - goto _test_eof23; -case 23: - if ( (*p) == 117 ) - goto st24; - goto st0; -st24: - if ( ++p == pe ) - goto _test_eof24; -case 24: - if ( (*p) == 108 ) - goto st25; - goto st0; -st25: - if ( ++p == pe ) - goto _test_eof25; -case 25: - if ( (*p) == 108 ) - goto tr34; - goto st0; -st26: - if ( ++p == pe ) - goto _test_eof26; -case 26: - if ( (*p) == 114 ) - goto st27; - goto st0; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: - if ( (*p) == 117 ) - goto st28; - goto st0; -st28: - if ( ++p == pe ) - goto _test_eof28; -case 28: - if ( (*p) == 101 ) - goto tr37; - goto st0; - } - _test_eof1: cs = 1; goto _test_eof; - _test_eof29: cs = 29; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; - _test_eof23: cs = 23; goto _test_eof; - _test_eof24: cs = 24; goto _test_eof; - _test_eof25: cs = 25; goto _test_eof; - _test_eof26: cs = 26; goto _test_eof; - _test_eof27: cs = 27; goto _test_eof; - _test_eof28: cs = 28; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 662 "parser.rl" - - if (json->freeze) { - OBJ_FREEZE(*result); - } - - if (cs >= JSON_value_first_final) { - PUSH(*result); - return p; - } else { - return NULL; - } -} + VALUE message = rb_enc_sprintf(enc_utf8, format, ptr); + rb_str_catf(message, " at line %ld column %ld", line, column); + return message; +} -#line 1469 "parser.c" -enum {JSON_integer_start = 1}; -enum {JSON_integer_first_final = 3}; -enum {JSON_integer_error = 0}; +static VALUE parse_error_new(VALUE message, long line, long column) +{ + VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message); + rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line)); + rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column)); + return exc; +} -enum {JSON_integer_en_main = 1}; +NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state) +{ + long line, column; + cursor_position(state, &line, &column); + VALUE message = build_parse_error_message(format, state, line, column); + rb_exc_raise(parse_error_new(message, line, column)); +} +NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at) +{ + state->cursor = at; + raise_parse_error(format, state); +} -#line 683 "parser.rl" +/* unicode */ +static const signed char digit_values[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, + -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1 +}; -#define MAX_FAST_INTEGER_SIZE 18 -static inline VALUE fast_parse_integer(char *p, char *pe) +static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe) { - bool negative = false; - if (*p == '-') { - negative = true; - p++; + if (RB_UNLIKELY(sp > spe - 4)) { + raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2); } - long long memo = 0; - while (p < pe) { - memo *= 10; - memo += *p - '0'; - p++; - } + const unsigned char *p = (const unsigned char *)sp; - if (negative) { - memo = -memo; + const signed char b0 = digit_values[p[0]]; + const signed char b1 = digit_values[p[1]]; + const signed char b2 = digit_values[p[2]]; + const signed char b3 = digit_values[p[3]]; + + if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) { + raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2); } - return LL2NUM(memo); + + return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3; } -static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) +#define GET_PARSER_CONFIG \ + JSON_ParserConfig *config; \ + TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config) + +static const rb_data_type_t JSON_ParserConfig_type; + +NOINLINE(static) void +json_eat_comments(JSON_ParserState *state) { - long len = p - json->memo; - if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { - *result = fast_parse_integer(json->memo, p); - } else { - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); - } - return p + 1; -} - - -#line 1517 "parser.c" -enum {JSON_float_start = 1}; -enum {JSON_float_first_final = 6}; -enum {JSON_float_error = 0}; - -enum {JSON_float_en_main = 1}; - - -#line 735 "parser.rl" - - -static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - bool is_float = false; - - -#line 1534 "parser.c" - { - cs = JSON_float_start; - } - -#line 743 "parser.rl" - json->memo = p; - -#line 1542 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - switch( (*p) ) { - case 45: goto st2; - case 48: goto st6; - } - if ( 49 <= (*p) && (*p) <= 57 ) - goto st10; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - if ( (*p) == 48 ) - goto st6; - if ( 49 <= (*p) && (*p) <= 57 ) - goto st10; - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 45: goto st0; - case 46: goto tr8; - case 69: goto tr9; - case 101: goto tr9; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st0; - goto tr7; -tr7: -#line 727 "parser.rl" - { p--; {p++; cs = 7; goto _out;} } - goto st7; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: -#line 1589 "parser.c" - goto st0; -tr8: -#line 728 "parser.rl" - { is_float = true; } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 1599 "parser.c" - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 69: goto st4; - case 101: goto st4; - } - if ( (*p) > 46 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - } else if ( (*p) >= 45 ) - goto st0; - goto tr7; -tr9: -#line 728 "parser.rl" - { is_float = true; } - goto st4; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: -#line 1625 "parser.c" - switch( (*p) ) { - case 43: goto st5; - case 45: goto st5; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st9; - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st9; - goto st0; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - switch( (*p) ) { - case 69: goto st0; - case 101: goto st0; - } - if ( (*p) > 46 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st9; - } else if ( (*p) >= 45 ) - goto st0; - goto tr7; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - switch( (*p) ) { - case 45: goto st0; - case 46: goto tr8; - case 69: goto tr9; - case 101: goto tr9; - } - if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; - goto tr7; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 745 "parser.rl" - - if (cs >= JSON_float_first_final) { - if (!is_float) { - return JSON_decode_integer(json, p, result); + const char *start = state->cursor; + state->cursor++; + + switch (peek(state)) { + case '/': { + state->cursor = memchr(state->cursor, '\n', state->end - state->cursor); + if (!state->cursor) { + state->cursor = state->end; + } else { + state->cursor++; + } + break; } - VALUE mod = Qnil; - ID method_id = 0; - if (json->decimal_class) { - if (rb_respond_to(json->decimal_class, i_try_convert)) { - mod = json->decimal_class; - method_id = i_try_convert; - } else if (rb_respond_to(json->decimal_class, i_new)) { - mod = json->decimal_class; - method_id = i_new; - } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) { - VALUE name = rb_class_name(json->decimal_class); - const char *name_cstr = RSTRING_PTR(name); - const char *last_colon = strrchr(name_cstr, ':'); - if (last_colon) { - const char *mod_path_end = last_colon - 1; - VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); - mod = rb_path_to_class(mod_path); + case '*': { + state->cursor++; - const char *method_name_beg = last_colon + 1; - long before_len = method_name_beg - name_cstr; - long len = RSTRING_LEN(name) - before_len; - VALUE method_name = rb_str_substr(name, before_len, len); - method_id = SYM2ID(rb_str_intern(method_name)); - } else { - mod = rb_mKernel; - method_id = SYM2ID(rb_str_intern(name)); + while (true) { + const char *next_match = memchr(state->cursor, '*', state->end - state->cursor); + if (!next_match) { + raise_parse_error_at("unterminated comment, expected closing '*/'", state, start); + } + + state->cursor = next_match + 1; + if (peek(state) == '/') { + state->cursor++; + break; } } + break; } + default: + raise_parse_error_at("unexpected token %s", state, start); + break; + } +} - long len = p - json->memo; - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - - if (method_id) { - VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer)); - *result = rb_funcallv(mod, method_id, 1, &text); - } else { - *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1)); - } +ALWAYS_INLINE(static) void +json_eat_whitespace(JSON_ParserState *state) +{ + while (true) { + switch (peek(state)) { + case ' ': + state->cursor++; + break; + case '\n': + state->cursor++; + + // Heuristic: if we see a newline, there is likely consecutive spaces after it. +#if JSON_CPU_LITTLE_ENDIAN_64BITS + while (rest(state) > 8) { + uint64_t chunk; + memcpy(&chunk, state->cursor, sizeof(uint64_t)); + if (chunk == 0x2020202020202020) { + state->cursor += 8; + continue; + } - return p + 1; - } else { - return NULL; - } -} - - - -#line 1738 "parser.c" -enum {JSON_array_start = 1}; -enum {JSON_array_first_final = 22}; -enum {JSON_array_error = 0}; - -enum {JSON_array_en_main = 1}; - - -#line 825 "parser.rl" - - -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - long stack_head = json->stack->head; - - -#line 1759 "parser.c" - { - cs = JSON_array_start; - } - -#line 837 "parser.rl" - -#line 1766 "parser.c" - { - short _widec; - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 91 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 13: goto st2; - case 32: goto st2; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st18; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 93: goto tr4; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st2; - goto st0; -tr2: -#line 805 "parser.rl" - { - VALUE v = Qnil; - char *np = JSON_parse_value(json, p, pe, &v, current_nesting); - if (np == NULL) { - p--; {p++; cs = 3; goto _out;} - } else { - {p = (( np))-1;} - } - } - goto st3; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: -#line 1821 "parser.c" - _widec = (*p); - if ( 44 <= (*p) && (*p) <= 44 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 13: goto st3; - case 32: goto st3; - case 47: goto st4; - case 93: goto tr4; - case 300: goto st8; - case 556: goto st13; - } - if ( 9 <= _widec && _widec <= 10 ) - goto st3; - goto st0; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st5; - case 47: goto st7; - } - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 42 ) - goto st6; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st3; - } - goto st5; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 10 ) - goto st3; - goto st7; -tr4: -#line 817 "parser.rl" - { p--; {p++; cs = 22; goto _out;} } - goto st22; -st22: - if ( ++p == pe ) - goto _test_eof22; -case 22: -#line 1880 "parser.c" - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 13: goto st8; - case 32: goto st8; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st9; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st8; - goto st0; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - switch( (*p) ) { - case 42: goto st10; - case 47: goto st12; - } - goto st0; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: - if ( (*p) == 42 ) - goto st11; - goto st10; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - switch( (*p) ) { - case 42: goto st11; - case 47: goto st8; - } - goto st10; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - if ( (*p) == 10 ) - goto st8; - goto st12; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - _widec = (*p); - if ( (*p) < 13 ) { - if ( (*p) > 9 ) { - if ( 10 <= (*p) && (*p) <= 10 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 13 ) { - if ( (*p) > 32 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 32 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 34: goto tr2; - case 45: goto tr2; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 93: goto tr4; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - case 269: goto st8; - case 288: goto st8; - case 303: goto st9; - case 525: goto st13; - case 544: goto st13; - case 559: goto st14; - } - if ( _widec < 265 ) { - if ( 48 <= _widec && _widec <= 57 ) - goto tr2; - } else if ( _widec > 266 ) { - if ( 521 <= _widec && _widec <= 522 ) - goto st13; - } else - goto st8; - goto st0; -st14: - if ( ++p == pe ) - goto _test_eof14; -case 14: - _widec = (*p); - if ( (*p) > 42 ) { - if ( 47 <= (*p) && (*p) <= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st10; - case 303: goto st12; - case 554: goto st15; - case 559: goto st17; - } - goto st0; -st15: - if ( ++p == pe ) - goto _test_eof15; -case 15: - _widec = (*p); - if ( (*p) < 42 ) { - if ( (*p) <= 41 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 42 ) { - if ( 43 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st11; - case 554: goto st16; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st15; - } else if ( _widec >= 128 ) - goto st10; - goto st0; -st16: - if ( ++p == pe ) - goto _test_eof16; -case 16: - _widec = (*p); - if ( (*p) < 43 ) { - if ( (*p) > 41 ) { - if ( 42 <= (*p) && (*p) <= 42 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 46 ) { - if ( (*p) > 47 ) { - if ( 48 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) >= 47 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 298: goto st11; - case 303: goto st8; - case 554: goto st16; - case 559: goto st13; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st15; - } else if ( _widec >= 128 ) - goto st10; - goto st0; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: - _widec = (*p); - if ( (*p) < 10 ) { - if ( (*p) <= 9 ) { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else if ( (*p) > 10 ) { - if ( 11 <= (*p) ) - { _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - } else { - _widec = (short)(128 + ((*p) - -128)); - if ( -#line 815 "parser.rl" - json->allow_trailing_comma ) _widec += 256; - } - switch( _widec ) { - case 266: goto st8; - case 522: goto st13; - } - if ( _widec > 383 ) { - if ( 384 <= _widec && _widec <= 639 ) - goto st17; - } else if ( _widec >= 128 ) - goto st12; - goto st0; -st18: - if ( ++p == pe ) - goto _test_eof18; -case 18: - switch( (*p) ) { - case 42: goto st19; - case 47: goto st21; - } - goto st0; -st19: - if ( ++p == pe ) - goto _test_eof19; -case 19: - if ( (*p) == 42 ) - goto st20; - goto st19; -st20: - if ( ++p == pe ) - goto _test_eof20; -case 20: - switch( (*p) ) { - case 42: goto st20; - case 47: goto st2; - } - goto st19; -st21: - if ( ++p == pe ) - goto _test_eof21; -case 21: - if ( (*p) == 10 ) - goto st2; - goto st21; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof22: cs = 22; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - _test_eof14: cs = 14; goto _test_eof; - _test_eof15: cs = 15; goto _test_eof; - _test_eof16: cs = 16; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; - _test_eof18: cs = 18; goto _test_eof; - _test_eof19: cs = 19; goto _test_eof; - _test_eof20: cs = 20; goto _test_eof; - _test_eof21: cs = 21; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 838 "parser.rl" - - if(cs >= JSON_array_first_final) { - long count = json->stack->head - stack_head; - - if (RB_UNLIKELY(json->array_class)) { - VALUE array = rb_class_new_instance(0, 0, json->array_class); - VALUE *items = rvalue_stack_peek(json->stack, count); - long index; - for (index = 0; index < count; index++) { - rb_funcall(array, i_leftshift, 1, items[index]); - } - *result = array; - } else { - VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count)); - *result = array; + uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT; + state->cursor += consecutive_spaces; + break; + } +#endif + break; + case '\t': + case '\r': + state->cursor++; + break; + case '/': + json_eat_comments(state); + break; + + default: + return; } - rvalue_stack_pop(json->stack, count); - - return p + 1; - } else { - raise_parse_error("unexpected token at '%s'", p); - return NULL; } } @@ -2257,16 +635,27 @@ static inline VALUE build_string(const char *start, const char *end, bool intern return result; } -static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) +static inline bool json_string_cacheable_p(const char *string, size_t length) { + // We mostly want to cache strings that are likely to be repeated. + // Simple heuristics: + // - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold. + // - If the first character isn't a letter, we're much less likely to see this string again. + return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]); +} + +static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name) +{ + bool intern = is_name || config->freeze; + bool symbolize = is_name && config->symbolize_names; size_t bufferSize = stringEnd - string; - if (is_name && json->in_array) { + if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize); } else { - cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize); } if (RB_LIKELY(cached_key)) { @@ -2277,323 +666,798 @@ static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringE return build_string(string, stringEnd, intern, symbolize); } -static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - char *p = string, *pe = string, *unescape, *bufferStart, *buffer; - int unescape_len; - char buf[4]; +#define JSON_MAX_UNESCAPE_POSITIONS 16 +typedef struct _json_unescape_positions { + long size; + const char **positions; + unsigned long additional_backslashes; +} JSON_UnescapePositions; - if (is_name && json->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; +static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions) +{ + while (positions->size) { + positions->size--; + const char *next_position = positions->positions[0]; + positions->positions++; + if (next_position >= pe) { + return next_position; } } - pe = memchr(p, '\\', bufferSize); - if (RB_UNLIKELY(pe == NULL)) { - return build_string(string, stringEnd, intern, symbolize); + if (positions->additional_backslashes) { + positions->additional_backslashes--; + return memchr(pe, '\\', stringEnd - pe); } + return NULL; +} + +NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions) +{ + bool intern = is_name || config->freeze; + bool symbolize = is_name && config->symbolize_names; + size_t bufferSize = stringEnd - string; + const char *p = string, *pe = string, *bufferStart; + char *buffer; + VALUE result = rb_str_buf_new(bufferSize); rb_enc_associate_index(result, utf8_encindex); - buffer = bufferStart = RSTRING_PTR(result); - - while (pe < stringEnd) { - if (*pe == '\\') { - unescape = (char *) "?"; - unescape_len = 1; - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } - switch (*++pe) { - case 'n': - unescape = (char *) "\n"; - break; - case 'r': - unescape = (char *) "\r"; - break; - case 't': - unescape = (char *) "\t"; - break; - case '"': - unescape = (char *) "\""; - break; - case '\\': - unescape = (char *) "\\"; - break; - case 'b': - unescape = (char *) "\b"; - break; - case 'f': - unescape = (char *) "\f"; - break; - case 'u': - if (pe > stringEnd - 4) { - raise_parse_error("incomplete unicode character escape sequence at '%s'", p); - } else { - uint32_t ch = unescape_unicode((unsigned char *) ++pe); - pe += 3; - /* To handle values above U+FFFF, we take a sequence of - * \uXXXX escapes in the U+D800..U+DBFF then - * U+DC00..U+DFFF ranges, take the low 10 bits from each - * to make a 20-bit number, then add 0x10000 to get the - * final codepoint. - * - * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling - * Surrogate Pairs in UTF-16", and 23.6 "Surrogates - * Area". - */ - if ((ch & 0xFC00) == 0xD800) { - pe++; - if (pe > stringEnd - 6) { - raise_parse_error("incomplete surrogate pair at '%s'", p); - } - if (pe[0] == '\\' && pe[1] == 'u') { - uint32_t sur = unescape_unicode((unsigned char *) pe + 2); - ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) - | (sur & 0x3FF)); - pe += 5; - } else { - unescape = (char *) "?"; - break; - } + buffer = RSTRING_PTR(result); + bufferStart = buffer; + +#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe; + + while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) { + if (pe > p) { + MEMCPY(buffer, p, char, pe - p); + buffer += pe - p; + } + switch (*++pe) { + case '"': + case '/': + p = pe; // nothing to unescape just need to skip the backslash + break; + case '\\': + APPEND_CHAR('\\'); + break; + case 'n': + APPEND_CHAR('\n'); + break; + case 'r': + APPEND_CHAR('\r'); + break; + case 't': + APPEND_CHAR('\t'); + break; + case 'b': + APPEND_CHAR('\b'); + break; + case 'f': + APPEND_CHAR('\f'); + break; + case 'u': { + uint32_t ch = unescape_unicode(state, ++pe, stringEnd); + pe += 3; + /* To handle values above U+FFFF, we take a sequence of + * \uXXXX escapes in the U+D800..U+DBFF then + * U+DC00..U+DFFF ranges, take the low 10 bits from each + * to make a 20-bit number, then add 0x10000 to get the + * final codepoint. + * + * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling + * Surrogate Pairs in UTF-16", and 23.6 "Surrogates + * Area". + */ + if ((ch & 0xFC00) == 0xD800) { + pe++; + if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) { + uint32_t sur = unescape_unicode(state, pe + 2, stringEnd); + + if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) { + raise_parse_error_at("invalid surrogate pair at %s", state, p); } - unescape_len = convert_UTF32_to_UTF8(buf, ch); - unescape = buf; + + ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF)); + pe += 5; + } else { + raise_parse_error_at("incomplete surrogate pair at %s", state, p); + break; } - break; - default: - p = pe; - continue; + } + + int unescape_len = convert_UTF32_to_UTF8(buffer, ch); + buffer += unescape_len; + p = ++pe; + break; } - MEMCPY(buffer, unescape, char, unescape_len); - buffer += unescape_len; - p = ++pe; - } else { - pe++; + default: + if ((unsigned char)*pe < 0x20) { + if (!config->allow_control_characters) { + if (*pe == '\n') { + raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1); + } + raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1); + } + } + + if (config->allow_invalid_escape) { + APPEND_CHAR(*pe); + } else { + raise_parse_error_at("invalid escape character in string: %s", state, pe - 1); + } + break; } } +#undef APPEND_CHAR - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; + if (stringEnd > p) { + MEMCPY(buffer, p, char, stringEnd - p); + buffer += stringEnd - p; } rb_str_set_len(result, buffer - bufferStart); if (symbolize) { result = rb_str_intern(result); } else if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + result = rb_str_to_interned_str(result); } return result; } +#define MAX_FAST_INTEGER_SIZE 18 +#define MAX_NUMBER_STACK_BUFFER 128 + +typedef VALUE (*json_number_decode_func_t)(const char *ptr); + +static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func) +{ + if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) { + char buffer[MAX_NUMBER_STACK_BUFFER]; + MEMCPY(buffer, start, char, len); + buffer[len] = '\0'; + return func(buffer); + } else { + VALUE buffer_v = rb_str_tmp_new(len); + char *buffer = RSTRING_PTR(buffer_v); + MEMCPY(buffer, start, char, len); + buffer[len] = '\0'; + VALUE number = func(buffer); + RB_GC_GUARD(buffer_v); + return number; + } +} + +static VALUE json_decode_inum(const char *buffer) +{ + return rb_cstr2inum(buffer, 10); +} -#line 2403 "parser.c" -enum {JSON_string_start = 1}; -enum {JSON_string_first_final = 9}; -enum {JSON_string_error = 0}; +NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len) +{ + return json_decode_large_number(start, len, json_decode_inum); +} -enum {JSON_string_en_main = 1}; +static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end) +{ + if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) { + if (negative) { + return INT64T2NUM(-((int64_t)mantissa)); + } + return UINT64T2NUM(mantissa); + } + return json_decode_large_integer(start, end - start); +} -#line 1061 "parser.rl" +static VALUE json_decode_dnum(const char *buffer) +{ + return DBL2NUM(rb_cstr_to_dbl(buffer, 1)); +} +NOINLINE(static) VALUE json_decode_large_float(const char *start, long len) +{ + return json_decode_large_number(start, len, json_decode_dnum); +} -static int -match_i(VALUE regexp, VALUE klass, VALUE memo) +/* Ruby JSON optimized float decoder using vendored Ryu algorithm + * Accepts pre-extracted mantissa and exponent from first-pass validation + */ +static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int64_t exponent, bool negative, + const char *start, const char *end) { - if (regexp == Qundef) return ST_STOP; - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) && - RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) { - rb_ary_push(memo, klass); - return ST_STOP; + if (RB_UNLIKELY(config->decimal_class)) { + VALUE text = rb_str_new(start, end - start); + return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text); } - return ST_CONTINUE; + + if (RB_UNLIKELY(exponent > INT32_MAX)) { + return negative ? CMinusInfinity : CInfinity; + } + + if (RB_UNLIKELY(exponent < INT32_MIN)) { + return rb_float_new(negative ? -0.0 : 0.0); + } + + // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case) + // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308) + if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) { + return json_decode_large_float(start, end - start); + } + + return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, (int32_t)exponent, negative)); } -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - VALUE match_string; - - -#line 2432 "parser.c" - { - cs = JSON_string_start; - } - -#line 1081 "parser.rl" - json->memo = p; - -#line 2440 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - if ( (*p) == 34 ) - goto st2; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 34: goto tr2; - case 92: goto st3; - } - if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) - goto st0; - goto st2; -tr2: -#line 1043 "parser.rl" - { - *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - {p = (( p + 1))-1;} - p--; - {p++; cs = 9; goto _out;} - } -#line 1036 "parser.rl" - { - *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - {p = (( p + 1))-1;} - p--; - {p++; cs = 9; goto _out;} - } - goto st9; -tr6: -#line 1036 "parser.rl" - { - *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - {p = (( p + 1))-1;} - p--; - {p++; cs = 9; goto _out;} - } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 2493 "parser.c" - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 117 ) - goto st5; - if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) - goto st0; - goto st4; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 34: goto tr6; - case 92: goto st3; - } - if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) - goto st0; - goto st4; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st6; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st6; - } else - goto st6; - goto st0; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st7; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st7; - } else - goto st7; - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st8; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st8; - } else - goto st8; - goto st0; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st4; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 102 ) - goto st4; - } else - goto st4; - goto st0; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 1083 "parser.rl" - - if (json->create_additions && RTEST(match_string = json->match_string)) { - VALUE klass; - VALUE memo = rb_ary_new2(2); - rb_ary_push(memo, *result); - rb_hash_foreach(match_string, match_i, memo); - klass = rb_ary_entry(memo, 1); - if (RTEST(klass)) { - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - - if (cs >= JSON_string_first_final) { - return p + 1; - } else { - return NULL; +static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count) +{ + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count)); + rvalue_stack_pop(state->stack, count); + + if (config->freeze) { + RB_OBJ_FREEZE(array); + } + + return array; +} + +static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs) +{ + VALUE set = rb_hash_new_capa(count / 2); + for (size_t index = 0; index < count; index += 2) { + size_t before = RHASH_SIZE(set); + VALUE key = pairs[index]; + rb_hash_aset(set, key, Qtrue); + if (RHASH_SIZE(set) == before) { + if (RB_SYMBOL_P(key)) { + return rb_sym2str(key); + } + return key; + } + } + return Qfalse; +} + +NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key) +{ + VALUE message = rb_sprintf( + "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`", + rb_inspect(duplicate_key) + ); + + emit_parse_warning(RSTRING_PTR(message), state); + RB_GC_GUARD(message); +} + +NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key) +{ + VALUE message = rb_sprintf( + "duplicate key %"PRIsVALUE, + rb_inspect(duplicate_key) + ); + + long line, column; + cursor_position(state, &line, &column); + rb_str_concat(message, build_parse_error_message("", state, line, column)) ; + rb_exc_raise(parse_error_new(message, line, column)); +} + +static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count) +{ + size_t entries_count = count / 2; + VALUE object = rb_hash_new_capa(entries_count); + const VALUE *pairs = rvalue_stack_peek(state->stack, count); + rb_hash_bulk_insert(count, pairs, object); + + if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) { + switch (config->on_duplicate_key) { + case JSON_IGNORE: + break; + case JSON_DEPRECATED: + // Only emit the first few deprecations to avoid spamming. + if (state->emitted_deprecations < 5) { + emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs)); + state->emitted_deprecations++; + } + + break; + case JSON_RAISE: + raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs)); + break; + } + } + + rvalue_stack_pop(state->stack, count); + + if (config->freeze) { + RB_OBJ_FREEZE(object); + } + + return object; +} + +static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value) +{ + if (RB_UNLIKELY(config->on_load_proc)) { + value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil); + } + rvalue_stack_push(state->stack, value, state->stack_handle, &state->stack); + return value; +} + +static const bool string_scan_table[256] = { + // ASCII Control Characters + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // ASCII Characters + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +#ifdef HAVE_SIMD +static SIMD_Implementation simd_impl = SIMD_NONE; +#endif /* HAVE_SIMD */ + +ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state) +{ +#ifdef HAVE_SIMD +#if defined(HAVE_SIMD_NEON) + + uint64_t mask = 0; + if (string_scan_simd_neon(&state->cursor, state->end, &mask)) { + state->cursor += trailing_zeros64(mask) >> 2; + return true; + } + +#elif defined(HAVE_SIMD_SSE2) + if (simd_impl == SIMD_SSE2) { + int mask = 0; + if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) { + state->cursor += trailing_zeros(mask); + return true; + } + } +#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */ +#endif /* HAVE_SIMD */ + + while (!eos(state)) { + if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) { + return true; + } + state->cursor++; + } + + // If the string ended with an unterminated escape sequence, we might + // have gone past the end. + if (RB_UNLIKELY(state->cursor > state->end)) { + state->cursor = state->end; + } + + return false; +} + +static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start) +{ + const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS]; + JSON_UnescapePositions positions = { + .size = 0, + .positions = backslashes, + .additional_backslashes = 0, + }; + + do { + switch (*state->cursor) { + case '"': { + VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions); + state->cursor++; + return json_push_value(state, config, string); + } + case '\\': { + if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) { + backslashes[positions.size] = state->cursor; + positions.size++; + } else { + positions.additional_backslashes++; + } + state->cursor++; + break; + } + default: + if (!config->allow_control_characters) { + raise_parse_error("invalid ASCII control character in string: %s", state); + } + break; + } + + state->cursor++; + } while (string_scan(state)); + + raise_parse_error("unexpected end of input, expected closing \"", state); + return Qfalse; +} + +ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name) +{ + state->cursor++; + const char *start = state->cursor; + + if (RB_UNLIKELY(!string_scan(state))) { + raise_parse_error("unexpected end of input, expected closing \"", state); + } + + if (RB_LIKELY(*state->cursor == '"')) { + VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name); + state->cursor++; + return json_push_value(state, config, string); + } + return json_parse_escaped_string(state, config, is_name, start); +} + +#if JSON_CPU_LITTLE_ENDIAN_64BITS +// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/ +// Additional References: +// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html +static inline uint64_t decode_8digits_unrolled(uint64_t val) { + const uint64_t mask = 0x000000FF000000FF; + const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return val; +} + +static inline uint64_t decode_4digits_unrolled(uint32_t val) { + const uint32_t mask = 0x000000FF; + const uint32_t mul1 = 100; + val -= 0x30303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = ((val & mask) * mul1) + (((val >> 16) & mask)); + return val; +} +#endif + +static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator) +{ + const char *start = state->cursor; + +#if JSON_CPU_LITTLE_ENDIAN_64BITS + while (rest(state) >= sizeof(uint64_t)) { + uint64_t next_8bytes; + memcpy(&next_8bytes, state->cursor, sizeof(uint64_t)); + + // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333 + // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html + uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4); + + if (match == 0x3333333333333333) { // 8 consecutive digits + *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes); + state->cursor += 8; + continue; + } + + uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT; + + if (consecutive_digits >= 4) { + *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes); + state->cursor += 4; + consecutive_digits -= 4; + } + + while (consecutive_digits) { + *accumulator = *accumulator * 10 + (*state->cursor - '0'); + consecutive_digits--; + state->cursor++; + } + + return (int)(state->cursor - start); + } +#endif + + char next_char; + while (rb_isdigit(next_char = peek(state))) { + *accumulator = *accumulator * 10 + (next_char - '0'); + state->cursor++; + } + return (int)(state->cursor - start); +} + +static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start) +{ + bool integer = true; + const char first_digit = *state->cursor; + + // Variables for Ryu optimization - extract digits during parsing + int64_t exponent = 0; + int decimal_point_pos = -1; + uint64_t mantissa = 0; + + // Parse integer part and extract mantissa digits + int mantissa_digits = json_parse_digits(state, &mantissa); + + if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) { + raise_parse_error_at("invalid number: %s", state, start); + } + + // Parse fractional part + if (peek(state) == '.') { + integer = false; + decimal_point_pos = mantissa_digits; // Remember position of decimal point + state->cursor++; + + int fractional_digits = json_parse_digits(state, &mantissa); + mantissa_digits += fractional_digits; + + if (RB_UNLIKELY(!fractional_digits)) { + raise_parse_error_at("invalid number: %s", state, start); + } + } + + // Parse exponent + if (rb_tolower(peek(state)) == 'e') { + integer = false; + state->cursor++; + + bool negative_exponent = false; + const char next_char = peek(state); + if (next_char == '-' || next_char == '+') { + negative_exponent = next_char == '-'; + state->cursor++; + } + + uint64_t abs_exponent = 0; + int exponent_digits = json_parse_digits(state, &abs_exponent); + + if (RB_UNLIKELY(!exponent_digits)) { + raise_parse_error_at("invalid number: %s", state, start); + } + + if (RB_UNLIKELY(exponent_digits >= 20 || abs_exponent > (uint64_t)INT64_MAX)) { + exponent = negative_exponent ? INT64_MIN : INT64_MAX; + } else { + exponent = negative_exponent ? -(int64_t)abs_exponent : (int64_t)abs_exponent; + } + } + + if (integer) { + return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor); + } + + // Adjust exponent based on decimal point position + if (decimal_point_pos >= 0) { + exponent -= (mantissa_digits - decimal_point_pos); + } + + return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor); +} + +static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config) +{ + return json_parse_number(state, config, false, state->cursor); +} + +static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config) +{ + const char *start = state->cursor; + state->cursor++; + return json_parse_number(state, config, true, start); +} + +static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) +{ + json_eat_whitespace(state); + + switch (peek(state)) { + case 'n': + if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) { + state->cursor += 4; + return json_push_value(state, config, Qnil); + } + + raise_parse_error("unexpected token %s", state); + break; + case 't': + if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) { + state->cursor += 4; + return json_push_value(state, config, Qtrue); + } + + raise_parse_error("unexpected token %s", state); + break; + case 'f': + // Note: memcmp with a small power of two compile to an integer comparison + if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) { + state->cursor += 5; + return json_push_value(state, config, Qfalse); + } + + raise_parse_error("unexpected token %s", state); + break; + case 'N': + // Note: memcmp with a small power of two compile to an integer comparison + if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) { + state->cursor += 3; + return json_push_value(state, config, CNaN); + } + + raise_parse_error("unexpected token %s", state); + break; + case 'I': + if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) { + state->cursor += 8; + return json_push_value(state, config, CInfinity); + } + + raise_parse_error("unexpected token %s", state); + break; + case '-': { + // Note: memcmp with a small power of two compile to an integer comparison + if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) { + if (config->allow_nan) { + state->cursor += 9; + return json_push_value(state, config, CMinusInfinity); + } else { + raise_parse_error("unexpected token %s", state); + } + } + return json_push_value(state, config, json_parse_negative_number(state, config)); + break; + } + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + return json_push_value(state, config, json_parse_positive_number(state, config)); + break; + case '"': { + // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"} + return json_parse_string(state, config, false); + break; + } + case '[': { + state->cursor++; + json_eat_whitespace(state); + long stack_head = state->stack->head; + + if (peek(state) == ']') { + state->cursor++; + return json_push_value(state, config, json_decode_array(state, config, 0)); + } else { + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + } + state->in_array++; + json_parse_any(state, config); + } + + while (true) { + json_eat_whitespace(state); + + const char next_char = peek(state); + + if (RB_LIKELY(next_char == ',')) { + state->cursor++; + if (config->allow_trailing_comma) { + json_eat_whitespace(state); + if (peek(state) == ']') { + continue; + } + } + json_parse_any(state, config); + continue; + } + + if (next_char == ']') { + state->cursor++; + long count = state->stack->head - stack_head; + state->current_nesting--; + state->in_array--; + return json_push_value(state, config, json_decode_array(state, config, count)); + } + + raise_parse_error("expected ',' or ']' after array value", state); + } + break; + } + case '{': { + const char *object_start_cursor = state->cursor; + + state->cursor++; + json_eat_whitespace(state); + long stack_head = state->stack->head; + + if (peek(state) == '}') { + state->cursor++; + return json_push_value(state, config, json_decode_object(state, config, 0)); + } else { + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); + } + + if (peek(state) != '"') { + raise_parse_error("expected object key, got %s", state); + } + json_parse_string(state, config, true); + + json_eat_whitespace(state); + if (peek(state) != ':') { + raise_parse_error("expected ':' after object key", state); + } + state->cursor++; + + json_parse_any(state, config); + } + + while (true) { + json_eat_whitespace(state); + + const char next_char = peek(state); + if (next_char == '}') { + state->cursor++; + state->current_nesting--; + size_t count = state->stack->head - stack_head; + + // Temporary rewind cursor in case an error is raised + const char *final_cursor = state->cursor; + state->cursor = object_start_cursor; + VALUE object = json_decode_object(state, config, count); + state->cursor = final_cursor; + + return json_push_value(state, config, object); + } + + if (next_char == ',') { + state->cursor++; + json_eat_whitespace(state); + + if (config->allow_trailing_comma) { + if (peek(state) == '}') { + continue; + } + } + + if (RB_UNLIKELY(peek(state) != '"')) { + raise_parse_error("expected object key, got: %s", state); + } + json_parse_string(state, config, true); + + json_eat_whitespace(state); + if (RB_UNLIKELY(peek(state) != ':')) { + raise_parse_error("expected ':' after object key, got: %s", state); + } + state->cursor++; + + json_parse_any(state, config); + + continue; + } + + raise_parse_error("expected ',' or '}' after object value, got: %s", state); + } + break; + } + + case 0: + raise_parse_error("unexpected end of input", state); + break; + + default: + raise_parse_error("unexpected character: %s", state); + break; + } + + raise_parse_error("unreachable: %s", state); + return Qundef; +} + +static void json_ensure_eof(JSON_ParserState *state) +{ + json_eat_whitespace(state); + if (!eos(state)) { + raise_parse_error("unexpected token at end of stream %s", state); } } @@ -2611,86 +1475,106 @@ case 8: static VALUE convert_encoding(VALUE source) { - int encindex = RB_ENCODING_GET(source); + StringValue(source); + int encindex = RB_ENCODING_GET(source); + + if (RB_LIKELY(encindex == utf8_encindex)) { + return source; + } + + if (encindex == binary_encindex) { + // For historical reason, we silently reinterpret binary strings as UTF-8 + return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + } - if (RB_LIKELY(encindex == utf8_encindex)) { + source = rb_funcall(source, i_encode, 1, Encoding_UTF_8); + StringValue(source); return source; - } - - if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); - } - - return rb_funcall(source, i_encode, 1, Encoding_UTF_8); -} - -static int configure_parser_i(VALUE key, VALUE val, VALUE data) -{ - JSON_Parser *json = (JSON_Parser *)data; - - if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } - else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } - else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { json->freeze = RTEST(val); } - else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } - else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } - else if (key == sym_create_additions) { - if (NIL_P(val)) { - json->create_additions = true; - json->deprecated_create_additions = true; - } else { - json->create_additions = RTEST(val); - json->deprecated_create_additions = false; +} + +struct parser_config_init_args { + JSON_ParserConfig *config; + VALUE self; +}; + +static void parser_config_wb_write(VALUE self, VALUE *dest, VALUE val) +{ + *dest = val; + if (self) RB_OBJ_WRITTEN(self, Qundef, val); +} + +static int parser_config_init_i(VALUE key, VALUE val, VALUE data) +{ + struct parser_config_init_args *args = (struct parser_config_init_args *)data; + JSON_ParserConfig *config = args->config; + VALUE self = args->self; + + if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); } + else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); } + else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); } + else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { config->freeze = RTEST(val); } + else if (key == sym_on_load) { parser_config_wb_write(self, &config->on_load_proc, RTEST(val) ? val : Qfalse); } + else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } + else if (key == sym_decimal_class) { + if (RTEST(val)) { + if (rb_respond_to(val, i_try_convert)) { + parser_config_wb_write(self, &config->decimal_class, val); + config->decimal_method_id = i_try_convert; + } else if (rb_respond_to(val, i_new)) { + parser_config_wb_write(self, &config->decimal_class, val); + config->decimal_method_id = i_new; + } else if (RB_TYPE_P(val, T_CLASS)) { + VALUE name = rb_class_name(val); + const char *name_cstr = RSTRING_PTR(name); + const char *last_colon = strrchr(name_cstr, ':'); + if (last_colon) { + const char *mod_path_end = last_colon - 1; + VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); + parser_config_wb_write(self, &config->decimal_class, rb_path_to_class(mod_path)); + + const char *method_name_beg = last_colon + 1; + long before_len = method_name_beg - name_cstr; + long len = RSTRING_LEN(name) - before_len; + VALUE method_name = rb_str_substr(name, before_len, len); + config->decimal_method_id = SYM2ID(rb_str_intern(method_name)); + } else { + parser_config_wb_write(self, &config->decimal_class, rb_mKernel); + config->decimal_method_id = SYM2ID(rb_str_intern(name)); + } + } } } return ST_CONTINUE; } -static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) +static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self) { - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } + config->max_nesting = 100; - json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; - json->max_nesting = 100; + struct parser_config_init_args args = { + .config = config, + .self = self, + }; if (!NIL_P(opts)) { Check_Type(opts, T_HASH); if (RHASH_SIZE(opts) > 0) { // We assume in most cases few keys are set so it's faster to go over // the provided keys than to check all possible keys. - rb_hash_foreach(opts, configure_parser_i, (VALUE)json); - - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - - if (json->create_additions && !json->create_id) { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } + rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args); } } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source); - json->Vsource = source; } /* - * call-seq: new(source, opts => {}) + * call-seq: new(opts => {}) * - * Creates a new JSON::Ext::Parser instance for the string _source_. + * Creates a new JSON::Ext::ParserConfig instance. * * It will be configured by the _opts_ hash. _opts_ can have the following * keys: @@ -2706,57 +1590,30 @@ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) * (keys) in a JSON object. Otherwise strings are returned, which is * also the default. It's not possible to use this option in * conjunction with the *create_additions* option. - * * *create_additions*: If set to false, the Parser doesn't create - * additions even if a matching class and create_id was found. This option - * defaults to false. - * * *object_class*: Defaults to Hash. If another type is provided, it will be used - * instead of Hash to represent JSON objects. The type must respond to - * +new+ without arguments, and return an object that respond to +[]=+. - * * *array_class*: Defaults to Array If another type is provided, it will be used - * instead of Hash to represent JSON arrays. The type must respond to - * +new+ without arguments, and return an object that respond to +<<+. * * *decimal_class*: Specifies which class to use instead of the default * (Float) when parsing decimal numbers. This class must accept a single * string argument in its constructor. */ -static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) +static VALUE cParserConfig_initialize(VALUE self, VALUE opts) { - GET_PARSER_INIT; + rb_check_frozen(self); + GET_PARSER_CONFIG; - rb_check_arity(argc, 1, 2); + parser_config_init(config, opts, self); - parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil); return self; } - -#line 2734 "parser.c" -enum {JSON_start = 1}; -enum {JSON_first_final = 10}; -enum {JSON_error = 0}; - -enum {JSON_en_main = 1}; - - -#line 1249 "parser.rl" - - -/* - * call-seq: parse() - * - * Parses the current JSON text _source_ and returns the complete data - * structure as a result. - * It raises JSON::ParserError if fail to parse. - */ -static VALUE cParser_parse(VALUE self) +static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src) { - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; + VALUE Vsource = convert_encoding(src); - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + // Ensure the string isn't mutated under us. + // The classic API to use is `rb_str_locktmp`, but then we'd + // need to use `rb_protect` to make sure we always unlock. + if (Vsource == src) { + Vsource = rb_str_new_frozen(Vsource); + } VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; rvalue_stack stack = { @@ -2764,398 +1621,82 @@ static VALUE cParser_parse(VALUE self) .ptr = rvalue_stack_buffer, .capa = RVALUE_STACK_INITIAL_CAPA, }; - json->stack = &stack; - - -#line 2771 "parser.c" - { - cs = JSON_start; - } - -#line 1277 "parser.rl" - p = json->source; - pe = p + json->len; - -#line 2780 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 13: goto st1; - case 32: goto st1; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st6; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st1; - goto st0; -st0: -cs = 0; - goto _out; -tr2: -#line 1241 "parser.rl" - { - char *np = JSON_parse_value(json, p, pe, &result, 0); - if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} - } - goto st10; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: -#line 2824 "parser.c" - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 47: goto st2; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 42: goto st3; - case 47: goto st5; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 42 ) - goto st4; - goto st3; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st4; - case 47: goto st10; - } - goto st3; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 10 ) - goto st10; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st9; - } - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 42 ) - goto st8; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 42: goto st8; - case 47: goto st1; - } - goto st7; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 10 ) - goto st1; - goto st9; - } - _test_eof1: cs = 1; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 1280 "parser.rl" - - if (json->stack_handle) { - rvalue_stack_eagerly_release(json->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; - } -} -static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - - JSON_Parser _parser = {0}; - JSON_Parser *json = &_parser; - parser_init(json, source, opts); + long len; + const char *start; - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + RSTRING_GETMEM(Vsource, start, len); - VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { - .type = RVALUE_STACK_STACK_ALLOCATED, - .ptr = rvalue_stack_buffer, - .capa = RVALUE_STACK_INITIAL_CAPA, + VALUE stack_handle = 0; + JSON_ParserState _state = { + .start = start, + .cursor = start, + .end = start + len, + .stack = &stack, + .stack_handle = &stack_handle, }; - json->stack = &stack; - - -#line 2949 "parser.c" - { - cs = JSON_start; - } - -#line 1315 "parser.rl" - p = json->source; - pe = p + json->len; - -#line 2958 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 13: goto st1; - case 32: goto st1; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st6; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; - } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st1; - goto st0; -st0: -cs = 0; - goto _out; -tr2: -#line 1241 "parser.rl" - { - char *np = JSON_parse_value(json, p, pe, &result, 0); - if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} - } - goto st10; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: -#line 3002 "parser.c" - switch( (*p) ) { - case 13: goto st10; - case 32: goto st10; - case 47: goto st2; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st10; - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 42: goto st3; - case 47: goto st5; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( (*p) == 42 ) - goto st4; - goto st3; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 42: goto st4; - case 47: goto st10; - } - goto st3; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( (*p) == 10 ) - goto st10; - goto st5; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st9; - } - goto st0; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - if ( (*p) == 42 ) - goto st8; - goto st7; -st8: - if ( ++p == pe ) - goto _test_eof8; -case 8: - switch( (*p) ) { - case 42: goto st8; - case 47: goto st1; - } - goto st7; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - if ( (*p) == 10 ) - goto st1; - goto st9; - } - _test_eof1: cs = 1; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - - _test_eof: {} - _out: {} - } - -#line 1318 "parser.rl" - - if (json->stack_handle) { - rvalue_stack_eagerly_release(json->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; - } + JSON_ParserState *state = &_state; + + VALUE result = json_parse_any(state, config); + + // This may be skipped in case of exception, but + // it won't cause a leak. + rvalue_stack_eagerly_release(stack_handle); + RB_GC_GUARD(stack_handle); + RB_GC_GUARD(Vsource); + json_ensure_eof(state); + + return result; +} + +/* + * call-seq: parse(source) + * + * Parses the current JSON text _source_ and returns the complete data + * structure as a result. + * It raises JSON::ParserError if fail to parse. + */ +static VALUE cParserConfig_parse(VALUE self, VALUE Vsource) +{ + GET_PARSER_CONFIG; + return cParser_parse(config, Vsource); } -static void JSON_mark(void *ptr) +static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts) { - JSON_Parser *json = ptr; - rb_gc_mark(json->Vsource); - rb_gc_mark(json->create_id); - rb_gc_mark(json->object_class); - rb_gc_mark(json->array_class); - rb_gc_mark(json->decimal_class); - rb_gc_mark(json->match_string); - rb_gc_mark(json->stack_handle); + JSON_ParserConfig _config = {0}; + JSON_ParserConfig *config = &_config; + parser_config_init(config, opts, false); - long index; - for (index = 0; index < json->name_cache.length; index++) { - rb_gc_mark(json->name_cache.entries[index]); - } + return cParser_parse(config, Vsource); } -static void JSON_free(void *ptr) +static void JSON_ParserConfig_mark(void *ptr) { - JSON_Parser *json = ptr; - fbuffer_free(&json->fbuffer); - ruby_xfree(json); + JSON_ParserConfig *config = ptr; + rb_gc_mark(config->on_load_proc); + rb_gc_mark(config->decimal_class); } -static size_t JSON_memsize(const void *ptr) +static size_t JSON_ParserConfig_memsize(const void *ptr) { - const JSON_Parser *json = ptr; - return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer); + return sizeof(JSON_ParserConfig); } -static const rb_data_type_t JSON_Parser_type = { - "JSON/Parser", - {JSON_mark, JSON_free, JSON_memsize,}, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, +static const rb_data_type_t JSON_ParserConfig_type = { + .wrap_struct_name = "JSON::Ext::Parser/ParserConfig", + .function = { + JSON_ParserConfig_mark, + RUBY_DEFAULT_FREE, + JSON_ParserConfig_memsize, + }, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE, }; static VALUE cJSON_parser_s_allocate(VALUE klass) { - JSON_Parser *json; - VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - fbuffer_stack_init(&json->fbuffer, 0, NULL, 0); - return obj; -} - -/* - * call-seq: source() - * - * Returns a copy of the current _source_ string, that was used to construct - * this Parser. - */ -static VALUE cParser_source(VALUE self) -{ - GET_PARSER; - return rb_str_dup(json->Vsource); + JSON_ParserConfig *config; + return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config); } void Init_parser(void) @@ -3167,15 +1708,15 @@ void Init_parser(void) #undef rb_intern rb_require("json/common"); mJSON = rb_define_module("JSON"); - mExt = rb_define_module_under(mJSON, "Ext"); - cParser = rb_define_class_under(mExt, "Parser", rb_cObject); + VALUE mExt = rb_define_module_under(mJSON, "Ext"); + VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject); eNestingError = rb_path2class("JSON::NestingError"); rb_gc_register_mark_object(eNestingError); - rb_define_alloc_func(cParser, cJSON_parser_s_allocate); - rb_define_method(cParser, "initialize", cParser_initialize, -1); - rb_define_method(cParser, "parse", cParser_parse, 0); - rb_define_method(cParser, "source", cParser_source, 0); + rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate); + rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1); + rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1); + VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject); rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); CNaN = rb_const_get(mJSON, rb_intern("NaN")); @@ -3193,24 +1734,14 @@ void Init_parser(void) sym_max_nesting = ID2SYM(rb_intern("max_nesting")); sym_allow_nan = ID2SYM(rb_intern("allow_nan")); sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters")); + sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape")); sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); sym_freeze = ID2SYM(rb_intern("freeze")); - sym_create_additions = ID2SYM(rb_intern("create_additions")); - sym_create_id = ID2SYM(rb_intern("create_id")); - sym_object_class = ID2SYM(rb_intern("object_class")); - sym_array_class = ID2SYM(rb_intern("array_class")); + sym_on_load = ID2SYM(rb_intern("on_load")); sym_decimal_class = ID2SYM(rb_intern("decimal_class")); - sym_match_string = ID2SYM(rb_intern("match_string")); - - i_create_id = rb_intern("create_id"); - i_json_creatable_p = rb_intern("json_creatable?"); - i_json_create = rb_intern("json_create"); - i_chr = rb_intern("chr"); - i_match = rb_intern("match"); - i_deep_const_get = rb_intern("deep_const_get"); - i_aset = rb_intern("[]="); - i_aref = rb_intern("[]"); - i_leftshift = rb_intern("<<"); + sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key")); + i_new = rb_intern("new"); i_try_convert = rb_intern("try_convert"); i_uminus = rb_intern("-@"); @@ -3219,12 +1750,8 @@ void Init_parser(void) binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); enc_utf8 = rb_utf8_encoding(); -} -/* - * Local variables: - * mode: c - * c-file-style: ruby - * indent-tabs-mode: nil - * End: - */ +#ifdef HAVE_SIMD + simd_impl = find_simd_implementation(); +#endif +} diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl deleted file mode 100644 index ac9493454d..0000000000 --- a/ext/json/parser/parser.rl +++ /dev/null @@ -1,1457 +0,0 @@ -#include "ruby.h" -#include "../fbuffer/fbuffer.h" - -static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, - i_chr, i_deep_const_get, i_match, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_uminus, i_encode; - -static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, - sym_create_additions, sym_create_id, sym_object_class, sym_array_class, - sym_decimal_class, sym_match_string; - -static int binary_encindex; -static int utf8_encindex; - -#ifdef HAVE_RB_CATEGORY_WARN -# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) -#else -# define json_deprecated(message) rb_warn(message) -#endif - -static const char deprecated_create_additions_warning[] = - "JSON.load implicit support for `create_additions: true` is deprecated " - "and will be removed in 3.0, use JSON.unsafe_load or explicitly " - "pass `create_additions: true`"; - -#ifndef HAVE_RB_HASH_BULK_INSERT -// For TruffleRuby -void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) -{ - long index = 0; - while (index < count) { - VALUE name = pairs[index++]; - VALUE value = pairs[index++]; - rb_hash_aset(hash, name, value); - } - RB_GC_GUARD(hash); -} -#endif - -/* name cache */ - -#include <string.h> -#include <ctype.h> - -// Object names are likely to be repeated, and are frozen. -// As such we can re-use them if we keep a cache of the ones we've seen so far, -// and save much more expensive lookups into the global fstring table. -// This cache implementation is deliberately simple, as we're optimizing for compactness, -// to be able to fit safely on the stack. -// As such, binary search into a sorted array gives a good tradeoff between compactness and -// performance. -#define JSON_RVALUE_CACHE_CAPA 63 -typedef struct rvalue_cache_struct { - int length; - VALUE entries[JSON_RVALUE_CACHE_CAPA]; -} rvalue_cache; - -static rb_encoding *enc_utf8; - -#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 - -static inline VALUE build_interned_string(const char *str, const long length) -{ -# ifdef HAVE_RB_ENC_INTERNED_STR - return rb_enc_interned_str(str, length, enc_utf8); -# else - VALUE rstring = rb_utf8_str_new(str, length); - return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); -# endif -} - -static inline VALUE build_symbol(const char *str, const long length) -{ - return rb_str_intern(build_interned_string(str, length)); -} - -static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) -{ - MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); - cache->length++; - cache->entries[index] = rstring; -} - -static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) -{ - long rstring_length = RSTRING_LEN(rstring); - if (length == rstring_length) { - return memcmp(str, RSTRING_PTR(rstring), length); - } else { - return (int)(length - rstring_length); - } -} - -static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) -{ - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - - int low = 0; - int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; - - while (low <= high) { - mid = (high + low) >> 1; - VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, entry); - - if (last_cmp == 0) { - return entry; - } else if (last_cmp > 0) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - - VALUE rstring = build_interned_string(str, length); - - if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rstring); - } - return rstring; -} - -static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) -{ - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha(str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - - int low = 0; - int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; - - while (low <= high) { - mid = (high + low) >> 1; - VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); - - if (last_cmp == 0) { - return entry; - } else if (last_cmp > 0) { - low = mid + 1; - } else { - high = mid - 1; - } - } - - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - - VALUE rsymbol = build_symbol(str, length); - - if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rsymbol); - } - return rsymbol; -} - -/* rvalue stack */ - -#define RVALUE_STACK_INITIAL_CAPA 128 - -enum rvalue_stack_type { - RVALUE_STACK_HEAP_ALLOCATED = 0, - RVALUE_STACK_STACK_ALLOCATED = 1, -}; - -typedef struct rvalue_stack_struct { - enum rvalue_stack_type type; - long capa; - long head; - VALUE *ptr; -} rvalue_stack; - -static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); - -static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) -{ - long required = stack->capa * 2; - - if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { - stack = rvalue_stack_spill(stack, handle, stack_ref); - } else { - REALLOC_N(stack->ptr, VALUE, required); - stack->capa = required; - } - return stack; -} - -static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) -{ - if (RB_UNLIKELY(stack->head >= stack->capa)) { - stack = rvalue_stack_grow(stack, handle, stack_ref); - } - stack->ptr[stack->head] = value; - stack->head++; -} - -static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) -{ - return stack->ptr + (stack->head - count); -} - -static inline void rvalue_stack_pop(rvalue_stack *stack, long count) -{ - stack->head -= count; -} - -static void rvalue_stack_mark(void *ptr) -{ - rvalue_stack *stack = (rvalue_stack *)ptr; - long index; - for (index = 0; index < stack->head; index++) { - rb_gc_mark(stack->ptr[index]); - } -} - -static void rvalue_stack_free(void *ptr) -{ - rvalue_stack *stack = (rvalue_stack *)ptr; - if (stack) { - ruby_xfree(stack->ptr); - ruby_xfree(stack); - } -} - -static size_t rvalue_stack_memsize(const void *ptr) -{ - const rvalue_stack *stack = (const rvalue_stack *)ptr; - return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; -} - -static const rb_data_type_t JSON_Parser_rvalue_stack_type = { - "JSON::Ext::Parser/rvalue_stack", - { - .dmark = rvalue_stack_mark, - .dfree = rvalue_stack_free, - .dsize = rvalue_stack_memsize, - }, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -}; - -static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) -{ - rvalue_stack *stack; - *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - *stack_ref = stack; - MEMCPY(stack, old_stack, rvalue_stack, 1); - - stack->capa = old_stack->capa << 1; - stack->ptr = ALLOC_N(VALUE, stack->capa); - stack->type = RVALUE_STACK_HEAP_ALLOCATED; - MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); - return stack; -} - -static void rvalue_stack_eagerly_release(VALUE handle) -{ - rvalue_stack *stack; - TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - RTYPEDDATA_DATA(handle) = NULL; - rvalue_stack_free(stack); -} - -/* unicode */ - -static const signed char digit_values[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, - -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1 -}; - -static uint32_t unescape_unicode(const unsigned char *p) -{ - const uint32_t replacement_char = 0xFFFD; - - signed char b; - uint32_t result = 0; - b = digit_values[p[0]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[1]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[2]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - b = digit_values[p[3]]; - if (b < 0) return replacement_char; - result = (result << 4) | (unsigned char)b; - return result; -} - -static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) -{ - int len = 1; - if (ch <= 0x7F) { - buf[0] = (char) ch; - } else if (ch <= 0x07FF) { - buf[0] = (char) ((ch >> 6) | 0xC0); - buf[1] = (char) ((ch & 0x3F) | 0x80); - len++; - } else if (ch <= 0xFFFF) { - buf[0] = (char) ((ch >> 12) | 0xE0); - buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80); - buf[2] = (char) ((ch & 0x3F) | 0x80); - len += 2; - } else if (ch <= 0x1fffff) { - buf[0] =(char) ((ch >> 18) | 0xF0); - buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80); - buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80); - buf[3] =(char) ((ch & 0x3F) | 0x80); - len += 3; - } else { - buf[0] = '?'; - } - return len; -} - -typedef struct JSON_ParserStruct { - VALUE Vsource; - char *source; - long len; - char *memo; - VALUE create_id; - VALUE object_class; - VALUE array_class; - VALUE decimal_class; - VALUE match_string; - FBuffer fbuffer; - int in_array; - int max_nesting; - bool allow_nan; - bool allow_trailing_comma; - bool parsing_name; - bool symbolize_names; - bool freeze; - bool create_additions; - bool deprecated_create_additions; - rvalue_cache name_cache; - rvalue_stack *stack; - VALUE stack_handle; -} JSON_Parser; - -#define GET_PARSER \ - GET_PARSER_INIT; \ - if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") - -#define GET_PARSER_INIT \ - JSON_Parser *json; \ - TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) - -#define MinusInfinity "-Infinity" -#define EVIL 0x666 - -static const rb_data_type_t JSON_Parser_type; -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); - - -#define PARSE_ERROR_FRAGMENT_LEN 32 -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error(const char *format, const char *start) -{ - char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; - - size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN); - const char *ptr = start; - - if (len == PARSE_ERROR_FRAGMENT_LEN) { - MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); - buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0'; - ptr = buffer; - } - - rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); -} - - -%%{ - machine JSON_common; - - cr = '\n'; - cr_neg = [^\n]; - ws = [ \t\r\n]; - c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/'; - cpp_comment = '//' cr_neg* cr; - comment = c_comment | cpp_comment; - ignore = ws | comment; - name_separator = ':'; - value_separator = ','; - Vnull = 'null'; - Vfalse = 'false'; - Vtrue = 'true'; - VNaN = 'NaN'; - VInfinity = 'Infinity'; - VMinusInfinity = '-Infinity'; - begin_value = [nft\"\-\[\{NI] | digit; - begin_object = '{'; - end_object = '}'; - begin_array = '['; - end_array = ']'; - begin_string = '"'; - begin_name = begin_string; - begin_number = digit | '-'; -}%% - -%%{ - machine JSON_object; - include JSON_common; - - write data; - - action parse_value { - char *np = JSON_parse_value(json, fpc, pe, result, current_nesting); - if (np == NULL) { - fhold; fbreak; - } else { - fexec np; - } - } - - action allow_trailing_comma { json->allow_trailing_comma } - - action parse_name { - char *np; - json->parsing_name = true; - np = JSON_parse_string(json, fpc, pe, result); - json->parsing_name = false; - if (np == NULL) { fhold; fbreak; } else { - PUSH(*result); - fexec np; - } - } - - action exit { fhold; fbreak; } - - pair = ignore* begin_name >parse_name ignore* name_separator ignore* begin_value >parse_value; - next_pair = ignore* value_separator pair; - - main := ( - begin_object - (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore* - end_object - ) @exit; -}%% - -#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) - -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - - long stack_head = json->stack->head; - - %% write init; - %% write exec; - - if (cs >= JSON_object_first_final) { - long count = json->stack->head - stack_head; - - if (RB_UNLIKELY(json->object_class)) { - VALUE object = rb_class_new_instance(0, 0, json->object_class); - long index = 0; - VALUE *items = rvalue_stack_peek(json->stack, count); - while (index < count) { - VALUE name = items[index++]; - VALUE value = items[index++]; - rb_funcall(object, i_aset, 2, name, value); - } - *result = object; - } else { - VALUE hash; -#ifdef HAVE_RB_HASH_NEW_CAPA - hash = rb_hash_new_capa(count >> 1); -#else - hash = rb_hash_new(); -#endif - rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash); - *result = hash; - } - rvalue_stack_pop(json->stack, count); - - if (RB_UNLIKELY(json->create_additions)) { - VALUE klassname; - if (json->object_class) { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); - } else { - klassname = rb_hash_aref(*result, json->create_id); - } - if (!NIL_P(klassname)) { - VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { - if (json->deprecated_create_additions) { - json_deprecated(deprecated_create_additions_warning); - } - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - } - return p + 1; - } else { - return NULL; - } -} - -%%{ - machine JSON_value; - include JSON_common; - - write data; - - action parse_null { - *result = Qnil; - } - action parse_false { - *result = Qfalse; - } - action parse_true { - *result = Qtrue; - } - action parse_nan { - if (json->allow_nan) { - *result = CNaN; - } else { - raise_parse_error("unexpected token at '%s'", p - 2); - } - } - action parse_infinity { - if (json->allow_nan) { - *result = CInfinity; - } else { - raise_parse_error("unexpected token at '%s'", p - 7); - } - } - action parse_string { - char *np = JSON_parse_string(json, fpc, pe, result); - if (np == NULL) { - fhold; - fbreak; - } else { - fexec np; - } - } - - action parse_number { - char *np; - if(pe > fpc + 8 && !strncmp(MinusInfinity, fpc, 9)) { - if (json->allow_nan) { - *result = CMinusInfinity; - fexec p + 10; - fhold; fbreak; - } else { - raise_parse_error("unexpected token at '%s'", p); - } - } - np = JSON_parse_number(json, fpc, pe, result); - if (np != NULL) { - fexec np; - } - fhold; fbreak; - } - - action parse_array { - char *np; - json->in_array++; - np = JSON_parse_array(json, fpc, pe, result, current_nesting + 1); - json->in_array--; - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action parse_object { - char *np; - np = JSON_parse_object(json, fpc, pe, result, current_nesting + 1); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action exit { fhold; fbreak; } - -main := ignore* ( - Vnull @parse_null | - Vfalse @parse_false | - Vtrue @parse_true | - VNaN @parse_nan | - VInfinity @parse_infinity | - begin_number @parse_number | - begin_string @parse_string | - begin_array @parse_array | - begin_object @parse_object - ) ignore* %*exit; -}%% - -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - %% write init; - %% write exec; - - if (json->freeze) { - OBJ_FREEZE(*result); - } - - if (cs >= JSON_value_first_final) { - PUSH(*result); - return p; - } else { - return NULL; - } -} - -%%{ - machine JSON_integer; - - write data; - - action exit { fhold; fbreak; } - - main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit); -}%% - -#define MAX_FAST_INTEGER_SIZE 18 -static inline VALUE fast_parse_integer(char *p, char *pe) -{ - bool negative = false; - if (*p == '-') { - negative = true; - p++; - } - - long long memo = 0; - while (p < pe) { - memo *= 10; - memo += *p - '0'; - p++; - } - - if (negative) { - memo = -memo; - } - return LL2NUM(memo); -} - -static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) -{ - long len = p - json->memo; - if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { - *result = fast_parse_integer(json->memo, p); - } else { - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); - } - return p + 1; -} - -%%{ - machine JSON_float; - include JSON_common; - - write data; - - action exit { fhold; fbreak; } - action isFloat { is_float = true; } - - main := '-'? ( - (('0' | [1-9][0-9]*) - ((('.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) | - ([Ee] [+\-]?[0-9]+)) > isFloat)? - ) (^[0-9Ee.\-]? @exit )); -}%% - -static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - bool is_float = false; - - %% write init; - json->memo = p; - %% write exec; - - if (cs >= JSON_float_first_final) { - if (!is_float) { - return JSON_decode_integer(json, p, result); - } - VALUE mod = Qnil; - ID method_id = 0; - if (json->decimal_class) { - if (rb_respond_to(json->decimal_class, i_try_convert)) { - mod = json->decimal_class; - method_id = i_try_convert; - } else if (rb_respond_to(json->decimal_class, i_new)) { - mod = json->decimal_class; - method_id = i_new; - } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) { - VALUE name = rb_class_name(json->decimal_class); - const char *name_cstr = RSTRING_PTR(name); - const char *last_colon = strrchr(name_cstr, ':'); - if (last_colon) { - const char *mod_path_end = last_colon - 1; - VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); - mod = rb_path_to_class(mod_path); - - const char *method_name_beg = last_colon + 1; - long before_len = method_name_beg - name_cstr; - long len = RSTRING_LEN(name) - before_len; - VALUE method_name = rb_str_substr(name, before_len, len); - method_id = SYM2ID(rb_str_intern(method_name)); - } else { - mod = rb_mKernel; - method_id = SYM2ID(rb_str_intern(name)); - } - } - } - - long len = p - json->memo; - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - - if (method_id) { - VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer)); - *result = rb_funcallv(mod, method_id, 1, &text); - } else { - *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1)); - } - - return p + 1; - } else { - return NULL; - } -} - - -%%{ - machine JSON_array; - include JSON_common; - - write data; - - action parse_value { - VALUE v = Qnil; - char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting); - if (np == NULL) { - fhold; fbreak; - } else { - fexec np; - } - } - - action allow_trailing_comma { json->allow_trailing_comma } - - action exit { fhold; fbreak; } - - next_element = value_separator ignore* begin_value >parse_value; - - main := begin_array ignore* - ((begin_value >parse_value ignore*) - (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)? - end_array @exit; -}%% - -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) -{ - int cs = EVIL; - - if (json->max_nesting && current_nesting > json->max_nesting) { - rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); - } - long stack_head = json->stack->head; - - %% write init; - %% write exec; - - if(cs >= JSON_array_first_final) { - long count = json->stack->head - stack_head; - - if (RB_UNLIKELY(json->array_class)) { - VALUE array = rb_class_new_instance(0, 0, json->array_class); - VALUE *items = rvalue_stack_peek(json->stack, count); - long index; - for (index = 0; index < count; index++) { - rb_funcall(array, i_leftshift, 1, items[index]); - } - *result = array; - } else { - VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count)); - *result = array; - } - rvalue_stack_pop(json->stack, count); - - return p + 1; - } else { - raise_parse_error("unexpected token at '%s'", p); - return NULL; - } -} - -static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize) -{ - if (symbolize) { - intern = true; - } - VALUE result; -# ifdef HAVE_RB_ENC_INTERNED_STR - if (intern) { - result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); - } else { - result = rb_utf8_str_new(start, (long)(end - start)); - } -# else - result = rb_utf8_str_new(start, (long)(end - start)); - if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - } -# endif - - if (symbolize) { - result = rb_str_intern(result); - } - - return result; -} - -static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - - if (is_name && json->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; - } - } - - return build_string(string, stringEnd, intern, symbolize); -} - -static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - char *p = string, *pe = string, *unescape, *bufferStart, *buffer; - int unescape_len; - char buf[4]; - - if (is_name && json->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); - } - - if (RB_LIKELY(cached_key)) { - return cached_key; - } - } - - pe = memchr(p, '\\', bufferSize); - if (RB_UNLIKELY(pe == NULL)) { - return build_string(string, stringEnd, intern, symbolize); - } - - VALUE result = rb_str_buf_new(bufferSize); - rb_enc_associate_index(result, utf8_encindex); - buffer = bufferStart = RSTRING_PTR(result); - - while (pe < stringEnd) { - if (*pe == '\\') { - unescape = (char *) "?"; - unescape_len = 1; - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } - switch (*++pe) { - case 'n': - unescape = (char *) "\n"; - break; - case 'r': - unescape = (char *) "\r"; - break; - case 't': - unescape = (char *) "\t"; - break; - case '"': - unescape = (char *) "\""; - break; - case '\\': - unescape = (char *) "\\"; - break; - case 'b': - unescape = (char *) "\b"; - break; - case 'f': - unescape = (char *) "\f"; - break; - case 'u': - if (pe > stringEnd - 4) { - raise_parse_error("incomplete unicode character escape sequence at '%s'", p); - } else { - uint32_t ch = unescape_unicode((unsigned char *) ++pe); - pe += 3; - /* To handle values above U+FFFF, we take a sequence of - * \uXXXX escapes in the U+D800..U+DBFF then - * U+DC00..U+DFFF ranges, take the low 10 bits from each - * to make a 20-bit number, then add 0x10000 to get the - * final codepoint. - * - * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling - * Surrogate Pairs in UTF-16", and 23.6 "Surrogates - * Area". - */ - if ((ch & 0xFC00) == 0xD800) { - pe++; - if (pe > stringEnd - 6) { - raise_parse_error("incomplete surrogate pair at '%s'", p); - } - if (pe[0] == '\\' && pe[1] == 'u') { - uint32_t sur = unescape_unicode((unsigned char *) pe + 2); - ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) - | (sur & 0x3FF)); - pe += 5; - } else { - unescape = (char *) "?"; - break; - } - } - unescape_len = convert_UTF32_to_UTF8(buf, ch); - unescape = buf; - } - break; - default: - p = pe; - continue; - } - MEMCPY(buffer, unescape, char, unescape_len); - buffer += unescape_len; - p = ++pe; - } else { - pe++; - } - } - - if (pe > p) { - MEMCPY(buffer, p, char, pe - p); - buffer += pe - p; - } - rb_str_set_len(result, buffer - bufferStart); - - if (symbolize) { - result = rb_str_intern(result); - } else if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - } - - return result; -} - -%%{ - machine JSON_string; - include JSON_common; - - write data; - - action parse_complex_string { - *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - fexec p + 1; - fhold; - fbreak; - } - - action parse_simple_string { - *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - fexec p + 1; - fhold; - fbreak; - } - - double_quote = '"'; - escape = '\\'; - control = 0..0x1f; - simple = any - escape - double_quote - control; - - main := double_quote ( - (simple*)( - (double_quote) @parse_simple_string | - ((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string - ) - ); -}%% - -static int -match_i(VALUE regexp, VALUE klass, VALUE memo) -{ - if (regexp == Qundef) return ST_STOP; - if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) && - RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) { - rb_ary_push(memo, klass); - return ST_STOP; - } - return ST_CONTINUE; -} - -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result) -{ - int cs = EVIL; - VALUE match_string; - - %% write init; - json->memo = p; - %% write exec; - - if (json->create_additions && RTEST(match_string = json->match_string)) { - VALUE klass; - VALUE memo = rb_ary_new2(2); - rb_ary_push(memo, *result); - rb_hash_foreach(match_string, match_i, memo); - klass = rb_ary_entry(memo, 1); - if (RTEST(klass)) { - *result = rb_funcall(klass, i_json_create, 1, *result); - } - } - - if (cs >= JSON_string_first_final) { - return p + 1; - } else { - return NULL; - } -} - -/* - * Document-class: JSON::Ext::Parser - * - * This is the JSON parser implemented as a C extension. It can be configured - * to be used by setting - * - * JSON.parser = JSON::Ext::Parser - * - * with the method parser= in JSON. - * - */ - -static VALUE convert_encoding(VALUE source) -{ - int encindex = RB_ENCODING_GET(source); - - if (RB_LIKELY(encindex == utf8_encindex)) { - return source; - } - - if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); - } - - return rb_funcall(source, i_encode, 1, Encoding_UTF_8); -} - -static int configure_parser_i(VALUE key, VALUE val, VALUE data) -{ - JSON_Parser *json = (JSON_Parser *)data; - - if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } - else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } - else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { json->freeze = RTEST(val); } - else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } - else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } - else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } - else if (key == sym_create_additions) { - if (NIL_P(val)) { - json->create_additions = true; - json->deprecated_create_additions = true; - } else { - json->create_additions = RTEST(val); - json->deprecated_create_additions = false; - } - } - - return ST_CONTINUE; -} - -static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) -{ - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } - - json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; - json->max_nesting = 100; - - if (!NIL_P(opts)) { - Check_Type(opts, T_HASH); - if (RHASH_SIZE(opts) > 0) { - // We assume in most cases few keys are set so it's faster to go over - // the provided keys than to check all possible keys. - rb_hash_foreach(opts, configure_parser_i, (VALUE)json); - - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - - if (json->create_additions && !json->create_id) { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - } - - } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source); - json->Vsource = source; -} - -/* - * call-seq: new(source, opts => {}) - * - * Creates a new JSON::Ext::Parser instance for the string _source_. - * - * It will be configured by the _opts_ hash. _opts_ can have the following - * keys: - * - * _opts_ can have the following keys: - * * *max_nesting*: The maximum depth of nesting allowed in the parsed data - * structures. Disable depth checking with :max_nesting => false|nil|0, it - * defaults to 100. - * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in - * defiance of RFC 4627 to be parsed by the Parser. This option defaults to - * false. - * * *symbolize_names*: If set to true, returns symbols for the names - * (keys) in a JSON object. Otherwise strings are returned, which is - * also the default. It's not possible to use this option in - * conjunction with the *create_additions* option. - * * *create_additions*: If set to false, the Parser doesn't create - * additions even if a matching class and create_id was found. This option - * defaults to false. - * * *object_class*: Defaults to Hash. If another type is provided, it will be used - * instead of Hash to represent JSON objects. The type must respond to - * +new+ without arguments, and return an object that respond to +[]=+. - * * *array_class*: Defaults to Array If another type is provided, it will be used - * instead of Hash to represent JSON arrays. The type must respond to - * +new+ without arguments, and return an object that respond to +<<+. - * * *decimal_class*: Specifies which class to use instead of the default - * (Float) when parsing decimal numbers. This class must accept a single - * string argument in its constructor. - */ -static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) -{ - GET_PARSER_INIT; - - rb_check_arity(argc, 1, 2); - - parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil); - return self; -} - -%%{ - machine JSON; - - write data; - - include JSON_common; - - action parse_value { - char *np = JSON_parse_value(json, fpc, pe, &result, 0); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - main := ignore* ( - begin_value >parse_value - ) ignore*; -}%% - -/* - * call-seq: parse() - * - * Parses the current JSON text _source_ and returns the complete data - * structure as a result. - * It raises JSON::ParserError if fail to parse. - */ -static VALUE cParser_parse(VALUE self) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; - - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); - - VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { - .type = RVALUE_STACK_STACK_ALLOCATED, - .ptr = rvalue_stack_buffer, - .capa = RVALUE_STACK_INITIAL_CAPA, - }; - json->stack = &stack; - - %% write init; - p = json->source; - pe = p + json->len; - %% write exec; - - if (json->stack_handle) { - rvalue_stack_eagerly_release(json->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; - } -} - -static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - - JSON_Parser _parser = {0}; - JSON_Parser *json = &_parser; - parser_init(json, source, opts); - - char stack_buffer[FBUFFER_STACK_SIZE]; - fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); - - VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { - .type = RVALUE_STACK_STACK_ALLOCATED, - .ptr = rvalue_stack_buffer, - .capa = RVALUE_STACK_INITIAL_CAPA, - }; - json->stack = &stack; - - %% write init; - p = json->source; - pe = p + json->len; - %% write exec; - - if (json->stack_handle) { - rvalue_stack_eagerly_release(json->stack_handle); - } - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - raise_parse_error("unexpected token at '%s'", p); - return Qnil; - } -} - -static void JSON_mark(void *ptr) -{ - JSON_Parser *json = ptr; - rb_gc_mark(json->Vsource); - rb_gc_mark(json->create_id); - rb_gc_mark(json->object_class); - rb_gc_mark(json->array_class); - rb_gc_mark(json->decimal_class); - rb_gc_mark(json->match_string); - rb_gc_mark(json->stack_handle); - - long index; - for (index = 0; index < json->name_cache.length; index++) { - rb_gc_mark(json->name_cache.entries[index]); - } -} - -static void JSON_free(void *ptr) -{ - JSON_Parser *json = ptr; - fbuffer_free(&json->fbuffer); - ruby_xfree(json); -} - -static size_t JSON_memsize(const void *ptr) -{ - const JSON_Parser *json = ptr; - return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer); -} - -static const rb_data_type_t JSON_Parser_type = { - "JSON/Parser", - {JSON_mark, JSON_free, JSON_memsize,}, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, -}; - -static VALUE cJSON_parser_s_allocate(VALUE klass) -{ - JSON_Parser *json; - VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json); - fbuffer_stack_init(&json->fbuffer, 0, NULL, 0); - return obj; -} - -/* - * call-seq: source() - * - * Returns a copy of the current _source_ string, that was used to construct - * this Parser. - */ -static VALUE cParser_source(VALUE self) -{ - GET_PARSER; - return rb_str_dup(json->Vsource); -} - -void Init_parser(void) -{ -#ifdef HAVE_RB_EXT_RACTOR_SAFE - rb_ext_ractor_safe(true); -#endif - -#undef rb_intern - rb_require("json/common"); - mJSON = rb_define_module("JSON"); - mExt = rb_define_module_under(mJSON, "Ext"); - cParser = rb_define_class_under(mExt, "Parser", rb_cObject); - eNestingError = rb_path2class("JSON::NestingError"); - rb_gc_register_mark_object(eNestingError); - rb_define_alloc_func(cParser, cJSON_parser_s_allocate); - rb_define_method(cParser, "initialize", cParser_initialize, -1); - rb_define_method(cParser, "parse", cParser_parse, 0); - rb_define_method(cParser, "source", cParser_source, 0); - - rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); - - CNaN = rb_const_get(mJSON, rb_intern("NaN")); - rb_gc_register_mark_object(CNaN); - - CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); - rb_gc_register_mark_object(CInfinity); - - CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); - rb_gc_register_mark_object(CMinusInfinity); - - rb_global_variable(&Encoding_UTF_8); - Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); - - sym_max_nesting = ID2SYM(rb_intern("max_nesting")); - sym_allow_nan = ID2SYM(rb_intern("allow_nan")); - sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); - sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); - sym_freeze = ID2SYM(rb_intern("freeze")); - sym_create_additions = ID2SYM(rb_intern("create_additions")); - sym_create_id = ID2SYM(rb_intern("create_id")); - sym_object_class = ID2SYM(rb_intern("object_class")); - sym_array_class = ID2SYM(rb_intern("array_class")); - sym_decimal_class = ID2SYM(rb_intern("decimal_class")); - sym_match_string = ID2SYM(rb_intern("match_string")); - - i_create_id = rb_intern("create_id"); - i_json_creatable_p = rb_intern("json_creatable?"); - i_json_create = rb_intern("json_create"); - i_chr = rb_intern("chr"); - i_match = rb_intern("match"); - i_deep_const_get = rb_intern("deep_const_get"); - i_aset = rb_intern("[]="); - i_aref = rb_intern("[]"); - i_leftshift = rb_intern("<<"); - i_new = rb_intern("new"); - i_try_convert = rb_intern("try_convert"); - i_uminus = rb_intern("-@"); - i_encode = rb_intern("encode"); - - binary_encindex = rb_ascii8bit_encindex(); - utf8_encindex = rb_utf8_encindex(); - enc_utf8 = rb_utf8_encoding(); -} - -/* - * Local variables: - * mode: c - * c-file-style: ruby - * indent-tabs-mode: nil - * End: - */ diff --git a/ext/json/parser/prereq.mk b/ext/json/parser/prereq.mk deleted file mode 100644 index fc59169056..0000000000 --- a/ext/json/parser/prereq.mk +++ /dev/null @@ -1,13 +0,0 @@ -RAGEL = ragel - -.SUFFIXES: .rl - -.rl.c: - $(RAGEL) -G2 $< - $(BASERUBY) -pli -e '$$_.sub!(/[ \t]+$$/, "")' \ - -e '$$_.sub!(/^static const int (JSON_.*=.*);$$/, "enum {\\1};")' \ - -e '$$_.sub!(/^(static const char) (_JSON(?:_\w+)?_nfa_\w+)(?=\[\] =)/, "\\1 MAYBE_UNUSED(\\2)")' \ - -e '$$_.sub!(/0 <= ([\( ]+\*[\( ]*p\)+) && \1 <= 31/, "0 <= (signed char)(*(p)) && (*(p)) <= 31")' \ - -e '$$_ = "/* This file is automatically generated from parser.rl by using ragel */\n" + $$_ if $$. == 1' $@ - -parser.c: diff --git a/ext/json/simd/conf.rb b/ext/json/simd/conf.rb new file mode 100644 index 0000000000..76f774bc97 --- /dev/null +++ b/ext/json/simd/conf.rb @@ -0,0 +1,24 @@ +case RbConfig::CONFIG['host_cpu'] +when /^(arm|aarch64)/ + # Try to compile a small program using NEON instructions + header, type, init, extra = 'arm_neon.h', 'uint8x16_t', 'vdupq_n_u8(32)', nil +when /^(x86_64|x64)/ + header, type, init, extra = 'x86intrin.h', '__m128i', '_mm_set1_epi8(32)', 'if (__builtin_cpu_supports("sse2")) { printf("OK"); }' +end +if header + if have_header(header) && try_compile(<<~SRC, '-Werror=implicit-function-declaration') + #{cpp_include(header)} + int main(int argc, char **argv) { + #{type} test = #{init}; + #{extra} + if (argc > 100000) printf("%p", &test); + return 0; + } + SRC + $defs.push("-DJSON_ENABLE_SIMD") + else + puts "Disable SIMD" + end +end + +have_header('cpuid.h') diff --git a/ext/json/simd/simd.h b/ext/json/simd/simd.h new file mode 100644 index 0000000000..611b41b066 --- /dev/null +++ b/ext/json/simd/simd.h @@ -0,0 +1,208 @@ +#include "../json.h" + +typedef enum { + SIMD_NONE, + SIMD_NEON, + SIMD_SSE2 +} SIMD_Implementation; + +#ifndef __has_builtin // Optional of course. + #define __has_builtin(x) 0 // Compatibility with non-clang compilers. +#endif + +#ifdef __clang__ +# if __has_builtin(__builtin_ctzll) +# define HAVE_BUILTIN_CTZLL 1 +# else +# define HAVE_BUILTIN_CTZLL 0 +# endif +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +# define HAVE_BUILTIN_CTZLL 1 +#else +# define HAVE_BUILTIN_CTZLL 0 +#endif + +static inline uint32_t trailing_zeros64(uint64_t input) +{ + JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior + +#if HAVE_BUILTIN_CTZLL + return __builtin_ctzll(input); +#else + uint32_t trailing_zeros = 0; + uint64_t temp = input; + while ((temp & 1) == 0 && temp > 0) { + trailing_zeros++; + temp >>= 1; + } + return trailing_zeros; +#endif +} + +static inline int trailing_zeros(int input) +{ + JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior + +#if HAVE_BUILTIN_CTZLL + return __builtin_ctz(input); +#else + int trailing_zeros = 0; + int temp = input; + while ((temp & 1) == 0 && temp > 0) { + trailing_zeros++; + temp >>= 1; + } + return trailing_zeros; +#endif +} + +#ifdef JSON_ENABLE_SIMD + +#define SIMD_MINIMUM_THRESHOLD 4 + +ALWAYS_INLINE(static) void json_fast_memcpy16(char *dst, const char *src, size_t len) +{ + RBIMPL_ASSERT_OR_ASSUME(len < 16); + RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); // 4 +#if defined(__has_builtin) && __has_builtin(__builtin_memcpy) + // If __builtin_memcpy is available, use it to copy between SIMD_MINIMUM_THRESHOLD (4) and vec_len-1 (15) bytes. + // These copies overlap. The first copy will copy the first 8 (or 4) bytes. The second copy will copy + // the last 8 (or 4) bytes but overlap with the first copy. The overlapping bytes will be in the correct + // position in both copies. + + // Please do not attempt to replace __builtin_memcpy with memcpy without profiling and/or looking at the + // generated assembly. On clang-specifically (tested on Apple clang version 17.0.0 (clang-1700.0.13.3)), + // when using memcpy, the compiler will notice the only difference is a 4 or 8 and generate a conditional + // select instruction instead of direct loads and stores with a branch. This ends up slower than the branch + // plus two loads and stores generated when using __builtin_memcpy. + if (len >= 8) { + __builtin_memcpy(dst, src, 8); + __builtin_memcpy(dst + len - 8, src + len - 8, 8); + } else { + __builtin_memcpy(dst, src, 4); + __builtin_memcpy(dst + len - 4, src + len - 4, 4); + } +#else + MEMCPY(dst, src, char, len); +#endif +} + +#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64) +#include <arm_neon.h> + +#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 +static inline SIMD_Implementation find_simd_implementation(void) +{ + return SIMD_NEON; +} + +#define HAVE_SIMD 1 +#define HAVE_SIMD_NEON 1 + +// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon +ALWAYS_INLINE(static) uint64_t neon_match_mask(uint8x16_t matches) +{ + const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4); + const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0); + return mask & 0x8888888888888888ull; +} + +ALWAYS_INLINE(static) uint64_t compute_chunk_mask_neon(const char *ptr) +{ + uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr); + + // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 + // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/ + const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33)); + + uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\')); + uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash); + return neon_match_mask(needs_escape); +} + +ALWAYS_INLINE(static) int string_scan_simd_neon(const char **ptr, const char *end, uint64_t *mask) +{ + while (*ptr + sizeof(uint8x16_t) <= end) { + uint64_t chunk_mask = compute_chunk_mask_neon(*ptr); + if (chunk_mask) { + *mask = chunk_mask; + return 1; + } + *ptr += sizeof(uint8x16_t); + } + return 0; +} + +#endif /* ARM Neon Support.*/ + +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) + +#ifdef HAVE_X86INTRIN_H +#include <x86intrin.h> + +#define HAVE_SIMD 1 +#define HAVE_SIMD_SSE2 1 + +#ifdef HAVE_CPUID_H +#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 + +#if defined(__clang__) || defined(__GNUC__) +#define TARGET_SSE2 __attribute__((target("sse2"))) +#else +#define TARGET_SSE2 +#endif + +#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a) +#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a) +#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1)) +#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a) + +ALWAYS_INLINE(static) TARGET_SSE2 int compute_chunk_mask_sse2(const char *ptr) +{ + __m128i chunk = _mm_loadu_si128((__m128i const*)ptr); + // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 + // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/ + __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33)); + __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\')); + __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash); + return _mm_movemask_epi8(needs_escape); +} + +ALWAYS_INLINE(static) TARGET_SSE2 int string_scan_simd_sse2(const char **ptr, const char *end, int *mask) +{ + while (*ptr + sizeof(__m128i) <= end) { + int chunk_mask = compute_chunk_mask_sse2(*ptr); + if (chunk_mask) { + *mask = chunk_mask; + return 1; + } + *ptr += sizeof(__m128i); + } + + return 0; +} + +#include <cpuid.h> +#endif /* HAVE_CPUID_H */ + +static inline SIMD_Implementation find_simd_implementation(void) +{ + // TODO Revisit. I think the SSE version now only uses SSE2 instructions. + if (__builtin_cpu_supports("sse2")) { + return SIMD_SSE2; + } + + return SIMD_NONE; +} + +#endif /* HAVE_X86INTRIN_H */ +#endif /* X86_64 Support */ + +#endif /* JSON_ENABLE_SIMD */ + +#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED +static inline SIMD_Implementation find_simd_implementation(void) +{ + return SIMD_NONE; +} +#endif diff --git a/ext/json/vendor/fpconv.c b/ext/json/vendor/fpconv.c new file mode 100644 index 0000000000..6c9bc2c103 --- /dev/null +++ b/ext/json/vendor/fpconv.c @@ -0,0 +1,480 @@ +// Boost Software License - Version 1.0 - August 17th, 2003 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +// The contents of this file is extracted from https://github.com/night-shift/fpconv +// It was slightly modified to append ".0" to plain floats, for use with the https://github.com/ruby/json package. + +#include <stdbool.h> +#include <string.h> +#include <stdint.h> + +#if JSON_DEBUG +#include <assert.h> +#endif + +#define npowers 87 +#define steppowers 8 +#define firstpower -348 /* 10 ^ -348 */ + +#define expmax -32 +#define expmin -60 + +typedef struct Fp { + uint64_t frac; + int exp; +} Fp; + +static const Fp powers_ten[] = { + { 18054884314459144840U, -1220 }, { 13451937075301367670U, -1193 }, + { 10022474136428063862U, -1166 }, { 14934650266808366570U, -1140 }, + { 11127181549972568877U, -1113 }, { 16580792590934885855U, -1087 }, + { 12353653155963782858U, -1060 }, { 18408377700990114895U, -1034 }, + { 13715310171984221708U, -1007 }, { 10218702384817765436U, -980 }, + { 15227053142812498563U, -954 }, { 11345038669416679861U, -927 }, + { 16905424996341287883U, -901 }, { 12595523146049147757U, -874 }, + { 9384396036005875287U, -847 }, { 13983839803942852151U, -821 }, + { 10418772551374772303U, -794 }, { 15525180923007089351U, -768 }, + { 11567161174868858868U, -741 }, { 17236413322193710309U, -715 }, + { 12842128665889583758U, -688 }, { 9568131466127621947U, -661 }, + { 14257626930069360058U, -635 }, { 10622759856335341974U, -608 }, + { 15829145694278690180U, -582 }, { 11793632577567316726U, -555 }, + { 17573882009934360870U, -529 }, { 13093562431584567480U, -502 }, + { 9755464219737475723U, -475 }, { 14536774485912137811U, -449 }, + { 10830740992659433045U, -422 }, { 16139061738043178685U, -396 }, + { 12024538023802026127U, -369 }, { 17917957937422433684U, -343 }, + { 13349918974505688015U, -316 }, { 9946464728195732843U, -289 }, + { 14821387422376473014U, -263 }, { 11042794154864902060U, -236 }, + { 16455045573212060422U, -210 }, { 12259964326927110867U, -183 }, + { 18268770466636286478U, -157 }, { 13611294676837538539U, -130 }, + { 10141204801825835212U, -103 }, { 15111572745182864684U, -77 }, + { 11258999068426240000U, -50 }, { 16777216000000000000U, -24 }, + { 12500000000000000000U, 3 }, { 9313225746154785156U, 30 }, + { 13877787807814456755U, 56 }, { 10339757656912845936U, 83 }, + { 15407439555097886824U, 109 }, { 11479437019748901445U, 136 }, + { 17105694144590052135U, 162 }, { 12744735289059618216U, 189 }, + { 9495567745759798747U, 216 }, { 14149498560666738074U, 242 }, + { 10542197943230523224U, 269 }, { 15709099088952724970U, 295 }, + { 11704190886730495818U, 322 }, { 17440603504673385349U, 348 }, + { 12994262207056124023U, 375 }, { 9681479787123295682U, 402 }, + { 14426529090290212157U, 428 }, { 10748601772107342003U, 455 }, + { 16016664761464807395U, 481 }, { 11933345169920330789U, 508 }, + { 17782069995880619868U, 534 }, { 13248674568444952270U, 561 }, + { 9871031767461413346U, 588 }, { 14708983551653345445U, 614 }, + { 10959046745042015199U, 641 }, { 16330252207878254650U, 667 }, + { 12166986024289022870U, 694 }, { 18130221999122236476U, 720 }, + { 13508068024458167312U, 747 }, { 10064294952495520794U, 774 }, + { 14996968138956309548U, 800 }, { 11173611982879273257U, 827 }, + { 16649979327439178909U, 853 }, { 12405201291620119593U, 880 }, + { 9242595204427927429U, 907 }, { 13772540099066387757U, 933 }, + { 10261342003245940623U, 960 }, { 15290591125556738113U, 986 }, + { 11392378155556871081U, 1013 }, { 16975966327722178521U, 1039 }, + { 12648080533535911531U, 1066 } +}; + +static Fp find_cachedpow10(int exp, int* k) +{ + const double one_log_ten = 0.30102999566398114; + + int approx = (int)(-(exp + npowers) * one_log_ten); + int idx = (approx - firstpower) / steppowers; + + while(1) { + int current = exp + powers_ten[idx].exp + 64; + + if(current < expmin) { + idx++; + continue; + } + + if(current > expmax) { + idx--; + continue; + } + + *k = (firstpower + idx * steppowers); + + return powers_ten[idx]; + } +} + +#define fracmask 0x000FFFFFFFFFFFFFU +#define expmask 0x7FF0000000000000U +#define hiddenbit 0x0010000000000000U +#define signmask 0x8000000000000000U +#define expbias (1023 + 52) + +#define absv(n) ((n) < 0 ? -(n) : (n)) +#define minv(a, b) ((a) < (b) ? (a) : (b)) + +static const uint64_t tens[] = { + 10000000000000000000U, 1000000000000000000U, 100000000000000000U, + 10000000000000000U, 1000000000000000U, 100000000000000U, + 10000000000000U, 1000000000000U, 100000000000U, + 10000000000U, 1000000000U, 100000000U, + 10000000U, 1000000U, 100000U, + 10000U, 1000U, 100U, + 10U, 1U +}; + +static inline uint64_t get_dbits(double d) +{ + union { + double dbl; + uint64_t i; + } dbl_bits = { d }; + + return dbl_bits.i; +} + +static Fp build_fp(double d) +{ + uint64_t bits = get_dbits(d); + + Fp fp; + fp.frac = bits & fracmask; + fp.exp = (bits & expmask) >> 52; + + if(fp.exp) { + fp.frac += hiddenbit; + fp.exp -= expbias; + + } else { + fp.exp = -expbias + 1; + } + + return fp; +} + +static void normalize(Fp* fp) +{ + while ((fp->frac & hiddenbit) == 0) { + fp->frac <<= 1; + fp->exp--; + } + + int shift = 64 - 52 - 1; + fp->frac <<= shift; + fp->exp -= shift; +} + +static void get_normalized_boundaries(Fp* fp, Fp* lower, Fp* upper) +{ + upper->frac = (fp->frac << 1) + 1; + upper->exp = fp->exp - 1; + + while ((upper->frac & (hiddenbit << 1)) == 0) { + upper->frac <<= 1; + upper->exp--; + } + + int u_shift = 64 - 52 - 2; + + upper->frac <<= u_shift; + upper->exp = upper->exp - u_shift; + + + int l_shift = fp->frac == hiddenbit ? 2 : 1; + + lower->frac = (fp->frac << l_shift) - 1; + lower->exp = fp->exp - l_shift; + + + lower->frac <<= lower->exp - upper->exp; + lower->exp = upper->exp; +} + +static Fp multiply(Fp* a, Fp* b) +{ + const uint64_t lomask = 0x00000000FFFFFFFF; + + uint64_t ah_bl = (a->frac >> 32) * (b->frac & lomask); + uint64_t al_bh = (a->frac & lomask) * (b->frac >> 32); + uint64_t al_bl = (a->frac & lomask) * (b->frac & lomask); + uint64_t ah_bh = (a->frac >> 32) * (b->frac >> 32); + + uint64_t tmp = (ah_bl & lomask) + (al_bh & lomask) + (al_bl >> 32); + /* round up */ + tmp += 1U << 31; + + Fp fp = { + ah_bh + (ah_bl >> 32) + (al_bh >> 32) + (tmp >> 32), + a->exp + b->exp + 64 + }; + + return fp; +} + +static void round_digit(char* digits, int ndigits, uint64_t delta, uint64_t rem, uint64_t kappa, uint64_t frac) +{ + while (rem < frac && delta - rem >= kappa && + (rem + kappa < frac || frac - rem > rem + kappa - frac)) { + + digits[ndigits - 1]--; + rem += kappa; + } +} + +static int generate_digits(Fp* fp, Fp* upper, Fp* lower, char* digits, int* K) +{ + uint64_t wfrac = upper->frac - fp->frac; + uint64_t delta = upper->frac - lower->frac; + + Fp one; + one.frac = 1ULL << -upper->exp; + one.exp = upper->exp; + + uint64_t part1 = upper->frac >> -one.exp; + uint64_t part2 = upper->frac & (one.frac - 1); + + int idx = 0, kappa = 10; + const uint64_t* divp; + /* 1000000000 */ + for(divp = tens + 10; kappa > 0; divp++) { + + uint64_t div = *divp; + unsigned digit = (unsigned) (part1 / div); + + if (digit || idx) { + digits[idx++] = digit + '0'; + } + + part1 -= digit * div; + kappa--; + + uint64_t tmp = (part1 <<-one.exp) + part2; + if (tmp <= delta) { + *K += kappa; + round_digit(digits, idx, delta, tmp, div << -one.exp, wfrac); + + return idx; + } + } + + /* 10 */ + const uint64_t* unit = tens + 18; + + while(true) { + part2 *= 10; + delta *= 10; + kappa--; + + unsigned digit = (unsigned) (part2 >> -one.exp); + if (digit || idx) { + digits[idx++] = digit + '0'; + } + + part2 &= one.frac - 1; + if (part2 < delta) { + *K += kappa; + round_digit(digits, idx, delta, part2, one.frac, wfrac * *unit); + + return idx; + } + + unit--; + } +} + +static int grisu2(double d, char* digits, int* K) +{ + Fp w = build_fp(d); + + Fp lower, upper; + get_normalized_boundaries(&w, &lower, &upper); + + normalize(&w); + + int k; + Fp cp = find_cachedpow10(upper.exp, &k); + + w = multiply(&w, &cp); + upper = multiply(&upper, &cp); + lower = multiply(&lower, &cp); + + lower.frac++; + upper.frac--; + + *K = -k; + + return generate_digits(&w, &upper, &lower, digits, K); +} + +static int emit_digits(char* digits, int ndigits, char* dest, int K, bool neg) +{ + int exp = absv(K + ndigits - 1); + + if(K >= 0 && exp < 15) { + memcpy(dest, digits, ndigits); + memset(dest + ndigits, '0', K); + + /* add a .0 to mark this as a float. */ + dest[ndigits + K] = '.'; + dest[ndigits + K + 1] = '0'; + + return ndigits + K + 2; + } + + /* write decimal w/o scientific notation */ + if(K < 0 && (K > -7 || exp < 10)) { + int offset = ndigits - absv(K); + /* fp < 1.0 -> write leading zero */ + if(offset <= 0) { + offset = -offset; + dest[0] = '0'; + dest[1] = '.'; + memset(dest + 2, '0', offset); + memcpy(dest + offset + 2, digits, ndigits); + + return ndigits + 2 + offset; + + /* fp > 1.0 */ + } else { + memcpy(dest, digits, offset); + dest[offset] = '.'; + memcpy(dest + offset + 1, digits + offset, ndigits - offset); + + return ndigits + 1; + } + } + + /* write decimal w/ scientific notation */ + ndigits = minv(ndigits, 18 - neg); + + int idx = 0; + dest[idx++] = digits[0]; + + if(ndigits > 1) { + dest[idx++] = '.'; + memcpy(dest + idx, digits + 1, ndigits - 1); + idx += ndigits - 1; + } + + dest[idx++] = 'e'; + + char sign = K + ndigits - 1 < 0 ? '-' : '+'; + dest[idx++] = sign; + + int cent = 0; + + if(exp > 99) { + cent = exp / 100; + dest[idx++] = cent + '0'; + exp -= cent * 100; + } + if(exp > 9) { + int dec = exp / 10; + dest[idx++] = dec + '0'; + exp -= dec * 10; + + } else if(cent) { + dest[idx++] = '0'; + } + + dest[idx++] = exp % 10 + '0'; + + return idx; +} + +static int filter_special(double fp, char* dest) +{ + if(fp == 0.0) { + dest[0] = '0'; + dest[1] = '.'; + dest[2] = '0'; + return 3; + } + + uint64_t bits = get_dbits(fp); + + bool nan = (bits & expmask) == expmask; + + if(!nan) { + return 0; + } + + if(bits & fracmask) { + dest[0] = 'n'; dest[1] = 'a'; dest[2] = 'n'; + + } else { + dest[0] = 'i'; dest[1] = 'n'; dest[2] = 'f'; + } + + return 3; +} + +/* Fast and accurate double to string conversion based on Florian Loitsch's + * Grisu-algorithm[1]. + * + * Input: + * fp -> the double to convert, dest -> destination buffer. + * The generated string will never be longer than 32 characters. + * Make sure to pass a pointer to at least 32 bytes of memory. + * The emitted string will not be null terminated. + * + * + * + * Output: + * The number of written characters. + * + * Exemplary usage: + * + * void print(double d) + * { + * char buf[28 + 1] // plus null terminator + * int str_len = fpconv_dtoa(d, buf); + * + * buf[str_len] = '\0'; + * printf("%s", buf); + * } + * + */ +static int fpconv_dtoa(double d, char dest[32]) +{ + char digits[18]; + + int str_len = 0; + bool neg = false; + + if(get_dbits(d) & signmask) { + dest[0] = '-'; + str_len++; + neg = true; + } + + int spec = filter_special(d, dest + str_len); + + if(spec) { + return str_len + spec; + } + + int K = 0; + int ndigits = grisu2(d, digits, &K); + + str_len += emit_digits(digits, ndigits, dest + str_len, K, neg); +#if JSON_DEBUG + assert(str_len <= 32); +#endif + + return str_len; +} diff --git a/ext/json/vendor/jeaiii-ltoa.h b/ext/json/vendor/jeaiii-ltoa.h new file mode 100644 index 0000000000..ba4f497fc8 --- /dev/null +++ b/ext/json/vendor/jeaiii-ltoa.h @@ -0,0 +1,267 @@ +/* + +This file is released under the terms of the MIT License. It is based on the +work of James Edward Anhalt III, with the original license listed below. + +MIT License + +Copyright (c) 2024,2025 Enrico Thierbach - https://github.com/radiospiel +Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#ifndef JEAIII_TO_TEXT_H_ +#define JEAIII_TO_TEXT_H_ + +#include <stdint.h> + +typedef uint_fast32_t u32_t; +typedef uint_fast64_t u64_t; + +#define u32(x) ((u32_t)(x)) +#define u64(x) ((u64_t)(x)) + +struct digit_pair +{ + char dd[2]; +}; + +static const struct digit_pair *digits_dd = (struct digit_pair *)( + "00" "01" "02" "03" "04" "05" "06" "07" "08" "09" + "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" + "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" + "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" + "40" "41" "42" "43" "44" "45" "46" "47" "48" "49" + "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" + "60" "61" "62" "63" "64" "65" "66" "67" "68" "69" + "70" "71" "72" "73" "74" "75" "76" "77" "78" "79" + "80" "81" "82" "83" "84" "85" "86" "87" "88" "89" + "90" "91" "92" "93" "94" "95" "96" "97" "98" "99" +); + +static const struct digit_pair *digits_fd = (struct digit_pair *)( + "0_" "1_" "2_" "3_" "4_" "5_" "6_" "7_" "8_" "9_" + "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" + "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" + "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" + "40" "41" "42" "43" "44" "45" "46" "47" "48" "49" + "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" + "60" "61" "62" "63" "64" "65" "66" "67" "68" "69" + "70" "71" "72" "73" "74" "75" "76" "77" "78" "79" + "80" "81" "82" "83" "84" "85" "86" "87" "88" "89" + "90" "91" "92" "93" "94" "95" "96" "97" "98" "99" +); + +static const u64_t mask24 = (u64(1) << 24) - 1; +static const u64_t mask32 = (u64(1) << 32) - 1; +static const u64_t mask57 = (u64(1) << 57) - 1; + +#define COPY(buffer, digits) memcpy(buffer, &(digits), sizeof(struct digit_pair)) + +static char * +jeaiii_ultoa(char *b, u64_t n) +{ + if (n < u32(1e2)) { + COPY(b, digits_fd[n]); + return n < 10 ? b + 1 : b + 2; + } + + if (n < u32(1e6)) { + if (n < u32(1e4)) { + u32_t f0 = u32((10 * (1 << 24) / 1e3 + 1) * n); + COPY(b, digits_fd[f0 >> 24]); + + b -= n < u32(1e3); + u32_t f2 = (f0 & mask24) * 100; + COPY(b + 2, digits_dd[f2 >> 24]); + + return b + 4; + } + + u64_t f0 = u64(10 * (1ull << 32ull)/ 1e5 + 1) * n; + COPY(b, digits_fd[f0 >> 32]); + + b -= n < u32(1e5); + u64_t f2 = (f0 & mask32) * 100; + COPY(b + 2, digits_dd[f2 >> 32]); + + u64_t f4 = (f2 & mask32) * 100; + COPY(b + 4, digits_dd[f4 >> 32]); + return b + 6; + } + + if (n < u64(1ull << 32ull)) { + if (n < u32(1e8)) { + u64_t f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16; + COPY(b, digits_fd[f0 >> 32]); + + b -= n < u32(1e7); + u64_t f2 = (f0 & mask32) * 100; + COPY(b + 2, digits_dd[f2 >> 32]); + + u64_t f4 = (f2 & mask32) * 100; + COPY(b + 4, digits_dd[f4 >> 32]); + + u64_t f6 = (f4 & mask32) * 100; + COPY(b + 6, digits_dd[f6 >> 32]); + + return b + 8; + } + + u64_t f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * n; + COPY(b, digits_fd[f0 >> 57]); + + b -= n < u32(1e9); + u64_t f2 = (f0 & mask57) * 100; + COPY(b + 2, digits_dd[f2 >> 57]); + + u64_t f4 = (f2 & mask57) * 100; + COPY(b + 4, digits_dd[f4 >> 57]); + + u64_t f6 = (f4 & mask57) * 100; + COPY(b + 6, digits_dd[f6 >> 57]); + + u64_t f8 = (f6 & mask57) * 100; + COPY(b + 8, digits_dd[f8 >> 57]); + + return b + 10; + } + + // if we get here U must be u64 but some compilers don't know that, so reassign n to a u64 to avoid warnings + u32_t z = n % u32(1e8); + u64_t u = n / u32(1e8); + + if (u < u32(1e2)) { + // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number) + COPY(b, digits_dd[u]); + b += 2; + } + else if (u < u32(1e6)) { + if (u < u32(1e4)) { + u32_t f0 = u32((10 * (1 << 24) / 1e3 + 1) * u); + COPY(b, digits_fd[f0 >> 24]); + + b -= u < u32(1e3); + u32_t f2 = (f0 & mask24) * 100; + COPY(b + 2, digits_dd[f2 >> 24]); + b += 4; + } + else { + u64_t f0 = u64(10 * (1ull << 32ull) / 1e5 + 1) * u; + COPY(b, digits_fd[f0 >> 32]); + + b -= u < u32(1e5); + u64_t f2 = (f0 & mask32) * 100; + COPY(b + 2, digits_dd[f2 >> 32]); + + u64_t f4 = (f2 & mask32) * 100; + COPY(b + 4, digits_dd[f4 >> 32]); + b += 6; + } + } + else if (u < u32(1e8)) { + u64_t f0 = u64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16; + COPY(b, digits_fd[f0 >> 32]); + + b -= u < u32(1e7); + u64_t f2 = (f0 & mask32) * 100; + COPY(b + 2, digits_dd[f2 >> 32]); + + u64_t f4 = (f2 & mask32) * 100; + COPY(b + 4, digits_dd[f4 >> 32]); + + u64_t f6 = (f4 & mask32) * 100; + COPY(b + 6, digits_dd[f6 >> 32]); + + b += 8; + } + else if (u < u64(1ull << 32ull)) { + u64_t f0 = u64(10 * (1ull << 57ull) / 1e9 + 1) * u; + COPY(b, digits_fd[f0 >> 57]); + + b -= u < u32(1e9); + u64_t f2 = (f0 & mask57) * 100; + COPY(b + 2, digits_dd[f2 >> 57]); + + u64_t f4 = (f2 & mask57) * 100; + COPY(b + 4, digits_dd[f4 >> 57]); + + u64_t f6 = (f4 & mask57) * 100; + COPY(b + 6, digits_dd[f6 >> 57]); + + u64_t f8 = (f6 & mask57) * 100; + COPY(b + 8, digits_dd[f8 >> 57]); + b += 10; + } + else { + u32_t y = u % u32(1e8); + u /= u32(1e8); + + // u is 2, 3, or 4 digits (if u < 10 it would have been handled above) + if (u < u32(1e2)) { + COPY(b, digits_dd[u]); + b += 2; + } + else { + u32_t f0 = u32((10 * (1 << 24) / 1e3 + 1) * u); + COPY(b, digits_fd[f0 >> 24]); + + b -= u < u32(1e3); + u32_t f2 = (f0 & mask24) * 100; + COPY(b + 2, digits_dd[f2 >> 24]); + + b += 4; + } + // do 8 digits + u64_t f0 = (u64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1; + COPY(b, digits_dd[f0 >> 32]); + + u64_t f2 = (f0 & mask32) * 100; + COPY(b + 2, digits_dd[f2 >> 32]); + + u64_t f4 = (f2 & mask32) * 100; + COPY(b + 4, digits_dd[f4 >> 32]); + + u64_t f6 = (f4 & mask32) * 100; + COPY(b + 6, digits_dd[f6 >> 32]); + b += 8; + } + + // do 8 digits + u64_t f0 = (u64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1; + COPY(b, digits_dd[f0 >> 32]); + + u64_t f2 = (f0 & mask32) * 100; + COPY(b + 2, digits_dd[f2 >> 32]); + + u64_t f4 = (f2 & mask32) * 100; + COPY(b + 4, digits_dd[f4 >> 32]); + + u64_t f6 = (f4 & mask32) * 100; + COPY(b + 6, digits_dd[f6 >> 32]); + + return b + 8; +} + +#undef u32 +#undef u64 +#undef COPY + +#endif // JEAIII_TO_TEXT_H_ diff --git a/ext/json/vendor/ryu.h b/ext/json/vendor/ryu.h new file mode 100644 index 0000000000..f06ec814b4 --- /dev/null +++ b/ext/json/vendor/ryu.h @@ -0,0 +1,819 @@ +// Copyright 2018 Ulf Adams +// +// The contents of this file may be used under the terms of the Apache License, +// Version 2.0. +// +// Alternatively, the contents of this file may be used under the terms of +// the Boost Software License, Version 1.0. +// +// Unless required by applicable law or agreed to in writing, this software +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. +// +// --- +// +// Apache License +// Version 2.0, January 2004 +// http://www.apache.org/licenses/ +// +// TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +// +// 1. Definitions. +// +// "License" shall mean the terms and conditions for use, reproduction, +// and distribution as defined by Sections 1 through 9 of this document. +// +// "Licensor" shall mean the copyright owner or entity authorized by +// the copyright owner that is granting the License. +// +// "Legal Entity" shall mean the union of the acting entity and all +// other entities that control, are controlled by, or are under common +// control with that entity. For the purposes of this definition, +// "control" means (i) the power, direct or indirect, to cause the +// direction or management of such entity, whether by contract or +// otherwise, or (ii) ownership of fifty percent (50%) or more of the +// outstanding shares, or (iii) beneficial ownership of such entity. +// +// "You" (or "Your") shall mean an individual or Legal Entity +// exercising permissions granted by this License. +// +// "Source" form shall mean the preferred form for making modifications, +// including but not limited to software source code, documentation +// source, and configuration files. +// +// "Object" form shall mean any form resulting from mechanical +// transformation or translation of a Source form, including but +// not limited to compiled object code, generated documentation, +// and conversions to other media types. +// +// "Work" shall mean the work of authorship, whether in Source or +// Object form, made available under the License, as indicated by a +// copyright notice that is included in or attached to the work +// (an example is provided in the Appendix below). +// +// "Derivative Works" shall mean any work, whether in Source or Object +// form, that is based on (or derived from) the Work and for which the +// editorial revisions, annotations, elaborations, or other modifications +// represent, as a whole, an original work of authorship. For the purposes +// of this License, Derivative Works shall not include works that remain +// separable from, or merely link (or bind by name) to the interfaces of, +// the Work and Derivative Works thereof. +// +// "Contribution" shall mean any work of authorship, including +// the original version of the Work and any modifications or additions +// to that Work or Derivative Works thereof, that is intentionally +// submitted to Licensor for inclusion in the Work by the copyright owner +// or by an individual or Legal Entity authorized to submit on behalf of +// the copyright owner. For the purposes of this definition, "submitted" +// means any form of electronic, verbal, or written communication sent +// to the Licensor or its representatives, including but not limited to +// communication on electronic mailing lists, source code control systems, +// and issue tracking systems that are managed by, or on behalf of, the +// Licensor for the purpose of discussing and improving the Work, but +// excluding communication that is conspicuously marked or otherwise +// designated in writing by the copyright owner as "Not a Contribution." +// +// "Contributor" shall mean Licensor and any individual or Legal Entity +// on behalf of whom a Contribution has been received by Licensor and +// subsequently incorporated within the Work. +// +// 2. Grant of Copyright License. Subject to the terms and conditions of +// this License, each Contributor hereby grants to You a perpetual, +// worldwide, non-exclusive, no-charge, royalty-free, irrevocable +// copyright license to reproduce, prepare Derivative Works of, +// publicly display, publicly perform, sublicense, and distribute the +// Work and such Derivative Works in Source or Object form. +// +// 3. Grant of Patent License. Subject to the terms and conditions of +// this License, each Contributor hereby grants to You a perpetual, +// worldwide, non-exclusive, no-charge, royalty-free, irrevocable +// (except as stated in this section) patent license to make, have made, +// use, offer to sell, sell, import, and otherwise transfer the Work, +// where such license applies only to those patent claims licensable +// by such Contributor that are necessarily infringed by their +// Contribution(s) alone or by combination of their Contribution(s) +// with the Work to which such Contribution(s) was submitted. If You +// institute patent litigation against any entity (including a +// cross-claim or counterclaim in a lawsuit) alleging that the Work +// or a Contribution incorporated within the Work constitutes direct +// or contributory patent infringement, then any patent licenses +// granted to You under this License for that Work shall terminate +// as of the date such litigation is filed. +// +// 4. Redistribution. You may reproduce and distribute copies of the +// Work or Derivative Works thereof in any medium, with or without +// modifications, and in Source or Object form, provided that You +// meet the following conditions: +// +// (a) You must give any other recipients of the Work or +// Derivative Works a copy of this License; and +// +// (b) You must cause any modified files to carry prominent notices +// stating that You changed the files; and +// +// (c) You must retain, in the Source form of any Derivative Works +// that You distribute, all copyright, patent, trademark, and +// attribution notices from the Source form of the Work, +// excluding those notices that do not pertain to any part of +// the Derivative Works; and +// +// (d) If the Work includes a "NOTICE" text file as part of its +// distribution, then any Derivative Works that You distribute must +// include a readable copy of the attribution notices contained +// within such NOTICE file, excluding those notices that do not +// pertain to any part of the Derivative Works, in at least one +// of the following places: within a NOTICE text file distributed +// as part of the Derivative Works; within the Source form or +// documentation, if provided along with the Derivative Works; or, +// within a display generated by the Derivative Works, if and +// wherever such third-party notices normally appear. The contents +// of the NOTICE file are for informational purposes only and +// do not modify the License. You may add Your own attribution +// notices within Derivative Works that You distribute, alongside +// or as an addendum to the NOTICE text from the Work, provided +// that such additional attribution notices cannot be construed +// as modifying the License. +// +// You may add Your own copyright statement to Your modifications and +// may provide additional or different license terms and conditions +// for use, reproduction, or distribution of Your modifications, or +// for any such Derivative Works as a whole, provided Your use, +// reproduction, and distribution of the Work otherwise complies with +// the conditions stated in this License. +// +// 5. Submission of Contributions. Unless You explicitly state otherwise, +// any Contribution intentionally submitted for inclusion in the Work +// by You to the Licensor shall be under the terms and conditions of +// this License, without any additional terms or conditions. +// Notwithstanding the above, nothing herein shall supersede or modify +// the terms of any separate license agreement you may have executed +// with Licensor regarding such Contributions. +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor, +// except as required for reasonable and customary use in describing the +// origin of the Work and reproducing the content of the NOTICE file. +// +// 7. Disclaimer of Warranty. Unless required by applicable law or +// agreed to in writing, Licensor provides the Work (and each +// Contributor provides its Contributions) on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied, including, without limitation, any warranties or conditions +// of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +// PARTICULAR PURPOSE. You are solely responsible for determining the +// appropriateness of using or redistributing the Work and assume any +// risks associated with Your exercise of permissions under this License. +// +// 8. Limitation of Liability. In no event and under no legal theory, +// whether in tort (including negligence), contract, or otherwise, +// unless required by applicable law (such as deliberate and grossly +// negligent acts) or agreed to in writing, shall any Contributor be +// liable to You for damages, including any direct, indirect, special, +// incidental, or consequential damages of any character arising as a +// result of this License or out of the use or inability to use the +// Work (including but not limited to damages for loss of goodwill, +// work stoppage, computer failure or malfunction, or any and all +// other commercial damages or losses), even if such Contributor +// has been advised of the possibility of such damages. +// +// 9. Accepting Warranty or Additional Liability. While redistributing +// the Work or Derivative Works thereof, You may choose to offer, +// and charge a fee for, acceptance of support, warranty, indemnity, +// or other liability obligations and/or rights consistent with this +// License. However, in accepting such obligations, You may act only +// on Your own behalf and on Your sole responsibility, not on behalf +// of any other Contributor, and only if You agree to indemnify, +// defend, and hold each Contributor harmless for any liability +// incurred by, or claims asserted against, such Contributor by reason +// of your accepting any such warranty or additional liability. +// +// END OF TERMS AND CONDITIONS +// +// APPENDIX: How to apply the Apache License to your work. +// +// To apply the Apache License to your work, attach the following +// boilerplate notice, with the fields enclosed by brackets "[]" +// replaced with your own identifying information. (Don't include +// the brackets!) The text should be enclosed in the appropriate +// comment syntax for the file format. We also recommend that a +// file or class name and description of purpose be included on the +// same "printed page" as the copyright notice for easier +// identification within third-party archives. +// +// Copyright [yyyy] [name of copyright owner] +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// --- +// +// Boost Software License - Version 1.0 - August 17th, 2003 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// --- +// Minimal Ryu implementation adapted for Ruby JSON gem by Josef Šimánek +// Optimized for pre-extracted mantissa/exponent from JSON parsing +// This is a stripped-down version containing only what's needed for +// converting decimal mantissa+exponent to IEEE 754 double precision. + +#ifndef RYU_H +#define RYU_H + +#include <stdint.h> +#include <stdbool.h> +#include <string.h> + +// Detect __builtin_clzll availability (for floor_log2) +// Note: MSVC doesn't have __builtin_clzll, so we provide a fallback +#ifdef __clang__ + #if __has_builtin(__builtin_clzll) + #define RYU_HAVE_BUILTIN_CLZLL 1 + #else + #define RYU_HAVE_BUILTIN_CLZLL 0 + #endif +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define RYU_HAVE_BUILTIN_CLZLL 1 +#else + #define RYU_HAVE_BUILTIN_CLZLL 0 +#endif + +// Count leading zeros (for floor_log2) +static inline uint32_t ryu_leading_zeros64(uint64_t input) +{ +#if RYU_HAVE_BUILTIN_CLZLL + return __builtin_clzll(input); +#else + // Fallback: binary search for the highest set bit + // This works on MSVC and other compilers without __builtin_clzll + if (input == 0) return 64; + uint32_t n = 0; + if (input <= 0x00000000FFFFFFFFULL) { n += 32; input <<= 32; } + if (input <= 0x0000FFFFFFFFFFFFULL) { n += 16; input <<= 16; } + if (input <= 0x00FFFFFFFFFFFFFFULL) { n += 8; input <<= 8; } + if (input <= 0x0FFFFFFFFFFFFFFFULL) { n += 4; input <<= 4; } + if (input <= 0x3FFFFFFFFFFFFFFFULL) { n += 2; input <<= 2; } + if (input <= 0x7FFFFFFFFFFFFFFFULL) { n += 1; } + return n; +#endif +} + +// These tables are generated by PrintDoubleLookupTable. +#define DOUBLE_POW5_INV_BITCOUNT 125 +#define DOUBLE_POW5_BITCOUNT 125 + +#define DOUBLE_POW5_INV_TABLE_SIZE 342 +#define DOUBLE_POW5_TABLE_SIZE 326 + +static const uint64_t DOUBLE_POW5_INV_SPLIT[DOUBLE_POW5_INV_TABLE_SIZE][2] = { + { 1u, 2305843009213693952u }, { 11068046444225730970u, 1844674407370955161u }, + { 5165088340638674453u, 1475739525896764129u }, { 7821419487252849886u, 1180591620717411303u }, + { 8824922364862649494u, 1888946593147858085u }, { 7059937891890119595u, 1511157274518286468u }, + { 13026647942995916322u, 1208925819614629174u }, { 9774590264567735146u, 1934281311383406679u }, + { 11509021026396098440u, 1547425049106725343u }, { 16585914450600699399u, 1237940039285380274u }, + { 15469416676735388068u, 1980704062856608439u }, { 16064882156130220778u, 1584563250285286751u }, + { 9162556910162266299u, 1267650600228229401u }, { 7281393426775805432u, 2028240960365167042u }, + { 16893161185646375315u, 1622592768292133633u }, { 2446482504291369283u, 1298074214633706907u }, + { 7603720821608101175u, 2076918743413931051u }, { 2393627842544570617u, 1661534994731144841u }, + { 16672297533003297786u, 1329227995784915872u }, { 11918280793837635165u, 2126764793255865396u }, + { 5845275820328197809u, 1701411834604692317u }, { 15744267100488289217u, 1361129467683753853u }, + { 3054734472329800808u, 2177807148294006166u }, { 17201182836831481939u, 1742245718635204932u }, + { 6382248639981364905u, 1393796574908163946u }, { 2832900194486363201u, 2230074519853062314u }, + { 5955668970331000884u, 1784059615882449851u }, { 1075186361522890384u, 1427247692705959881u }, + { 12788344622662355584u, 2283596308329535809u }, { 13920024512871794791u, 1826877046663628647u }, + { 3757321980813615186u, 1461501637330902918u }, { 10384555214134712795u, 1169201309864722334u }, + { 5547241898389809503u, 1870722095783555735u }, { 4437793518711847602u, 1496577676626844588u }, + { 10928932444453298728u, 1197262141301475670u }, { 17486291911125277965u, 1915619426082361072u }, + { 6610335899416401726u, 1532495540865888858u }, { 12666966349016942027u, 1225996432692711086u }, + { 12888448528943286597u, 1961594292308337738u }, { 17689456452638449924u, 1569275433846670190u }, + { 14151565162110759939u, 1255420347077336152u }, { 7885109000409574610u, 2008672555323737844u }, + { 9997436015069570011u, 1606938044258990275u }, { 7997948812055656009u, 1285550435407192220u }, + { 12796718099289049614u, 2056880696651507552u }, { 2858676849947419045u, 1645504557321206042u }, + { 13354987924183666206u, 1316403645856964833u }, { 17678631863951955605u, 2106245833371143733u }, + { 3074859046935833515u, 1684996666696914987u }, { 13527933681774397782u, 1347997333357531989u }, + { 10576647446613305481u, 2156795733372051183u }, { 15840015586774465031u, 1725436586697640946u }, + { 8982663654677661702u, 1380349269358112757u }, { 18061610662226169046u, 2208558830972980411u }, + { 10759939715039024913u, 1766847064778384329u }, { 12297300586773130254u, 1413477651822707463u }, + { 15986332124095098083u, 2261564242916331941u }, { 9099716884534168143u, 1809251394333065553u }, + { 14658471137111155161u, 1447401115466452442u }, { 4348079280205103483u, 1157920892373161954u }, + { 14335624477811986218u, 1852673427797059126u }, { 7779150767507678651u, 1482138742237647301u }, + { 2533971799264232598u, 1185710993790117841u }, { 15122401323048503126u, 1897137590064188545u }, + { 12097921058438802501u, 1517710072051350836u }, { 5988988032009131678u, 1214168057641080669u }, + { 16961078480698431330u, 1942668892225729070u }, { 13568862784558745064u, 1554135113780583256u }, + { 7165741412905085728u, 1243308091024466605u }, { 11465186260648137165u, 1989292945639146568u }, + { 16550846638002330379u, 1591434356511317254u }, { 16930026125143774626u, 1273147485209053803u }, + { 4951948911778577463u, 2037035976334486086u }, { 272210314680951647u, 1629628781067588869u }, + { 3907117066486671641u, 1303703024854071095u }, { 6251387306378674625u, 2085924839766513752u }, + { 16069156289328670670u, 1668739871813211001u }, { 9165976216721026213u, 1334991897450568801u }, + { 7286864317269821294u, 2135987035920910082u }, { 16897537898041588005u, 1708789628736728065u }, + { 13518030318433270404u, 1367031702989382452u }, { 6871453250525591353u, 2187250724783011924u }, + { 9186511415162383406u, 1749800579826409539u }, { 11038557946871817048u, 1399840463861127631u }, + { 10282995085511086630u, 2239744742177804210u }, { 8226396068408869304u, 1791795793742243368u }, + { 13959814484210916090u, 1433436634993794694u }, { 11267656730511734774u, 2293498615990071511u }, + { 5324776569667477496u, 1834798892792057209u }, { 7949170070475892320u, 1467839114233645767u }, + { 17427382500606444826u, 1174271291386916613u }, { 5747719112518849781u, 1878834066219066582u }, + { 15666221734240810795u, 1503067252975253265u }, { 12532977387392648636u, 1202453802380202612u }, + { 5295368560860596524u, 1923926083808324180u }, { 4236294848688477220u, 1539140867046659344u }, + { 7078384693692692099u, 1231312693637327475u }, { 11325415509908307358u, 1970100309819723960u }, + { 9060332407926645887u, 1576080247855779168u }, { 14626963555825137356u, 1260864198284623334u }, + { 12335095245094488799u, 2017382717255397335u }, { 9868076196075591040u, 1613906173804317868u }, + { 15273158586344293478u, 1291124939043454294u }, { 13369007293925138595u, 2065799902469526871u }, + { 7005857020398200553u, 1652639921975621497u }, { 16672732060544291412u, 1322111937580497197u }, + { 11918976037903224966u, 2115379100128795516u }, { 5845832015580669650u, 1692303280103036413u }, + { 12055363241948356366u, 1353842624082429130u }, { 841837113407818570u, 2166148198531886609u }, + { 4362818505468165179u, 1732918558825509287u }, { 14558301248600263113u, 1386334847060407429u }, + { 12225235553534690011u, 2218135755296651887u }, { 2401490813343931363u, 1774508604237321510u }, + { 1921192650675145090u, 1419606883389857208u }, { 17831303500047873437u, 2271371013423771532u }, + { 6886345170554478103u, 1817096810739017226u }, { 1819727321701672159u, 1453677448591213781u }, + { 16213177116328979020u, 1162941958872971024u }, { 14873036941900635463u, 1860707134196753639u }, + { 15587778368262418694u, 1488565707357402911u }, { 8780873879868024632u, 1190852565885922329u }, + { 2981351763563108441u, 1905364105417475727u }, { 13453127855076217722u, 1524291284333980581u }, + { 7073153469319063855u, 1219433027467184465u }, { 11317045550910502167u, 1951092843947495144u }, + { 12742985255470312057u, 1560874275157996115u }, { 10194388204376249646u, 1248699420126396892u }, + { 1553625868034358140u, 1997919072202235028u }, { 8621598323911307159u, 1598335257761788022u }, + { 17965325103354776697u, 1278668206209430417u }, { 13987124906400001422u, 2045869129935088668u }, + { 121653480894270168u, 1636695303948070935u }, { 97322784715416134u, 1309356243158456748u }, + { 14913111714512307107u, 2094969989053530796u }, { 8241140556867935363u, 1675975991242824637u }, + { 17660958889720079260u, 1340780792994259709u }, { 17189487779326395846u, 2145249268790815535u }, + { 13751590223461116677u, 1716199415032652428u }, { 18379969808252713988u, 1372959532026121942u }, + { 14650556434236701088u, 2196735251241795108u }, { 652398703163629901u, 1757388200993436087u }, + { 11589965406756634890u, 1405910560794748869u }, { 7475898206584884855u, 2249456897271598191u }, + { 2291369750525997561u, 1799565517817278553u }, { 9211793429904618695u, 1439652414253822842u }, + { 18428218302589300235u, 2303443862806116547u }, { 7363877012587619542u, 1842755090244893238u }, + { 13269799239553916280u, 1474204072195914590u }, { 10615839391643133024u, 1179363257756731672u }, + { 2227947767661371545u, 1886981212410770676u }, { 16539753473096738529u, 1509584969928616540u }, + { 13231802778477390823u, 1207667975942893232u }, { 6413489186596184024u, 1932268761508629172u }, + { 16198837793502678189u, 1545815009206903337u }, { 5580372605318321905u, 1236652007365522670u }, + { 8928596168509315048u, 1978643211784836272u }, { 18210923379033183008u, 1582914569427869017u }, + { 7190041073742725760u, 1266331655542295214u }, { 436019273762630246u, 2026130648867672343u }, + { 7727513048493924843u, 1620904519094137874u }, { 9871359253537050198u, 1296723615275310299u }, + { 4726128361433549347u, 2074757784440496479u }, { 7470251503888749801u, 1659806227552397183u }, + { 13354898832594820487u, 1327844982041917746u }, { 13989140502667892133u, 2124551971267068394u }, + { 14880661216876224029u, 1699641577013654715u }, { 11904528973500979224u, 1359713261610923772u }, + { 4289851098633925465u, 2175541218577478036u }, { 18189276137874781665u, 1740432974861982428u }, + { 3483374466074094362u, 1392346379889585943u }, { 1884050330976640656u, 2227754207823337509u }, + { 5196589079523222848u, 1782203366258670007u }, { 15225317707844309248u, 1425762693006936005u }, + { 5913764258841343181u, 2281220308811097609u }, { 8420360221814984868u, 1824976247048878087u }, + { 17804334621677718864u, 1459980997639102469u }, { 17932816512084085415u, 1167984798111281975u }, + { 10245762345624985047u, 1868775676978051161u }, { 4507261061758077715u, 1495020541582440929u }, + { 7295157664148372495u, 1196016433265952743u }, { 7982903447895485668u, 1913626293225524389u }, + { 10075671573058298858u, 1530901034580419511u }, { 4371188443704728763u, 1224720827664335609u }, + { 14372599139411386667u, 1959553324262936974u }, { 15187428126271019657u, 1567642659410349579u }, + { 15839291315758726049u, 1254114127528279663u }, { 3206773216762499739u, 2006582604045247462u }, + { 13633465017635730761u, 1605266083236197969u }, { 14596120828850494932u, 1284212866588958375u }, + { 4907049252451240275u, 2054740586542333401u }, { 236290587219081897u, 1643792469233866721u }, + { 14946427728742906810u, 1315033975387093376u }, { 16535586736504830250u, 2104054360619349402u }, + { 5849771759720043554u, 1683243488495479522u }, { 15747863852001765813u, 1346594790796383617u }, + { 10439186904235184007u, 2154551665274213788u }, { 15730047152871967852u, 1723641332219371030u }, + { 12584037722297574282u, 1378913065775496824u }, { 9066413911450387881u, 2206260905240794919u }, + { 10942479943902220628u, 1765008724192635935u }, { 8753983955121776503u, 1412006979354108748u }, + { 10317025513452932081u, 2259211166966573997u }, { 874922781278525018u, 1807368933573259198u }, + { 8078635854506640661u, 1445895146858607358u }, { 13841606313089133175u, 1156716117486885886u }, + { 14767872471458792434u, 1850745787979017418u }, { 746251532941302978u, 1480596630383213935u }, + { 597001226353042382u, 1184477304306571148u }, { 15712597221132509104u, 1895163686890513836u }, + { 8880728962164096960u, 1516130949512411069u }, { 10793931984473187891u, 1212904759609928855u }, + { 17270291175157100626u, 1940647615375886168u }, { 2748186495899949531u, 1552518092300708935u }, + { 2198549196719959625u, 1242014473840567148u }, { 18275073973719576693u, 1987223158144907436u }, + { 10930710364233751031u, 1589778526515925949u }, { 12433917106128911148u, 1271822821212740759u }, + { 8826220925580526867u, 2034916513940385215u }, { 7060976740464421494u, 1627933211152308172u }, + { 16716827836597268165u, 1302346568921846537u }, { 11989529279587987770u, 2083754510274954460u }, + { 9591623423670390216u, 1667003608219963568u }, { 15051996368420132820u, 1333602886575970854u }, + { 13015147745246481542u, 2133764618521553367u }, { 3033420566713364587u, 1707011694817242694u }, + { 6116085268112601993u, 1365609355853794155u }, { 9785736428980163188u, 2184974969366070648u }, + { 15207286772667951197u, 1747979975492856518u }, { 1097782973908629988u, 1398383980394285215u }, + { 1756452758253807981u, 2237414368630856344u }, { 5094511021344956708u, 1789931494904685075u }, + { 4075608817075965366u, 1431945195923748060u }, { 6520974107321544586u, 2291112313477996896u }, + { 1527430471115325346u, 1832889850782397517u }, { 12289990821117991246u, 1466311880625918013u }, + { 17210690286378213644u, 1173049504500734410u }, { 9090360384495590213u, 1876879207201175057u }, + { 18340334751822203140u, 1501503365760940045u }, { 14672267801457762512u, 1201202692608752036u }, + { 16096930852848599373u, 1921924308174003258u }, { 1809498238053148529u, 1537539446539202607u }, + { 12515645034668249793u, 1230031557231362085u }, { 1578287981759648052u, 1968050491570179337u }, + { 12330676829633449412u, 1574440393256143469u }, { 13553890278448669853u, 1259552314604914775u }, + { 3239480371808320148u, 2015283703367863641u }, { 17348979556414297411u, 1612226962694290912u }, + { 6500486015647617283u, 1289781570155432730u }, { 10400777625036187652u, 2063650512248692368u }, + { 15699319729512770768u, 1650920409798953894u }, { 16248804598352126938u, 1320736327839163115u }, + { 7551343283653851484u, 2113178124542660985u }, { 6041074626923081187u, 1690542499634128788u }, + { 12211557331022285596u, 1352433999707303030u }, { 1091747655926105338u, 2163894399531684849u }, + { 4562746939482794594u, 1731115519625347879u }, { 7339546366328145998u, 1384892415700278303u }, + { 8053925371383123274u, 2215827865120445285u }, { 6443140297106498619u, 1772662292096356228u }, + { 12533209867169019542u, 1418129833677084982u }, { 5295740528502789974u, 2269007733883335972u }, + { 15304638867027962949u, 1815206187106668777u }, { 4865013464138549713u, 1452164949685335022u }, + { 14960057215536570740u, 1161731959748268017u }, { 9178696285890871890u, 1858771135597228828u }, + { 14721654658196518159u, 1487016908477783062u }, { 4398626097073393881u, 1189613526782226450u }, + { 7037801755317430209u, 1903381642851562320u }, { 5630241404253944167u, 1522705314281249856u }, + { 814844308661245011u, 1218164251424999885u }, { 1303750893857992017u, 1949062802279999816u }, + { 15800395974054034906u, 1559250241823999852u }, { 5261619149759407279u, 1247400193459199882u }, + { 12107939454356961969u, 1995840309534719811u }, { 5997002748743659252u, 1596672247627775849u }, + { 8486951013736837725u, 1277337798102220679u }, { 2511075177753209390u, 2043740476963553087u }, + { 13076906586428298482u, 1634992381570842469u }, { 14150874083884549109u, 1307993905256673975u }, + { 4194654460505726958u, 2092790248410678361u }, { 18113118827372222859u, 1674232198728542688u }, + { 3422448617672047318u, 1339385758982834151u }, { 16543964232501006678u, 2143017214372534641u }, + { 9545822571258895019u, 1714413771498027713u }, { 15015355686490936662u, 1371531017198422170u }, + { 5577825024675947042u, 2194449627517475473u }, { 11840957649224578280u, 1755559702013980378u }, + { 16851463748863483271u, 1404447761611184302u }, { 12204946739213931940u, 2247116418577894884u }, + { 13453306206113055875u, 1797693134862315907u }, { 3383947335406624054u, 1438154507889852726u }, + { 16482362180876329456u, 2301047212623764361u }, { 9496540929959153242u, 1840837770099011489u }, + { 11286581558709232917u, 1472670216079209191u }, { 5339916432225476010u, 1178136172863367353u }, + { 4854517476818851293u, 1885017876581387765u }, { 3883613981455081034u, 1508014301265110212u }, + { 14174937629389795797u, 1206411441012088169u }, { 11611853762797942306u, 1930258305619341071u }, + { 5600134195496443521u, 1544206644495472857u }, { 15548153800622885787u, 1235365315596378285u }, + { 6430302007287065643u, 1976584504954205257u }, { 16212288050055383484u, 1581267603963364205u }, + { 12969830440044306787u, 1265014083170691364u }, { 9683682259845159889u, 2024022533073106183u }, + { 15125643437359948558u, 1619218026458484946u }, { 8411165935146048523u, 1295374421166787957u }, + { 17147214310975587960u, 2072599073866860731u }, { 10028422634038560045u, 1658079259093488585u }, + { 8022738107230848036u, 1326463407274790868u }, { 9147032156827446534u, 2122341451639665389u }, + { 11006974540203867551u, 1697873161311732311u }, { 5116230817421183718u, 1358298529049385849u }, + { 15564666937357714594u, 2173277646479017358u }, { 1383687105660440706u, 1738622117183213887u }, + { 12174996128754083534u, 1390897693746571109u }, { 8411947361780802685u, 2225436309994513775u }, + { 6729557889424642148u, 1780349047995611020u }, { 5383646311539713719u, 1424279238396488816u }, + { 1235136468979721303u, 2278846781434382106u }, { 15745504434151418335u, 1823077425147505684u }, + { 16285752362063044992u, 1458461940118004547u }, { 5649904260166615347u, 1166769552094403638u }, + { 5350498001524674232u, 1866831283351045821u }, { 591049586477829062u, 1493465026680836657u }, + { 11540886113407994219u, 1194772021344669325u }, { 18673707743239135u, 1911635234151470921u }, + { 14772334225162232601u, 1529308187321176736u }, { 8128518565387875758u, 1223446549856941389u }, + { 1937583260394870242u, 1957514479771106223u }, { 8928764237799716840u, 1566011583816884978u }, + { 14521709019723594119u, 1252809267053507982u }, { 8477339172590109297u, 2004494827285612772u }, + { 17849917782297818407u, 1603595861828490217u }, { 6901236596354434079u, 1282876689462792174u }, + { 18420676183650915173u, 2052602703140467478u }, { 3668494502695001169u, 1642082162512373983u }, + { 10313493231639821582u, 1313665730009899186u }, { 9122891541139893884u, 2101865168015838698u }, + { 14677010862395735754u, 1681492134412670958u }, { 673562245690857633u, 1345193707530136767u } +}; + +static const uint64_t DOUBLE_POW5_SPLIT[DOUBLE_POW5_TABLE_SIZE][2] = { + { 0u, 1152921504606846976u }, { 0u, 1441151880758558720u }, + { 0u, 1801439850948198400u }, { 0u, 2251799813685248000u }, + { 0u, 1407374883553280000u }, { 0u, 1759218604441600000u }, + { 0u, 2199023255552000000u }, { 0u, 1374389534720000000u }, + { 0u, 1717986918400000000u }, { 0u, 2147483648000000000u }, + { 0u, 1342177280000000000u }, { 0u, 1677721600000000000u }, + { 0u, 2097152000000000000u }, { 0u, 1310720000000000000u }, + { 0u, 1638400000000000000u }, { 0u, 2048000000000000000u }, + { 0u, 1280000000000000000u }, { 0u, 1600000000000000000u }, + { 0u, 2000000000000000000u }, { 0u, 1250000000000000000u }, + { 0u, 1562500000000000000u }, { 0u, 1953125000000000000u }, + { 0u, 1220703125000000000u }, { 0u, 1525878906250000000u }, + { 0u, 1907348632812500000u }, { 0u, 1192092895507812500u }, + { 0u, 1490116119384765625u }, { 4611686018427387904u, 1862645149230957031u }, + { 9799832789158199296u, 1164153218269348144u }, { 12249790986447749120u, 1455191522836685180u }, + { 15312238733059686400u, 1818989403545856475u }, { 14528612397897220096u, 2273736754432320594u }, + { 13692068767113150464u, 1421085471520200371u }, { 12503399940464050176u, 1776356839400250464u }, + { 15629249925580062720u, 2220446049250313080u }, { 9768281203487539200u, 1387778780781445675u }, + { 7598665485932036096u, 1734723475976807094u }, { 274959820560269312u, 2168404344971008868u }, + { 9395221924704944128u, 1355252715606880542u }, { 2520655369026404352u, 1694065894508600678u }, + { 12374191248137781248u, 2117582368135750847u }, { 14651398557727195136u, 1323488980084844279u }, + { 13702562178731606016u, 1654361225106055349u }, { 3293144668132343808u, 2067951531382569187u }, + { 18199116482078572544u, 1292469707114105741u }, { 8913837547316051968u, 1615587133892632177u }, + { 15753982952572452864u, 2019483917365790221u }, { 12152082354571476992u, 1262177448353618888u }, + { 15190102943214346240u, 1577721810442023610u }, { 9764256642163156992u, 1972152263052529513u }, + { 17631875447420442880u, 1232595164407830945u }, { 8204786253993389888u, 1540743955509788682u }, + { 1032610780636961552u, 1925929944387235853u }, { 2951224747111794922u, 1203706215242022408u }, + { 3689030933889743652u, 1504632769052528010u }, { 13834660704216955373u, 1880790961315660012u }, + { 17870034976990372916u, 1175494350822287507u }, { 17725857702810578241u, 1469367938527859384u }, + { 3710578054803671186u, 1836709923159824231u }, { 26536550077201078u, 2295887403949780289u }, + { 11545800389866720434u, 1434929627468612680u }, { 14432250487333400542u, 1793662034335765850u }, + { 8816941072311974870u, 2242077542919707313u }, { 17039803216263454053u, 1401298464324817070u }, + { 12076381983474541759u, 1751623080406021338u }, { 5872105442488401391u, 2189528850507526673u }, + { 15199280947623720629u, 1368455531567204170u }, { 9775729147674874978u, 1710569414459005213u }, + { 16831347453020981627u, 2138211768073756516u }, { 1296220121283337709u, 1336382355046097823u }, + { 15455333206886335848u, 1670477943807622278u }, { 10095794471753144002u, 2088097429759527848u }, + { 6309871544845715001u, 1305060893599704905u }, { 12499025449484531656u, 1631326116999631131u }, + { 11012095793428276666u, 2039157646249538914u }, { 11494245889320060820u, 1274473528905961821u }, + { 532749306367912313u, 1593091911132452277u }, { 5277622651387278295u, 1991364888915565346u }, + { 7910200175544436838u, 1244603055572228341u }, { 14499436237857933952u, 1555753819465285426u }, + { 8900923260467641632u, 1944692274331606783u }, { 12480606065433357876u, 1215432671457254239u }, + { 10989071563364309441u, 1519290839321567799u }, { 9124653435777998898u, 1899113549151959749u }, + { 8008751406574943263u, 1186945968219974843u }, { 5399253239791291175u, 1483682460274968554u }, + { 15972438586593889776u, 1854603075343710692u }, { 759402079766405302u, 1159126922089819183u }, + { 14784310654990170340u, 1448908652612273978u }, { 9257016281882937117u, 1811135815765342473u }, + { 16182956370781059300u, 2263919769706678091u }, { 7808504722524468110u, 1414949856066673807u }, + { 5148944884728197234u, 1768687320083342259u }, { 1824495087482858639u, 2210859150104177824u }, + { 1140309429676786649u, 1381786968815111140u }, { 1425386787095983311u, 1727233711018888925u }, + { 6393419502297367043u, 2159042138773611156u }, { 13219259225790630210u, 1349401336733506972u }, + { 16524074032238287762u, 1686751670916883715u }, { 16043406521870471799u, 2108439588646104644u }, + { 803757039314269066u, 1317774742903815403u }, { 14839754354425000045u, 1647218428629769253u }, + { 4714634887749086344u, 2059023035787211567u }, { 9864175832484260821u, 1286889397367007229u }, + { 16941905809032713930u, 1608611746708759036u }, { 2730638187581340797u, 2010764683385948796u }, + { 10930020904093113806u, 1256727927116217997u }, { 18274212148543780162u, 1570909908895272496u }, + { 4396021111970173586u, 1963637386119090621u }, { 5053356204195052443u, 1227273366324431638u }, + { 15540067292098591362u, 1534091707905539547u }, { 14813398096695851299u, 1917614634881924434u }, + { 13870059828862294966u, 1198509146801202771u }, { 12725888767650480803u, 1498136433501503464u }, + { 15907360959563101004u, 1872670541876879330u }, { 14553786618154326031u, 1170419088673049581u }, + { 4357175217410743827u, 1463023860841311977u }, { 10058155040190817688u, 1828779826051639971u }, + { 7961007781811134206u, 2285974782564549964u }, { 14199001900486734687u, 1428734239102843727u }, + { 13137066357181030455u, 1785917798878554659u }, { 11809646928048900164u, 2232397248598193324u }, + { 16604401366885338411u, 1395248280373870827u }, { 16143815690179285109u, 1744060350467338534u }, + { 10956397575869330579u, 2180075438084173168u }, { 6847748484918331612u, 1362547148802608230u }, + { 17783057643002690323u, 1703183936003260287u }, { 17617136035325974999u, 2128979920004075359u }, + { 17928239049719816230u, 1330612450002547099u }, { 17798612793722382384u, 1663265562503183874u }, + { 13024893955298202172u, 2079081953128979843u }, { 5834715712847682405u, 1299426220705612402u }, + { 16516766677914378815u, 1624282775882015502u }, { 11422586310538197711u, 2030353469852519378u }, + { 11750802462513761473u, 1268970918657824611u }, { 10076817059714813937u, 1586213648322280764u }, + { 12596021324643517422u, 1982767060402850955u }, { 5566670318688504437u, 1239229412751781847u }, + { 2346651879933242642u, 1549036765939727309u }, { 7545000868343941206u, 1936295957424659136u }, + { 4715625542714963254u, 1210184973390411960u }, { 5894531928393704067u, 1512731216738014950u }, + { 16591536947346905892u, 1890914020922518687u }, { 17287239619732898039u, 1181821263076574179u }, + { 16997363506238734644u, 1477276578845717724u }, { 2799960309088866689u, 1846595723557147156u }, + { 10973347230035317489u, 1154122327223216972u }, { 13716684037544146861u, 1442652909029021215u }, + { 12534169028502795672u, 1803316136286276519u }, { 11056025267201106687u, 2254145170357845649u }, + { 18439230838069161439u, 1408840731473653530u }, { 13825666510731675991u, 1761050914342066913u }, + { 3447025083132431277u, 2201313642927583642u }, { 6766076695385157452u, 1375821026829739776u }, + { 8457595869231446815u, 1719776283537174720u }, { 10571994836539308519u, 2149720354421468400u }, + { 6607496772837067824u, 1343575221513417750u }, { 17482743002901110588u, 1679469026891772187u }, + { 17241742735199000331u, 2099336283614715234u }, { 15387775227926763111u, 1312085177259197021u }, + { 5399660979626290177u, 1640106471573996277u }, { 11361262242960250625u, 2050133089467495346u }, + { 11712474920277544544u, 1281333180917184591u }, { 10028907631919542777u, 1601666476146480739u }, + { 7924448521472040567u, 2002083095183100924u }, { 14176152362774801162u, 1251301934489438077u }, + { 3885132398186337741u, 1564127418111797597u }, { 9468101516160310080u, 1955159272639746996u }, + { 15140935484454969608u, 1221974545399841872u }, { 479425281859160394u, 1527468181749802341u }, + { 5210967620751338397u, 1909335227187252926u }, { 17091912818251750210u, 1193334516992033078u }, + { 12141518985959911954u, 1491668146240041348u }, { 15176898732449889943u, 1864585182800051685u }, + { 11791404716994875166u, 1165365739250032303u }, { 10127569877816206054u, 1456707174062540379u }, + { 8047776328842869663u, 1820883967578175474u }, { 836348374198811271u, 2276104959472719343u }, + { 7440246761515338900u, 1422565599670449589u }, { 13911994470321561530u, 1778206999588061986u }, + { 8166621051047176104u, 2222758749485077483u }, { 2798295147690791113u, 1389224218428173427u }, + { 17332926989895652603u, 1736530273035216783u }, { 17054472718942177850u, 2170662841294020979u }, + { 8353202440125167204u, 1356664275808763112u }, { 10441503050156459005u, 1695830344760953890u }, + { 3828506775840797949u, 2119787930951192363u }, { 86973725686804766u, 1324867456844495227u }, + { 13943775212390669669u, 1656084321055619033u }, { 3594660960206173375u, 2070105401319523792u }, + { 2246663100128858359u, 1293815875824702370u }, { 12031700912015848757u, 1617269844780877962u }, + { 5816254103165035138u, 2021587305976097453u }, { 5941001823691840913u, 1263492066235060908u }, + { 7426252279614801142u, 1579365082793826135u }, { 4671129331091113523u, 1974206353492282669u }, + { 5225298841145639904u, 1233878970932676668u }, { 6531623551432049880u, 1542348713665845835u }, + { 3552843420862674446u, 1927935892082307294u }, { 16055585193321335241u, 1204959932551442058u }, + { 10846109454796893243u, 1506199915689302573u }, { 18169322836923504458u, 1882749894611628216u }, + { 11355826773077190286u, 1176718684132267635u }, { 9583097447919099954u, 1470898355165334544u }, + { 11978871809898874942u, 1838622943956668180u }, { 14973589762373593678u, 2298278679945835225u }, + { 2440964573842414192u, 1436424174966147016u }, { 3051205717303017741u, 1795530218707683770u }, + { 13037379183483547984u, 2244412773384604712u }, { 8148361989677217490u, 1402757983365377945u }, + { 14797138505523909766u, 1753447479206722431u }, { 13884737113477499304u, 2191809349008403039u }, + { 15595489723564518921u, 1369880843130251899u }, { 14882676136028260747u, 1712351053912814874u }, + { 9379973133180550126u, 2140438817391018593u }, { 17391698254306313589u, 1337774260869386620u }, + { 3292878744173340370u, 1672217826086733276u }, { 4116098430216675462u, 2090272282608416595u }, + { 266718509671728212u, 1306420176630260372u }, { 333398137089660265u, 1633025220787825465u }, + { 5028433689789463235u, 2041281525984781831u }, { 10060300083759496378u, 1275800953740488644u }, + { 12575375104699370472u, 1594751192175610805u }, { 1884160825592049379u, 1993438990219513507u }, + { 17318501580490888525u, 1245899368887195941u }, { 7813068920331446945u, 1557374211108994927u }, + { 5154650131986920777u, 1946717763886243659u }, { 915813323278131534u, 1216698602428902287u }, + { 14979824709379828129u, 1520873253036127858u }, { 9501408849870009354u, 1901091566295159823u }, + { 12855909558809837702u, 1188182228934474889u }, { 2234828893230133415u, 1485227786168093612u }, + { 2793536116537666769u, 1856534732710117015u }, { 8663489100477123587u, 1160334207943823134u }, + { 1605989338741628675u, 1450417759929778918u }, { 11230858710281811652u, 1813022199912223647u }, + { 9426887369424876662u, 2266277749890279559u }, { 12809333633531629769u, 1416423593681424724u }, + { 16011667041914537212u, 1770529492101780905u }, { 6179525747111007803u, 2213161865127226132u }, + { 13085575628799155685u, 1383226165704516332u }, { 16356969535998944606u, 1729032707130645415u }, + { 15834525901571292854u, 2161290883913306769u }, { 2979049660840976177u, 1350806802445816731u }, + { 17558870131333383934u, 1688508503057270913u }, { 8113529608884566205u, 2110635628821588642u }, + { 9682642023980241782u, 1319147268013492901u }, { 16714988548402690132u, 1648934085016866126u }, + { 11670363648648586857u, 2061167606271082658u }, { 11905663298832754689u, 1288229753919426661u }, + { 1047021068258779650u, 1610287192399283327u }, { 15143834390605638274u, 2012858990499104158u }, + { 4853210475701136017u, 1258036869061940099u }, { 1454827076199032118u, 1572546086327425124u }, + { 1818533845248790147u, 1965682607909281405u }, { 3442426662494187794u, 1228551629943300878u }, + { 13526405364972510550u, 1535689537429126097u }, { 3072948650933474476u, 1919611921786407622u }, + { 15755650962115585259u, 1199757451116504763u }, { 15082877684217093670u, 1499696813895630954u }, + { 9630225068416591280u, 1874621017369538693u }, { 8324733676974063502u, 1171638135855961683u }, + { 5794231077790191473u, 1464547669819952104u }, { 7242788847237739342u, 1830684587274940130u }, + { 18276858095901949986u, 2288355734093675162u }, { 16034722328366106645u, 1430222333808546976u }, + { 1596658836748081690u, 1787777917260683721u }, { 6607509564362490017u, 2234722396575854651u }, + { 1823850468512862308u, 1396701497859909157u }, { 6891499104068465790u, 1745876872324886446u }, + { 17837745916940358045u, 2182346090406108057u }, { 4231062170446641922u, 1363966306503817536u }, + { 5288827713058302403u, 1704957883129771920u }, { 6611034641322878003u, 2131197353912214900u }, + { 13355268687681574560u, 1331998346195134312u }, { 16694085859601968200u, 1664997932743917890u }, + { 11644235287647684442u, 2081247415929897363u }, { 4971804045566108824u, 1300779634956185852u }, + { 6214755056957636030u, 1625974543695232315u }, { 3156757802769657134u, 2032468179619040394u }, + { 6584659645158423613u, 1270292612261900246u }, { 17454196593302805324u, 1587865765327375307u }, + { 17206059723201118751u, 1984832206659219134u }, { 6142101308573311315u, 1240520129162011959u }, + { 3065940617289251240u, 1550650161452514949u }, { 8444111790038951954u, 1938312701815643686u }, + { 665883850346957067u, 1211445438634777304u }, { 832354812933696334u, 1514306798293471630u }, + { 10263815553021896226u, 1892883497866839537u }, { 17944099766707154901u, 1183052186166774710u }, + { 13206752671529167818u, 1478815232708468388u }, { 16508440839411459773u, 1848519040885585485u }, + { 12623618533845856310u, 1155324400553490928u }, { 15779523167307320387u, 1444155500691863660u }, + { 1277659885424598868u, 1805194375864829576u }, { 1597074856780748586u, 2256492969831036970u }, + { 5609857803915355770u, 1410308106144398106u }, { 16235694291748970521u, 1762885132680497632u }, + { 1847873790976661535u, 2203606415850622041u }, { 12684136165428883219u, 1377254009906638775u }, + { 11243484188358716120u, 1721567512383298469u }, { 219297180166231438u, 2151959390479123087u }, + { 7054589765244976505u, 1344974619049451929u }, { 13429923224983608535u, 1681218273811814911u }, + { 12175718012802122765u, 2101522842264768639u }, { 14527352785642408584u, 1313451776415480399u }, + { 13547504963625622826u, 1641814720519350499u }, { 12322695186104640628u, 2052268400649188124u }, + { 16925056528170176201u, 1282667750405742577u }, { 7321262604930556539u, 1603334688007178222u }, + { 18374950293017971482u, 2004168360008972777u }, { 4566814905495150320u, 1252605225005607986u }, + { 14931890668723713708u, 1565756531257009982u }, { 9441491299049866327u, 1957195664071262478u }, + { 1289246043478778550u, 1223247290044539049u }, { 6223243572775861092u, 1529059112555673811u }, + { 3167368447542438461u, 1911323890694592264u }, { 1979605279714024038u, 1194577431684120165u }, + { 7086192618069917952u, 1493221789605150206u }, { 18081112809442173248u, 1866527237006437757u }, + { 13606538515115052232u, 1166579523129023598u }, { 7784801107039039482u, 1458224403911279498u }, + { 507629346944023544u, 1822780504889099373u }, { 5246222702107417334u, 2278475631111374216u }, + { 3278889188817135834u, 1424047269444608885u }, { 8710297504448807696u, 1780059086805761106u } +}; + +// IEEE 754 double precision constants +#define DOUBLE_MANTISSA_BITS 52 +#define DOUBLE_EXPONENT_BITS 11 +#define DOUBLE_EXPONENT_BIAS 1023 + +// Helper: floor(log2(value)) using ryu_leading_zeros64 +static inline uint32_t floor_log2(const uint64_t value) { + return 63 - ryu_leading_zeros64(value); +} + +// Helper: log2(5^e) approximation +static inline int32_t log2pow5(const int32_t e) { + return (int32_t) ((((uint32_t) e) * 1217359) >> 19); +} + +// Helper: ceil(log2(5^e)) +static inline int32_t ceil_log2pow5(const int32_t e) { + return log2pow5(e) + 1; +} + +// Helper: max of two int32 +static inline int32_t max32(int32_t a, int32_t b) { + return a < b ? b : a; +} + +// Helper: convert uint64 bits to double +static inline double int64Bits2Double(uint64_t bits) { + double f; + memcpy(&f, &bits, sizeof(double)); + return f; +} + +// Check if value is multiple of 2^p +static inline bool multipleOfPowerOf2(const uint64_t value, const uint32_t p) { + return (value & ((1ull << p) - 1)) == 0; +} + +// Count how many times value is divisible by 5 +// Uses modular inverse to avoid expensive division +static inline uint32_t pow5Factor(uint64_t value) { + const uint64_t m_inv_5 = 14757395258967641293u; // 5 * m_inv_5 = 1 (mod 2^64) + const uint64_t n_div_5 = 3689348814741910323u; // 2^64 / 5 + uint32_t count = 0; + for (;;) { + value *= m_inv_5; + if (value > n_div_5) + break; + ++count; + } + return count; +} + +// Check if value is multiple of 5^p +// Optimized: uses modular inverse instead of division +static inline bool multipleOfPowerOf5(const uint64_t value, const uint32_t p) { + return pow5Factor(value) >= p; +} + +// 128-bit multiplication with shift +// This is the core operation for converting decimal to binary +#if defined(__SIZEOF_INT128__) +// Use native 128-bit integers if available (GCC/Clang) +static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) { + const unsigned __int128 b0 = ((unsigned __int128) m) * mul[0]; + const unsigned __int128 b2 = ((unsigned __int128) m) * mul[1]; + return (uint64_t) (((b0 >> 64) + b2) >> (j - 64)); +} +#else +// Fallback for systems without 128-bit integers +static inline uint64_t umul128(const uint64_t a, const uint64_t b, uint64_t* const productHi) { + const uint32_t aLo = (uint32_t)a; + const uint32_t aHi = (uint32_t)(a >> 32); + const uint32_t bLo = (uint32_t)b; + const uint32_t bHi = (uint32_t)(b >> 32); + + const uint64_t b00 = (uint64_t)aLo * bLo; + const uint64_t b01 = (uint64_t)aLo * bHi; + const uint64_t b10 = (uint64_t)aHi * bLo; + const uint64_t b11 = (uint64_t)aHi * bHi; + + const uint32_t b00Lo = (uint32_t)b00; + const uint32_t b00Hi = (uint32_t)(b00 >> 32); + + const uint64_t mid1 = b10 + b00Hi; + const uint32_t mid1Lo = (uint32_t)(mid1); + const uint32_t mid1Hi = (uint32_t)(mid1 >> 32); + + const uint64_t mid2 = b01 + mid1Lo; + const uint32_t mid2Lo = (uint32_t)(mid2); + const uint32_t mid2Hi = (uint32_t)(mid2 >> 32); + + const uint64_t pHi = b11 + mid1Hi + mid2Hi; + const uint64_t pLo = ((uint64_t)mid2Lo << 32) | b00Lo; + + *productHi = pHi; + return pLo; +} + +static inline uint64_t shiftright128(const uint64_t lo, const uint64_t hi, const uint32_t dist) { + return (hi << (64 - dist)) | (lo >> dist); +} + +static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) { + uint64_t high1; + const uint64_t low1 = umul128(m, mul[1], &high1); + uint64_t high0; + umul128(m, mul[0], &high0); + const uint64_t sum = high0 + low1; + if (sum < high0) { + ++high1; + } + return shiftright128(sum, high1, j - 64); +} +#endif + +// Main conversion function: decimal mantissa+exponent to IEEE 754 double +// Optimized for JSON parsing with fast paths for edge cases +static inline double ryu_s2d_from_parts(uint64_t m10, int m10digits, int32_t e10, bool signedM) { + // Fast path: handle zero explicitly (e.g., "0.0", "0e0") + if (m10 == 0) { + return int64Bits2Double(((uint64_t) signedM) << 63); + } + + // Fast path: handle overflow/underflow early + if (m10digits + e10 <= -324) { + // Underflow to zero + return int64Bits2Double(((uint64_t) signedM) << 63); + } + + if (m10digits + e10 >= 310) { + // Overflow to infinity + return int64Bits2Double((((uint64_t) signedM) << 63) | 0x7ff0000000000000ULL); + } + + // Convert decimal to binary: m10 * 10^e10 = m2 * 2^e2 + int32_t e2; + uint64_t m2; + bool trailingZeros; + + if (e10 >= 0) { + // Positive exponent: multiply by 5^e10 and adjust binary exponent + e2 = floor_log2(m10) + e10 + log2pow5(e10) - (DOUBLE_MANTISSA_BITS + 1); + int j = e2 - e10 - ceil_log2pow5(e10) + DOUBLE_POW5_BITCOUNT; + m2 = mulShift64(m10, DOUBLE_POW5_SPLIT[e10], j); + trailingZeros = e2 < e10 || (e2 - e10 < 64 && multipleOfPowerOf2(m10, e2 - e10)); + } else { + // Negative exponent: divide by 5^(-e10) + e2 = floor_log2(m10) + e10 - ceil_log2pow5(-e10) - (DOUBLE_MANTISSA_BITS + 1); + int j = e2 - e10 + ceil_log2pow5(-e10) - 1 + DOUBLE_POW5_INV_BITCOUNT; + m2 = mulShift64(m10, DOUBLE_POW5_INV_SPLIT[-e10], j); + trailingZeros = multipleOfPowerOf5(m10, -e10); + } + + // Compute IEEE 754 exponent + uint32_t ieee_e2 = (uint32_t) max32(0, e2 + DOUBLE_EXPONENT_BIAS + floor_log2(m2)); + + if (ieee_e2 > 0x7fe) { + // Overflow to infinity + return int64Bits2Double((((uint64_t) signedM) << 63) | 0x7ff0000000000000ULL); + } + + // Compute shift amount for rounding + int32_t shift = (ieee_e2 == 0 ? 1 : ieee_e2) - e2 - DOUBLE_EXPONENT_BIAS - DOUBLE_MANTISSA_BITS; + + // IEEE 754 round-to-even (banker's rounding) + trailingZeros &= (m2 & ((1ull << (shift - 1)) - 1)) == 0; + uint64_t lastRemovedBit = (m2 >> (shift - 1)) & 1; + bool roundUp = (lastRemovedBit != 0) && (!trailingZeros || (((m2 >> shift) & 1) != 0)); + + uint64_t ieee_m2 = (m2 >> shift) + roundUp; + ieee_m2 &= (1ull << DOUBLE_MANTISSA_BITS) - 1; + + if (ieee_m2 == 0 && roundUp) { + ieee_e2++; + } + + // Pack sign, exponent, and mantissa into IEEE 754 format + // Match original Ryu: group sign+exponent, then shift and add mantissa + uint64_t ieee = (((((uint64_t) signedM) << DOUBLE_EXPONENT_BITS) | (uint64_t)ieee_e2) << DOUBLE_MANTISSA_BITS) | ieee_m2; + return int64Bits2Double(ieee); +} + +#endif // RYU_H |
