diff options
Diffstat (limited to 'ext/json')
| -rw-r--r-- | ext/json/fbuffer/fbuffer.h | 126 | ||||
| -rw-r--r-- | ext/json/generator/depend | 1 | ||||
| -rw-r--r-- | ext/json/generator/extconf.rb | 5 | ||||
| -rw-r--r-- | ext/json/generator/generator.c | 910 | ||||
| -rw-r--r-- | ext/json/json.h | 134 | ||||
| -rw-r--r-- | ext/json/lib/json.rb | 59 | ||||
| -rw-r--r-- | ext/json/lib/json/common.rb | 132 | ||||
| -rw-r--r-- | ext/json/lib/json/ext/generator/state.rb | 25 | ||||
| -rw-r--r-- | ext/json/lib/json/generic_object.rb | 8 | ||||
| -rw-r--r-- | ext/json/lib/json/version.rb | 2 | ||||
| -rw-r--r-- | ext/json/parser/depend | 2 | ||||
| -rw-r--r-- | ext/json/parser/extconf.rb | 8 | ||||
| -rw-r--r-- | ext/json/parser/parser.c | 1832 | ||||
| -rw-r--r-- | ext/json/simd/simd.h | 64 | ||||
| -rw-r--r-- | ext/json/vendor/fpconv.c | 25 | ||||
| -rw-r--r-- | ext/json/vendor/ryu.h | 819 |
16 files changed, 2846 insertions, 1306 deletions
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h index d32371476c..b4f5266ca5 100644 --- a/ext/json/fbuffer/fbuffer.h +++ b/ext/json/fbuffer/fbuffer.h @@ -1,47 +1,9 @@ #ifndef _FBUFFER_H_ #define _FBUFFER_H_ -#include "ruby.h" -#include "ruby/encoding.h" +#include "../json.h" #include "../vendor/jeaiii-ltoa.h" -/* shims */ -/* This is the fallback definition from Ruby 3.4 */ - -#ifndef RBIMPL_STDBOOL_H -#if defined(__cplusplus) -# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) -# include <cstdbool> -# endif -#elif defined(HAVE_STDBOOL_H) -# include <stdbool.h> -#elif !defined(HAVE__BOOL) -typedef unsigned char _Bool; -# define bool _Bool -# define true ((_Bool)+1) -# define false ((_Bool)+0) -# define __bool_true_false_are_defined -#endif -#endif - -#ifndef RB_UNLIKELY -#define RB_UNLIKELY(expr) expr -#endif - -#ifndef RB_LIKELY -#define RB_LIKELY(expr) expr -#endif - -#ifndef MAYBE_UNUSED -# define MAYBE_UNUSED(x) x -#endif - -#ifdef RUBY_DEBUG -#ifndef JSON_DEBUG -#define JSON_DEBUG RUBY_DEBUG -#endif -#endif - enum fbuffer_type { FBUFFER_HEAP_ALLOCATED = 0, FBUFFER_STACK_ALLOCATED = 1, @@ -49,11 +11,11 @@ enum fbuffer_type { typedef struct FBufferStruct { enum fbuffer_type type; - unsigned long initial_length; - unsigned long len; - unsigned long capa; -#ifdef JSON_DEBUG - unsigned long requested; + size_t initial_length; + size_t len; + size_t capa; +#if JSON_DEBUG + size_t requested; #endif char *ptr; VALUE io; @@ -70,12 +32,12 @@ typedef struct FBufferStruct { static void fbuffer_free(FBuffer *fb); static void fbuffer_clear(FBuffer *fb); -static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len); +static void fbuffer_append(FBuffer *fb, const char *newstr, size_t len); static void fbuffer_append_long(FBuffer *fb, long number); static inline void fbuffer_append_char(FBuffer *fb, char newchr); static VALUE fbuffer_finalize(FBuffer *fb); -static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size) +static void fbuffer_stack_init(FBuffer *fb, size_t initial_length, char *stack_buffer, size_t stack_buffer_size) { fb->initial_length = (initial_length > 0) ? initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT; if (stack_buffer) { @@ -83,14 +45,14 @@ static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char * fb->ptr = stack_buffer; fb->capa = stack_buffer_size; } -#ifdef JSON_DEBUG +#if JSON_DEBUG fb->requested = 0; #endif } -static inline void fbuffer_consumed(FBuffer *fb, unsigned long consumed) +static inline void fbuffer_consumed(FBuffer *fb, size_t consumed) { -#ifdef JSON_DEBUG +#if JSON_DEBUG if (consumed > fb->requested) { rb_bug("fbuffer: Out of bound write"); } @@ -102,7 +64,7 @@ static inline void fbuffer_consumed(FBuffer *fb, unsigned long consumed) static void fbuffer_free(FBuffer *fb) { if (fb->ptr && fb->type == FBUFFER_HEAP_ALLOCATED) { - ruby_xfree(fb->ptr); + JSON_SIZED_FREE_N(fb->ptr, fb->capa); } } @@ -117,7 +79,7 @@ static void fbuffer_flush(FBuffer *fb) fbuffer_clear(fb); } -static void fbuffer_realloc(FBuffer *fb, unsigned long required) +static void fbuffer_realloc(FBuffer *fb, size_t required) { if (required > fb->capa) { if (fb->type == FBUFFER_STACK_ALLOCATED) { @@ -126,13 +88,13 @@ static void fbuffer_realloc(FBuffer *fb, unsigned long required) fb->type = FBUFFER_HEAP_ALLOCATED; MEMCPY(fb->ptr, old_buffer, char, fb->len); } else { - REALLOC_N(fb->ptr, char, required); + JSON_SIZED_REALLOC_N(fb->ptr, char, required, fb->capa); } fb->capa = required; } } -static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) +static void fbuffer_do_inc_capa(FBuffer *fb, size_t requested) { if (RB_UNLIKELY(fb->io)) { if (fb->capa < FBUFFER_IO_BUFFER_SIZE) { @@ -146,7 +108,7 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) } } - unsigned long required; + size_t required; if (RB_UNLIKELY(!fb->ptr)) { fb->ptr = ALLOC_N(char, fb->initial_length); @@ -158,9 +120,9 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) fbuffer_realloc(fb, required); } -static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested) +static inline void fbuffer_inc_capa(FBuffer *fb, size_t requested) { -#ifdef JSON_DEBUG +#if JSON_DEBUG fb->requested = requested; #endif @@ -169,19 +131,33 @@ static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested) } } -static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len) +static inline size_t fbuffer_size_mul_or_raise(size_t a, size_t b) +{ + size_t result = a * b; + if (RB_UNLIKELY(a != 0 && (result / a) != b)) { + rb_raise(rb_eArgError, "Buffer overflow, the resulting document is too large to be generated"); + } + return result; +} + +static inline void fbuffer_append_reserved(FBuffer *fb, const char *newstr, size_t len) +{ + MEMCPY(fb->ptr + fb->len, newstr, char, len); + fbuffer_consumed(fb, len); +} + +static inline void fbuffer_append(FBuffer *fb, const char *newstr, size_t len) { if (len > 0) { fbuffer_inc_capa(fb, len); - MEMCPY(fb->ptr + fb->len, newstr, char, len); - fbuffer_consumed(fb, len); + fbuffer_append_reserved(fb, newstr, len); } } /* Appends a character into a buffer. The buffer needs to have sufficient capacity, via fbuffer_inc_capa(...). */ static inline void fbuffer_append_reserved_char(FBuffer *fb, char chr) { -#ifdef JSON_DEBUG +#if JSON_DEBUG if (fb->requested < 1) { rb_bug("fbuffer: unreserved write"); } @@ -194,12 +170,29 @@ static inline void fbuffer_append_reserved_char(FBuffer *fb, char chr) static void fbuffer_append_str(FBuffer *fb, VALUE str) { - const char *newstr = StringValuePtr(str); - unsigned long len = RSTRING_LEN(str); + const char *ptr; + size_t len; + RSTRING_GETMEM(str, ptr, len); + fbuffer_append(fb, ptr, len); RB_GC_GUARD(str); +} - fbuffer_append(fb, newstr, len); +static void fbuffer_append_str_repeat(FBuffer *fb, VALUE str, size_t repeat) +{ + const char *ptr; + size_t len; + RSTRING_GETMEM(str, ptr, len); + + fbuffer_inc_capa(fb, fbuffer_size_mul_or_raise(repeat, len)); + while (repeat) { +#if JSON_DEBUG + fb->requested = len; +#endif + fbuffer_append_reserved(fb, ptr, len); + repeat--; + } + RB_GC_GUARD(str); } static inline void fbuffer_append_char(FBuffer *fb, char newchr) @@ -257,14 +250,11 @@ static VALUE fbuffer_finalize(FBuffer *fb) { if (fb->io) { fbuffer_flush(fb); - fbuffer_free(fb); rb_io_flush(fb->io); return fb->io; } else { - VALUE result = rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb)); - fbuffer_free(fb); - return result; + return rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb)); } } -#endif +#endif // _FBUFFER_H_ diff --git a/ext/json/generator/depend b/ext/json/generator/depend index aee4ab94eb..3ba4acfdd2 100644 --- a/ext/json/generator/depend +++ b/ext/json/generator/depend @@ -178,6 +178,7 @@ generator.o: $(hdrdir)/ruby/ruby.h generator.o: $(hdrdir)/ruby/st.h generator.o: $(hdrdir)/ruby/subst.h generator.o: $(srcdir)/../fbuffer/fbuffer.h +generator.o: $(srcdir)/../json.h generator.o: $(srcdir)/../simd/simd.h generator.o: $(srcdir)/../vendor/fpconv.c generator.o: $(srcdir)/../vendor/jeaiii-ltoa.h diff --git a/ext/json/generator/extconf.rb b/ext/json/generator/extconf.rb index fb9afd07f7..33af03ea30 100644 --- a/ext/json/generator/extconf.rb +++ b/ext/json/generator/extconf.rb @@ -5,8 +5,11 @@ if RUBY_ENGINE == 'truffleruby' File.write('Makefile', dummy_makefile("").join) else append_cflags("-std=c99") + have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3 + have_func("ruby_xfree_sized", "ruby.h") # RUBY_VERSION >= 4.1 + $defs << "-DJSON_GENERATOR" - $defs << "-DJSON_DEBUG" if ENV["JSON_DEBUG"] + $defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0" if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"]) load __dir__ + "/../simd/conf.rb" diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c index 9c6ed93049..82853633ba 100644 --- a/ext/json/generator/generator.c +++ b/ext/json/generator/generator.c @@ -1,4 +1,4 @@ -#include "ruby.h" +#include "../json.h" #include "../fbuffer/fbuffer.h" #include "../vendor/fpconv.c" @@ -9,6 +9,12 @@ /* ruby api and some helpers */ +enum duplicate_key_action { + JSON_DEPRECATED = 0, + JSON_IGNORE, + JSON_RAISE, +}; + typedef struct JSON_Generator_StateStruct { VALUE indent; VALUE space; @@ -21,20 +27,19 @@ typedef struct JSON_Generator_StateStruct { long depth; long buffer_initial_length; + enum duplicate_key_action on_duplicate_key; + + bool as_json_single_arg; bool allow_nan; bool ascii_only; bool script_safe; bool strict; } JSON_Generator_State; -#ifndef RB_UNLIKELY -#define RB_UNLIKELY(cond) (cond) -#endif - static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8; -static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode; -static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, +static ID i_to_s, i_to_json, i_new, i_encode; +static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key, sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json; @@ -55,8 +60,11 @@ struct generate_json_data { JSON_Generator_State *state; VALUE obj; generator_func func; + long depth; }; +static SIMD_Implementation simd_impl; + static VALUE cState_from_state_s(VALUE self, VALUE opts); static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io); static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj); @@ -66,9 +74,6 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj); static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj); static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj); -#ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj); -#endif static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj); static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj); static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj); @@ -76,23 +81,18 @@ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *d static int usascii_encindex, utf8_encindex, binary_encindex; -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_generator_error_str(VALUE invalid_object, VALUE str) +NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str) { + rb_enc_associate_index(str, utf8_encindex); VALUE exc = rb_exc_new_str(eGeneratorError, str); rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object); rb_exc_raise(exc); } -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif #ifdef RBIMPL_ATTR_FORMAT RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3) #endif -static void raise_generator_error(VALUE invalid_object, const char *fmt, ...) +NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...) { va_list args; va_start(args, fmt); @@ -127,13 +127,7 @@ typedef struct _search_state { #endif /* HAVE_SIMD */ } search_state; -#if (defined(__GNUC__ ) || defined(__clang__)) -#define FORCE_INLINE __attribute__((always_inline)) -#else -#define FORCE_INLINE -#endif - -static inline FORCE_INLINE void search_flush(search_state *search) +ALWAYS_INLINE(static) void search_flush(search_state *search) { // Do not remove this conditional without profiling, specifically escape-heavy text. // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush). @@ -160,8 +154,6 @@ static const unsigned char escape_table_basic[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; -static unsigned char (*search_escape_basic_impl)(search_state *); - static inline unsigned char search_escape_basic(search_state *search) { while (search->ptr < search->end) { @@ -176,7 +168,7 @@ static inline unsigned char search_escape_basic(search_state *search) return 0; } -static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search) +ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search) { const unsigned char ch = (unsigned char)*search->ptr; switch (ch) { @@ -217,11 +209,39 @@ static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search) * Everything else (should be UTF-8) is just passed through and * appended to the result. */ + + +#if defined(HAVE_SIMD_NEON) +static inline unsigned char search_escape_basic_neon(search_state *search); +#elif defined(HAVE_SIMD_SSE2) +static inline unsigned char search_escape_basic_sse2(search_state *search); +#endif + +static inline unsigned char search_escape_basic(search_state *search); + static inline void convert_UTF8_to_JSON(search_state *search) { - while (search_escape_basic_impl(search)) { +#ifdef HAVE_SIMD +#if defined(HAVE_SIMD_NEON) + while (search_escape_basic_neon(search)) { + escape_UTF8_char_basic(search); + } +#elif defined(HAVE_SIMD_SSE2) + if (simd_impl == SIMD_SSE2) { + while (search_escape_basic_sse2(search)) { + escape_UTF8_char_basic(search); + } + return; + } + while (search_escape_basic(search)) { + escape_UTF8_char_basic(search); + } +#endif +#else + while (search_escape_basic(search)) { escape_UTF8_char_basic(search); } +#endif /* HAVE_SIMD */ } static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) @@ -263,8 +283,10 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) #ifdef HAVE_SIMD -static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len) +ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len) { + RBIMPL_ASSERT_OR_ASSUME(len < vec_len); + // Flush the buffer so everything up until the last 'len' characters are unflushed. search_flush(search); @@ -274,19 +296,25 @@ static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsi char *s = (buf->ptr + buf->len); // Pad the buffer with dummy characters that won't need escaping. - // This seem wateful at first sight, but memset of vector length is very fast. - memset(s, 'X', vec_len); + // This seem wasteful at first sight, but memset of vector length is very fast. + // This is a space as it can be directly represented as an immediate on AArch64. + memset(s, ' ', vec_len); // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage. - MEMCPY(s, search->ptr, char, len); + if (vec_len == 16) { + RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); + json_fast_memcpy16(s, search->ptr, len); + } else { + MEMCPY(s, search->ptr, char, len); + } return s; } #ifdef HAVE_SIMD_NEON -static inline FORCE_INLINE unsigned char neon_next_match(search_state *search) +ALWAYS_INLINE(static) unsigned char neon_next_match(search_state *search) { uint64_t mask = search->matches_mask; uint32_t index = trailing_zeros64(mask) >> 2; @@ -400,7 +428,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search) #ifdef HAVE_SIMD_SSE2 -static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search) +ALWAYS_INLINE(static) unsigned char sse2_next_match(search_state *search) { int mask = search->matches_mask; int index = trailing_zeros(mask); @@ -424,7 +452,7 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search) #define TARGET_SSE2 #endif -static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search) +ALWAYS_INLINE(static) TARGET_SSE2 unsigned char search_escape_basic_sse2(search_state *search) { if (RB_UNLIKELY(search->has_matches)) { // There are more matches if search->matches_mask > 0. @@ -672,233 +700,6 @@ static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned } } -/* - * Document-module: JSON::Ext::Generator - * - * This is the JSON generator implemented as a C extension. It can be - * configured to be used by setting - * - * JSON.generator = JSON::Ext::Generator - * - * with the method generator= in JSON. - * - */ - -/* Explanation of the following: that's the only way to not pollute - * standard library's docs with GeneratorMethods::<ClassName> which - * are uninformative and take a large place in a list of classes - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods - * :nodoc: - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Array - * :nodoc: - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Bignum - * :nodoc: - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::FalseClass - * :nodoc: - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Fixnum - * :nodoc: - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Float - * :nodoc: - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Hash - * :nodoc: - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Integer - * :nodoc: - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::NilClass - * :nodoc: - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::Object - * :nodoc: - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::String - * :nodoc: - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::String::Extend - * :nodoc: - */ - -/* - * Document-module: JSON::Ext::Generator::GeneratorMethods::TrueClass - * :nodoc: - */ - -/* - * call-seq: to_json(state = nil) - * - * Returns a JSON string containing a JSON object, that is generated from - * this Hash instance. - * _state_ is a JSON::State object, that can also be used to configure the - * produced JSON string output further. - */ -static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_object, Qfalse); -} - -/* - * call-seq: to_json(state = nil) - * - * Returns a JSON string containing a JSON array, that is generated from - * this Array instance. - * _state_ is a JSON::State object, that can also be used to configure the - * produced JSON string output further. - */ -static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_array, Qfalse); -} - -#ifdef RUBY_INTEGER_UNIFICATION -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_integer, Qfalse); -} - -#else -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_fixnum, Qfalse); -} - -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Integer number. - */ -static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_bignum, Qfalse); -} -#endif - -/* - * call-seq: to_json(*) - * - * Returns a JSON string representation for this Float number. - */ -static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_float, Qfalse); -} - -/* - * call-seq: to_json(*) - * - * This string should be encoded with UTF-8 A call to this method - * returns a JSON string encoded with UTF16 big endian characters as - * \u????. - */ -static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_string, Qfalse); -} - -/* - * call-seq: to_json(*) - * - * Returns a JSON string for true: 'true'. - */ -static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - return rb_utf8_str_new("true", 4); -} - -/* - * call-seq: to_json(*) - * - * Returns a JSON string for false: 'false'. - */ -static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - return rb_utf8_str_new("false", 5); -} - -/* - * call-seq: to_json(*) - * - * Returns a JSON string for nil: 'null'. - */ -static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 0, 1); - return rb_utf8_str_new("null", 4); -} - -/* - * call-seq: to_json(*) - * - * Converts this object to a string (calling #to_s), converts - * it to a JSON string, and returns the result. This is a fallback, if no - * special method #to_json was defined for some object. - */ -static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) -{ - VALUE state; - VALUE string = rb_funcall(self, i_to_s, 0); - rb_scan_args(argc, argv, "01", &state); - Check_Type(string, T_STRING); - state = cState_from_state_s(cState, state); - return cState_partial_generate(state, string, generate_json_string, Qfalse); -} - static void State_mark(void *ptr) { JSON_Generator_State *state = ptr; @@ -921,32 +722,24 @@ static void State_compact(void *ptr) state->as_json = rb_gc_location(state->as_json); } -static void State_free(void *ptr) -{ - JSON_Generator_State *state = ptr; - ruby_xfree(state); -} - static size_t State_memsize(const void *ptr) { +#ifdef HAVE_RUBY_TYPED_EMBEDDABLE + return 0; +#else return sizeof(JSON_Generator_State); -} - -#ifndef HAVE_RB_EXT_RACTOR_SAFE -# undef RUBY_TYPED_FROZEN_SHAREABLE -# define RUBY_TYPED_FROZEN_SHAREABLE 0 #endif +} static const rb_data_type_t JSON_Generator_State_type = { - "JSON/Generator/State", - { + .wrap_struct_name = "JSON/Generator/State", + .function = { .dmark = State_mark, - .dfree = State_free, + .dfree = RUBY_DEFAULT_FREE, .dsize = State_memsize, .dcompact = State_compact, }, - 0, 0, - RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE, + .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE, }; static void state_init(JSON_Generator_State *state) @@ -978,18 +771,24 @@ static void vstate_spill(struct generate_json_data *data) RB_OBJ_WRITTEN(vstate, Qundef, state->as_json); } -static inline VALUE vstate_get(struct generate_json_data *data) +static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj) { if (RB_UNLIKELY(!data->vstate)) { vstate_spill(data); } - return data->vstate; + GET_STATE(data->vstate); + state->depth = data->depth; + VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate); + // no need to restore state->depth, vstate is just a temporary State + return tmp; } -struct hash_foreach_arg { - struct generate_json_data *data; - int iter; -}; +static VALUE +json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key) +{ + VALUE proc_args[2] = {object, is_key}; + return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil); +} static VALUE convert_string_subclass(VALUE key) @@ -1006,6 +805,159 @@ convert_string_subclass(VALUE key) return key_to_s; } +static bool enc_utf8_compatible_p(int enc_idx) +{ + if (enc_idx == usascii_encindex) return true; + if (enc_idx == utf8_encindex) return true; + return false; +} + +static VALUE encode_json_string_try(VALUE str) +{ + return rb_funcall(str, i_encode, 1, Encoding_UTF_8); +} + +static VALUE encode_json_string_rescue(VALUE str, VALUE exception) +{ + raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0)); + return Qundef; +} + +static inline int json_str_coderange(VALUE str) { + int coderange = RB_ENC_CODERANGE(str); + if (coderange == RUBY_ENC_CODERANGE_UNKNOWN) { + coderange = rb_enc_str_coderange(str); + } + return coderange; +} + +static inline bool valid_json_string_p(VALUE str) +{ + int coderange = json_str_coderange(str); + + if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) { + return true; + } + + if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) { + return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str)); + } + + return false; +} + +NOINLINE(static) VALUE convert_invalid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key) +{ + if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) { + VALUE coerced_str = json_call_as_json(data->state, str, Qfalse); + if (coerced_str != str) { + if (RB_TYPE_P(coerced_str, T_STRING)) { + if (!valid_json_string_p(coerced_str)) { + raise_generator_error(str, "source sequence is illegal/malformed utf-8"); + } + } else { + // as_json could return another type than T_STRING + if (is_key) { + raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str)); + } + } + + return coerced_str; + } + } + + if (RB_ENCODING_GET_INLINED(str) == binary_encindex) { + VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); + switch (rb_enc_str_coderange(utf8_string)) { + case ENC_CODERANGE_7BIT: + return utf8_string; + case ENC_CODERANGE_VALID: + // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. + // TODO: Raise in 3.0.0 + rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); + return utf8_string; + break; + } + } + + return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str); +} + +ALWAYS_INLINE(static) VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key) +{ + if (RB_LIKELY(valid_json_string_p(str))) { + return str; + } + else { + return convert_invalid_encoding(data, str, as_json_called, is_key); + } +} + +static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + fbuffer_append_char(buffer, '"'); + + long len; + search_state search; + search.buffer = buffer; + RSTRING_GETMEM(obj, search.ptr, len); + search.cursor = search.ptr; + search.end = search.ptr + len; + +#ifdef HAVE_SIMD + search.matches_mask = 0; + search.has_matches = false; + search.chunk_base = NULL; + search.chunk_end = NULL; +#endif /* HAVE_SIMD */ + + switch (json_str_coderange(obj)) { + case ENC_CODERANGE_7BIT: + case ENC_CODERANGE_VALID: + if (RB_UNLIKELY(data->state->ascii_only)) { + convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table); + } else if (RB_UNLIKELY(data->state->script_safe)) { + convert_UTF8_to_script_safe_JSON(&search); + } else { + convert_UTF8_to_JSON(&search); + } + break; + default: + raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); + break; + } + fbuffer_append_char(buffer, '"'); +} + +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + obj = ensure_valid_encoding(data, obj, false, false); + raw_generate_json_string(buffer, data, obj); +} + +struct hash_foreach_arg { + VALUE hash; + struct generate_json_data *data; + int first_key_type; + bool first; + bool mixed_keys_encountered; +}; + +NOINLINE(static) void +json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg) +{ + if (arg->mixed_keys_encountered) { + return; + } + arg->mixed_keys_encountered = true; + + JSON_Generator_State *state = arg->data->state; + if (state->on_duplicate_key != JSON_IGNORE) { + VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse; + rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise); + } +} + static int json_object_i(VALUE key, VALUE val, VALUE _arg) { @@ -1015,22 +967,34 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) FBuffer *buffer = data->buffer; JSON_Generator_State *state = data->state; - long depth = state->depth; - int j; + long depth = data->depth; + int key_type = rb_type(key); + + if (arg->first) { + arg->first = false; + arg->first_key_type = key_type; + } + else { + fbuffer_append_char(buffer, ','); + } - if (arg->iter > 0) fbuffer_append_char(buffer, ','); if (RB_UNLIKELY(data->state->object_nl)) { fbuffer_append_str(buffer, data->state->object_nl); } if (RB_UNLIKELY(data->state->indent)) { - for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, data->state->indent); - } + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } VALUE key_to_s; - switch (rb_type(key)) { + bool as_json_called = false; + + start: + switch (key_type) { case T_STRING: + if (RB_UNLIKELY(arg->first_key_type != T_STRING)) { + json_inspect_hash_with_mixed_keys(arg); + } + if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) { key_to_s = key; } else { @@ -1038,15 +1002,31 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) } break; case T_SYMBOL: + if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) { + json_inspect_hash_with_mixed_keys(arg); + } + key_to_s = rb_sym2str(key); break; default: + if (data->state->strict) { + if (RTEST(data->state->as_json) && !as_json_called) { + key = json_call_as_json(data->state, key, Qtrue); + key_type = rb_type(key); + as_json_called = true; + goto start; + } else { + raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key)); + } + } key_to_s = rb_convert_type(key, T_STRING, "String", "to_s"); break; } + key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true); + if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) { - generate_json_string(buffer, data, key_to_s); + raw_generate_json_string(buffer, data, key_to_s); } else { generate_json(buffer, data, key_to_s); } @@ -1055,46 +1035,43 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space); generate_json(buffer, data, val); - arg->iter++; return ST_CONTINUE; } static inline long increase_depth(struct generate_json_data *data) { JSON_Generator_State *state = data->state; - long depth = ++state->depth; + long depth = ++data->depth; if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) { - rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); + rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --data->depth); } return depth; } static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - int j; long depth = increase_depth(data); if (RHASH_SIZE(obj) == 0) { fbuffer_append(buffer, "{}", 2); - --data->state->depth; + --data->depth; return; } fbuffer_append_char(buffer, '{'); struct hash_foreach_arg arg = { + .hash = obj, .data = data, - .iter = 0, + .first = true, }; rb_hash_foreach(obj, json_object_i, (VALUE)&arg); - depth = --data->state->depth; + depth = --data->depth; if (RB_UNLIKELY(data->state->object_nl)) { fbuffer_append_str(buffer, data->state->object_nl); if (RB_UNLIKELY(data->state->indent)) { - for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, data->state->indent); - } + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } } fbuffer_append_char(buffer, '}'); @@ -1102,125 +1079,41 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { - int i, j; long depth = increase_depth(data); if (RARRAY_LEN(obj) == 0) { fbuffer_append(buffer, "[]", 2); - --data->state->depth; + --data->depth; return; } fbuffer_append_char(buffer, '['); if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl); - for (i = 0; i < RARRAY_LEN(obj); i++) { + for (int i = 0; i < RARRAY_LEN(obj); i++) { if (i > 0) { fbuffer_append_char(buffer, ','); if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl); } if (RB_UNLIKELY(data->state->indent)) { - for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, data->state->indent); - } + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } generate_json(buffer, data, RARRAY_AREF(obj, i)); } - data->state->depth = --depth; + data->depth = --depth; if (RB_UNLIKELY(data->state->array_nl)) { fbuffer_append_str(buffer, data->state->array_nl); if (RB_UNLIKELY(data->state->indent)) { - for (j = 0; j < depth; j++) { - fbuffer_append_str(buffer, data->state->indent); - } + fbuffer_append_str_repeat(buffer, data->state->indent, depth); } } fbuffer_append_char(buffer, ']'); } -static inline int enc_utf8_compatible_p(int enc_idx) -{ - if (enc_idx == usascii_encindex) return 1; - if (enc_idx == utf8_encindex) return 1; - return 0; -} - -static VALUE encode_json_string_try(VALUE str) -{ - return rb_funcall(str, i_encode, 1, Encoding_UTF_8); -} - -static VALUE encode_json_string_rescue(VALUE str, VALUE exception) -{ - raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0)); - return Qundef; -} - -static inline VALUE ensure_valid_encoding(VALUE str) -{ - int encindex = RB_ENCODING_GET(str); - VALUE utf8_string; - if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) { - if (encindex == binary_encindex) { - utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); - switch (rb_enc_str_coderange(utf8_string)) { - case ENC_CODERANGE_7BIT: - return utf8_string; - case ENC_CODERANGE_VALID: - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Raise in 3.0.0 - rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); - return utf8_string; - break; - } - } - - str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str); - } - return str; -} - -static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj) -{ - obj = ensure_valid_encoding(obj); - - fbuffer_append_char(buffer, '"'); - - long len; - search_state search; - search.buffer = buffer; - RSTRING_GETMEM(obj, search.ptr, len); - search.cursor = search.ptr; - search.end = search.ptr + len; - -#ifdef HAVE_SIMD - search.matches_mask = 0; - search.has_matches = false; - search.chunk_base = NULL; -#endif /* HAVE_SIMD */ - - switch (rb_enc_str_coderange(obj)) { - case ENC_CODERANGE_7BIT: - case ENC_CODERANGE_VALID: - if (RB_UNLIKELY(data->state->ascii_only)) { - convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table); - } else if (RB_UNLIKELY(data->state->script_safe)) { - convert_UTF8_to_script_safe_JSON(&search); - } else { - convert_UTF8_to_JSON(&search); - } - break; - default: - raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); - break; - } - fbuffer_append_char(buffer, '"'); -} - static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { VALUE tmp; if (rb_respond_to(obj, i_to_json)) { - tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data)); + tmp = json_call_to_json(data, obj); Check_Type(tmp, T_STRING); fbuffer_append_str(buffer, tmp); } else { @@ -1262,19 +1155,9 @@ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *dat static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { VALUE tmp = rb_funcall(obj, i_to_s, 0); - fbuffer_append_str(buffer, tmp); + fbuffer_append_str(buffer, StringValue(tmp)); } -#ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj) -{ - if (FIXNUM_P(obj)) - generate_json_fixnum(buffer, data, obj); - else - generate_json_bignum(buffer, data, obj); -} -#endif - static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj) { double value = RFLOAT_VALUE(obj); @@ -1283,11 +1166,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */ if (!allow_nan) { if (data->state->strict && data->state->as_json) { - VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil); + VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse); if (casted_obj != obj) { increase_depth(data); generate_json(buffer, data, casted_obj); - data->state->depth--; + data->depth--; return; } } @@ -1300,12 +1183,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data } /* This implementation writes directly into the buffer. We reserve - * the 28 characters that fpconv_dtoa states as its maximum. + * the 32 characters that fpconv_dtoa states as its maximum. */ - fbuffer_inc_capa(buffer, 28); + fbuffer_inc_capa(buffer, 32); char* d = buffer->ptr + buffer->len; int len = fpconv_dtoa(value, d); - /* fpconv_dtoa converts a float to its shortest string representation, * but it adds a ".0" if this is a plain integer. */ @@ -1319,7 +1201,7 @@ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *d fbuffer_append_str(buffer, fragment); } -static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +static inline void generate_json_general(FBuffer *buffer, struct generate_json_data *data, VALUE obj, bool fallback) { bool as_json_called = false; start: @@ -1346,22 +1228,31 @@ start: generate_json_bignum(buffer, data, obj); break; case T_HASH: - if (klass != rb_cHash) goto general; + if (fallback && klass != rb_cHash) goto general; generate_json_object(buffer, data, obj); break; case T_ARRAY: - if (klass != rb_cArray) goto general; + if (fallback && klass != rb_cArray) goto general; generate_json_array(buffer, data, obj); break; case T_STRING: - if (klass != rb_cString) goto general; - generate_json_string(buffer, data, obj); + if (fallback && klass != rb_cString) goto general; + + if (RB_LIKELY(valid_json_string_p(obj))) { + raw_generate_json_string(buffer, data, obj); + } else if (as_json_called) { + raise_generator_error(obj, "source sequence is illegal/malformed utf-8"); + } else { + obj = ensure_valid_encoding(data, obj, false, false); + as_json_called = true; + goto start; + } break; case T_SYMBOL: generate_json_symbol(buffer, data, obj); break; case T_FLOAT: - if (klass != rb_cFloat) goto general; + if (fallback && klass != rb_cFloat) goto general; generate_json_float(buffer, data, obj); break; case T_STRUCT: @@ -1372,7 +1263,7 @@ start: general: if (data->state->strict) { if (RTEST(data->state->as_json) && !as_json_called) { - obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil); + obj = json_call_as_json(data->state, obj, Qfalse); as_json_called = true; goto start; } else { @@ -1385,26 +1276,34 @@ start: } } +static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + generate_json_general(buffer, data, obj, true); +} + +static void generate_json_no_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj) +{ + generate_json_general(buffer, data, obj, false); +} + static VALUE generate_json_try(VALUE d) { struct generate_json_data *data = (struct generate_json_data *)d; data->func(data->buffer, data, data->obj); - return Qnil; + return fbuffer_finalize(data->buffer); } -static VALUE generate_json_rescue(VALUE d, VALUE exc) +static VALUE generate_json_ensure(VALUE d) { struct generate_json_data *data = (struct generate_json_data *)d; fbuffer_free(data->buffer); - rb_exc_raise(exc); - return Qundef; } -static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io) +static inline VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io) { GET_STATE(self); @@ -1416,14 +1315,13 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, struct generate_json_data data = { .buffer = &buffer, - .vstate = self, + .vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json .state = state, + .depth = state->depth, .obj = obj, .func = func }; - rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); - - return fbuffer_finalize(&buffer); + return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data); } /* call-seq: @@ -1439,10 +1337,16 @@ static VALUE cState_generate(int argc, VALUE *argv, VALUE self) rb_check_arity(argc, 1, 2); VALUE obj = argv[0]; VALUE io = argc > 1 ? argv[1] : Qnil; - VALUE result = cState_partial_generate(self, obj, generate_json, io); - GET_STATE(self); - (void)state; - return result; + return cState_partial_generate(self, obj, generate_json, io); +} + +/* :nodoc: */ +static VALUE cState_generate_no_fallback(int argc, VALUE *argv, VALUE self) +{ + rb_check_arity(argc, 1, 2); + VALUE obj = argv[0]; + VALUE io = argc > 1 ? argv[1] : Qnil; + return cState_partial_generate(self, obj, generate_json_no_fallback, io); } static VALUE cState_initialize(int argc, VALUE *argv, VALUE self) @@ -1467,12 +1371,14 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig) if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State"); MEMCPY(objState, origState, JSON_Generator_State, 1); - objState->indent = origState->indent; - objState->space = origState->space; - objState->space_before = origState->space_before; - objState->object_nl = origState->object_nl; - objState->array_nl = origState->array_nl; - objState->as_json = origState->as_json; + + RB_OBJ_WRITTEN(obj, Qundef, objState->indent); + RB_OBJ_WRITTEN(obj, Qundef, objState->space); + RB_OBJ_WRITTEN(obj, Qundef, objState->space_before); + RB_OBJ_WRITTEN(obj, Qundef, objState->object_nl); + RB_OBJ_WRITTEN(obj, Qundef, objState->array_nl); + RB_OBJ_WRITTEN(obj, Qundef, objState->as_json); + return obj; } @@ -1523,6 +1429,7 @@ static VALUE string_config(VALUE config) */ static VALUE cState_indent_set(VALUE self, VALUE indent) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->indent, string_config(indent)); return Qnil; @@ -1548,6 +1455,7 @@ static VALUE cState_space(VALUE self) */ static VALUE cState_space_set(VALUE self, VALUE space) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->space, string_config(space)); return Qnil; @@ -1571,6 +1479,7 @@ static VALUE cState_space_before(VALUE self) */ static VALUE cState_space_before_set(VALUE self, VALUE space_before) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->space_before, string_config(space_before)); return Qnil; @@ -1596,6 +1505,7 @@ static VALUE cState_object_nl(VALUE self) */ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl)); return Qnil; @@ -1619,6 +1529,7 @@ static VALUE cState_array_nl(VALUE self) */ static VALUE cState_array_nl_set(VALUE self, VALUE array_nl) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl)); return Qnil; @@ -1642,6 +1553,7 @@ static VALUE cState_as_json(VALUE self) */ static VALUE cState_as_json_set(VALUE self, VALUE as_json) { + rb_check_frozen(self); GET_STATE(self); RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc")); return Qnil; @@ -1673,7 +1585,21 @@ static VALUE cState_max_nesting(VALUE self) static long long_config(VALUE num) { - return RTEST(num) ? FIX2LONG(num) : 0; + return RTEST(num) ? NUM2LONG(num) : 0; +} + +// depth must never be negative; reject early with a clear error. +static long depth_config(VALUE num) +{ + if (!RTEST(num)) return 0; + long d = NUM2LONG(num); + if (RB_UNLIKELY(d < 0)) { + rb_raise(rb_eArgError, "depth must be >= 0 (got %ld)", d); + } + if (RB_UNLIKELY(d > INT_MAX)) { + rb_raise(rb_eArgError, "depth is too large (got %ld)", d); + } + return d; } /* @@ -1684,6 +1610,7 @@ static long long_config(VALUE num) */ static VALUE cState_max_nesting_set(VALUE self, VALUE depth) { + rb_check_frozen(self); GET_STATE(self); state->max_nesting = long_config(depth); return Qnil; @@ -1709,6 +1636,7 @@ static VALUE cState_script_safe(VALUE self) */ static VALUE cState_script_safe_set(VALUE self, VALUE enable) { + rb_check_frozen(self); GET_STATE(self); state->script_safe = RTEST(enable); return Qnil; @@ -1740,6 +1668,7 @@ static VALUE cState_strict(VALUE self) */ static VALUE cState_strict_set(VALUE self, VALUE enable) { + rb_check_frozen(self); GET_STATE(self); state->strict = RTEST(enable); return Qnil; @@ -1764,6 +1693,7 @@ static VALUE cState_allow_nan_p(VALUE self) */ static VALUE cState_allow_nan_set(VALUE self, VALUE enable) { + rb_check_frozen(self); GET_STATE(self); state->allow_nan = RTEST(enable); return Qnil; @@ -1788,11 +1718,25 @@ static VALUE cState_ascii_only_p(VALUE self) */ static VALUE cState_ascii_only_set(VALUE self, VALUE enable) { + rb_check_frozen(self); GET_STATE(self); state->ascii_only = RTEST(enable); return Qnil; } +static VALUE cState_allow_duplicate_key_p(VALUE self) +{ + GET_STATE(self); + switch (state->on_duplicate_key) { + case JSON_IGNORE: + return Qtrue; + case JSON_DEPRECATED: + return Qnil; + default: + return Qfalse; + } +} + /* * call-seq: depth * @@ -1812,8 +1756,9 @@ static VALUE cState_depth(VALUE self) */ static VALUE cState_depth_set(VALUE self, VALUE depth) { + rb_check_frozen(self); GET_STATE(self); - state->depth = long_config(depth); + state->depth = depth_config(depth); return Qnil; } @@ -1845,6 +1790,7 @@ static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_ */ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length) { + rb_check_frozen(self); GET_STATE(self); buffer_initial_length_set(state, buffer_initial_length); return Qnil; @@ -1877,13 +1823,15 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg) else if (key == sym_max_nesting) { state->max_nesting = long_config(val); } else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); } else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); } - else if (key == sym_depth) { state->depth = long_config(val); } + else if (key == sym_depth) { state->depth = depth_config(val); } else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); } else if (key == sym_script_safe) { state->script_safe = RTEST(val); } else if (key == sym_escape_slash) { state->script_safe = RTEST(val); } else if (key == sym_strict) { state->strict = RTEST(val); } + else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } else if (key == sym_as_json) { VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; + state->as_json_single_arg = proc && rb_proc_arity(proc) == 1; state_write_value(data, &state->as_json, proc); } return ST_CONTINUE; @@ -1909,12 +1857,13 @@ static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE con static VALUE cState_configure(VALUE self, VALUE opts) { + rb_check_frozen(self); GET_STATE(self); configure_state(state, self, opts); return self; } -static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) +static VALUE cState_m_do_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io, generator_func func) { JSON_Generator_State state = {0}; state_init(&state); @@ -1930,17 +1879,23 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) .buffer = &buffer, .vstate = Qfalse, .state = &state, + .depth = state.depth, .obj = obj, - .func = generate_json, + .func = func, }; - rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); + return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data); +} - return fbuffer_finalize(&buffer); +static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) +{ + return cState_m_do_generate(klass, obj, opts, io, generate_json); +} + +static VALUE cState_m_generate_no_fallback(VALUE klass, VALUE obj, VALUE opts, VALUE io) +{ + return cState_m_do_generate(klass, obj, opts, io, generate_json_no_fallback); } -/* - * - */ void Init_generator(void) { #ifdef HAVE_RB_EXT_RACTOR_SAFE @@ -2005,45 +1960,12 @@ void Init_generator(void) rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0); rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1); rb_define_method(cState, "generate", cState_generate, -1); - rb_define_alias(cState, "generate_new", "generate"); // :nodoc: + rb_define_method(cState, "_generate_no_fallback", cState_generate_no_fallback, -1); - rb_define_singleton_method(cState, "generate", cState_m_generate, 3); - - VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); - - VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object"); - rb_define_method(mObject, "to_json", mObject_to_json, -1); - - VALUE mHash = rb_define_module_under(mGeneratorMethods, "Hash"); - rb_define_method(mHash, "to_json", mHash_to_json, -1); - - VALUE mArray = rb_define_module_under(mGeneratorMethods, "Array"); - rb_define_method(mArray, "to_json", mArray_to_json, -1); - -#ifdef RUBY_INTEGER_UNIFICATION - VALUE mInteger = rb_define_module_under(mGeneratorMethods, "Integer"); - rb_define_method(mInteger, "to_json", mInteger_to_json, -1); -#else - VALUE mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum"); - rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1); + rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0); - VALUE mBignum = rb_define_module_under(mGeneratorMethods, "Bignum"); - rb_define_method(mBignum, "to_json", mBignum_to_json, -1); -#endif - VALUE mFloat = rb_define_module_under(mGeneratorMethods, "Float"); - rb_define_method(mFloat, "to_json", mFloat_to_json, -1); - - VALUE mString = rb_define_module_under(mGeneratorMethods, "String"); - rb_define_method(mString, "to_json", mString_to_json, -1); - - VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass"); - rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1); - - VALUE mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass"); - rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1); - - VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass"); - rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1); + rb_define_singleton_method(cState, "generate", cState_m_generate, 3); + rb_define_singleton_method(cState, "_generate_no_fallback", cState_m_generate_no_fallback, 3); rb_global_variable(&Encoding_UTF_8); Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); @@ -2051,10 +1973,6 @@ void Init_generator(void) i_to_s = rb_intern("to_s"); i_to_json = rb_intern("to_json"); i_new = rb_intern("new"); - i_pack = rb_intern("pack"); - i_unpack = rb_intern("unpack"); - i_create_id = rb_intern("create_id"); - i_extend = rb_intern("extend"); i_encode = rb_intern("encode"); sym_indent = ID2SYM(rb_intern("indent")); @@ -2071,6 +1989,7 @@ void Init_generator(void) sym_escape_slash = ID2SYM(rb_intern("escape_slash")); sym_strict = ID2SYM(rb_intern("strict")); sym_as_json = ID2SYM(rb_intern("as_json")); + sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key")); usascii_encindex = rb_usascii_encindex(); utf8_encindex = rb_utf8_encindex(); @@ -2078,22 +1997,5 @@ void Init_generator(void) rb_require("json/ext/generator/state"); - - switch (find_simd_implementation()) { -#ifdef HAVE_SIMD -#ifdef HAVE_SIMD_NEON - case SIMD_NEON: - search_escape_basic_impl = search_escape_basic_neon; - break; -#endif /* HAVE_SIMD_NEON */ -#ifdef HAVE_SIMD_SSE2 - case SIMD_SSE2: - search_escape_basic_impl = search_escape_basic_sse2; - break; -#endif /* HAVE_SIMD_SSE2 */ -#endif /* HAVE_SIMD */ - default: - search_escape_basic_impl = search_escape_basic; - break; - } + simd_impl = find_simd_implementation(); } diff --git a/ext/json/json.h b/ext/json/json.h new file mode 100644 index 0000000000..cf9420d4dd --- /dev/null +++ b/ext/json/json.h @@ -0,0 +1,134 @@ +#ifndef _JSON_H_ +#define _JSON_H_ + +#include "ruby.h" +#include "ruby/encoding.h" +#include <stdint.h> + +#ifndef RBIMPL_ASSERT_OR_ASSUME +# define RBIMPL_ASSERT_OR_ASSUME(x) +#endif + +#if defined(RUBY_DEBUG) && RUBY_DEBUG +# define JSON_ASSERT RUBY_ASSERT +#else +# ifdef JSON_DEBUG +# include <assert.h> +# define JSON_ASSERT(x) assert(x) +# else +# define JSON_ASSERT(x) +# endif +#endif + +/* shims */ + +#if SIZEOF_UINT64_T == SIZEOF_LONG_LONG +# define INT64T2NUM(x) LL2NUM(x) +# define UINT64T2NUM(x) ULL2NUM(x) +#elif SIZEOF_UINT64_T == SIZEOF_LONG +# define INT64T2NUM(x) LONG2NUM(x) +# define UINT64T2NUM(x) ULONG2NUM(x) +#else +# error No uint64_t conversion +#endif + +/* This is the fallback definition from Ruby 3.4 */ +#ifndef RBIMPL_STDBOOL_H +#if defined(__cplusplus) +# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) +# include <cstdbool> +# endif +#elif defined(HAVE_STDBOOL_H) +# include <stdbool.h> +#elif !defined(HAVE__BOOL) +typedef unsigned char _Bool; +# define bool _Bool +# define true ((_Bool)+1) +# define false ((_Bool)+0) +# define __bool_true_false_are_defined +#endif +#endif + +#ifndef HAVE_RUBY_XFREE_SIZED +static inline void ruby_xfree_sized(void *ptr, size_t oldsize) +{ + ruby_xfree(ptr); +} + +static inline void *ruby_xrealloc2_sized(void *ptr, size_t new_elems, size_t elem_size, size_t old_elems) +{ + return ruby_xrealloc2(ptr, new_elems, elem_size); +} +#endif + +# define JSON_SIZED_REALLOC_N(v, T, m, n) \ + ((v) = (T *)ruby_xrealloc2_sized((void *)(v), (m), sizeof(T), (n))) + +# define JSON_SIZED_FREE(v) ruby_xfree_sized((void *)(v), sizeof(*(v))) +# define JSON_SIZED_FREE_N(v, n) ruby_xfree_sized((void *)(v), sizeof(*(v)) * (n)) + +#ifndef HAVE_RB_EXT_RACTOR_SAFE +# undef RUBY_TYPED_FROZEN_SHAREABLE +# define RUBY_TYPED_FROZEN_SHAREABLE 0 +#endif + +#ifdef RUBY_TYPED_EMBEDDABLE +# define HAVE_RUBY_TYPED_EMBEDDABLE 1 +#else +# ifdef HAVE_CONST_RUBY_TYPED_EMBEDDABLE +# define RUBY_TYPED_EMBEDDABLE RUBY_TYPED_EMBEDDABLE +# define HAVE_RUBY_TYPED_EMBEDDABLE 1 +# else +# define RUBY_TYPED_EMBEDDABLE 0 +# endif +#endif + +#ifndef NORETURN +#if defined(__has_attribute) && __has_attribute(noreturn) +#define NORETURN(x) __attribute__((noreturn)) x +#else +#define NORETURN(x) x +#endif +#endif + +#ifndef NOINLINE +#if defined(__has_attribute) && __has_attribute(noinline) +#define NOINLINE(x) __attribute__((noinline)) x +#else +#define NOINLINE(x) x +#endif +#endif + +#ifndef ALWAYS_INLINE +#if defined(__has_attribute) && __has_attribute(always_inline) +#define ALWAYS_INLINE(x) inline __attribute__((always_inline)) x +#else +#define ALWAYS_INLINE(x) inline x +#endif +#endif + +#ifndef RB_UNLIKELY +#define RB_UNLIKELY(expr) expr +#endif + +#ifndef RB_LIKELY +#define RB_LIKELY(expr) expr +#endif + +#ifndef MAYBE_UNUSED +# define MAYBE_UNUSED(x) x +#endif + +#ifdef RUBY_DEBUG +#ifndef JSON_DEBUG +#define JSON_DEBUG RUBY_DEBUG +#endif +#endif + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && INTPTR_MAX == INT64_MAX +#define JSON_CPU_LITTLE_ENDIAN_64BITS 1 +#else +#define JSON_CPU_LITTLE_ENDIAN_64BITS 0 +#endif + +#endif // _JSON_H_ diff --git a/ext/json/lib/json.rb b/ext/json/lib/json.rb index 0ebff2f948..26d601926f 100644 --- a/ext/json/lib/json.rb +++ b/ext/json/lib/json.rb @@ -6,6 +6,15 @@ require 'json/common' # # \JSON is a lightweight data-interchange format. # +# \JSON is easy for us humans to read and write, +# and equally simple for machines to read (parse) and write (generate). +# +# \JSON is language-independent, making it an ideal interchange format +# for applications in differing programming languages +# and on differing operating systems. +# +# == \JSON Values +# # A \JSON value is one of the following: # - Double-quoted text: <tt>"foo"</tt>. # - Number: +1+, +1.0+, +2.0e2+. @@ -173,6 +182,30 @@ require 'json/common' # When enabled: # JSON.parse('[1,]', allow_trailing_comma: true) # => [1] # +# --- +# +# Option +allow_control_characters+ (boolean) specifies whether to allow +# unescaped ASCII control characters, such as newlines, in strings; +# defaults to +false+. +# +# With the default, +false+: +# JSON.parse(%{"Hello\nWorld"}) # invalid ASCII control character in string (JSON::ParserError) +# +# When enabled: +# JSON.parse(%{"Hello\nWorld"}, allow_control_characters: true) # => "Hello\nWorld" +# +# --- +# +# Option +allow_invalid_escape+ (boolean) specifies whether to ignore backslahes that are followed +# by an invalid escape character in strings; +# defaults to +false+. +# +# With the default, +false+: +# JSON.parse('"Hell\o"') # invalid escape character in string (JSON::ParserError) +# +# When enabled: +# JSON.parse('"Hell\o"', allow_invalid_escape: true) # => "Hello" +# # ====== Output Options # # Option +freeze+ (boolean) specifies whether the returned objects will be frozen; @@ -302,8 +335,27 @@ require 'json/common' # JSON.generate(JSON::MinusInfinity) # # Allow: -# ruby = [Float::NaN, Float::Infinity, Float::MinusInfinity] -# JSON.generate(ruby, allow_nan: true) # => '[NaN,Infinity,-Infinity]' +# ruby = [Float::NAN, Float::INFINITY, JSON::NaN, JSON::Infinity, JSON::MinusInfinity] +# JSON.generate(ruby, allow_nan: true) # => '[NaN,Infinity,NaN,Infinity,-Infinity]' +# +# --- +# +# Option +allow_duplicate_key+ (boolean) specifies whether +# hashes with duplicate keys should be allowed or produce an error. +# defaults to emit a deprecation warning. +# +# With the default, (not set): +# Warning[:deprecated] = true +# JSON.generate({ foo: 1, "foo" => 2 }) +# # warning: detected duplicate key "foo" in {foo: 1, "foo" => 2}. +# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true` +# # => '{"foo":1,"foo":2}' +# +# With <tt>false</tt> +# JSON.generate({ foo: 1, "foo" => 2 }, allow_duplicate_key: false) +# # detected duplicate key "foo" in {foo: 1, "foo" => 2} (JSON::GeneratorError) +# +# In version 3.0, <tt>false</tt> will become the default. # # --- # @@ -384,6 +436,9 @@ require 'json/common' # # == \JSON Additions # +# Note that JSON Additions must only be used with trusted data, and is +# deprecated. +# # When you "round trip" a non-\String object from Ruby to \JSON and back, # you have a new \String, instead of the object you began with: # ruby0 = Range.new(0, 2) diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb index e99d152a88..230bf08012 100644 --- a/ext/json/lib/json/common.rb +++ b/ext/json/lib/json/common.rb @@ -71,9 +71,14 @@ module JSON end when object_class if opts[:create_additions] != false - if class_name = object[JSON.create_id] - klass = JSON.deep_const_get(class_name) - if (klass.respond_to?(:json_creatable?) && klass.json_creatable?) || klass.respond_to?(:json_create) + if class_path = object[JSON.create_id] + klass = begin + Object.const_get(class_path) + rescue NameError => e + raise ArgumentError, "can't get const #{class_path}: #{e}" + end + + if klass.respond_to?(:json_creatable?) ? klass.json_creatable? : klass.respond_to?(:json_create) create_additions_warning if create_additions.nil? object = klass.json_create(object) end @@ -97,7 +102,7 @@ module JSON class << self def deprecation_warning(message, uplevel = 3) # :nodoc: - gem_root = File.expand_path("../../../", __FILE__) + "/" + gem_root = File.expand_path("..", __dir__) + "/" caller_locations(uplevel, 10).each do |frame| if frame.path.nil? || frame.path.start_with?(gem_root) || frame.path.end_with?("/truffle/cext_ruby.rb", ".c") uplevel += 1 @@ -147,29 +152,21 @@ module JSON const_set :Parser, parser end - # Return the constant located at _path_. The format of _path_ has to be - # either ::A::B::C or A::B::C. In any case, A has to be located at the top - # level (absolute namespace path?). If there doesn't exist a constant at - # the given path, an ArgumentError is raised. - def deep_const_get(path) # :nodoc: - Object.const_get(path) - rescue NameError => e - raise ArgumentError, "can't get const #{path}: #{e}" - end - # Set the module _generator_ to be used by JSON. def generator=(generator) # :nodoc: old, $VERBOSE = $VERBOSE, nil @generator = generator - generator_methods = generator::GeneratorMethods - for const in generator_methods.constants - klass = const_get(const) - modul = generator_methods.const_get(const) - klass.class_eval do - instance_methods(false).each do |m| - m.to_s == 'to_json' and remove_method m + if generator.const_defined?(:GeneratorMethods) + generator_methods = generator::GeneratorMethods + for const in generator_methods.constants + klass = const_get(const) + modul = generator_methods.const_get(const) + klass.class_eval do + instance_methods(false).each do |m| + m.to_s == 'to_json' and remove_method m + end + include modul end - include modul end end self.state = generator::State @@ -186,6 +183,25 @@ module JSON private + # Called from the extension when a hash has both string and symbol keys + def on_mixed_keys_hash(hash, do_raise) + set = {} + hash.each_key do |key| + key_str = key.to_s + + if set[key_str] + message = "detected duplicate key #{key_str.inspect} in #{hash.inspect}" + if do_raise + raise GeneratorError, message + else + deprecation_warning("#{message}.\nThis will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`") + end + else + set[key_str] = true + end + end + end + def deprecated_singleton_attr_accessor(*attrs) args = RUBY_VERSION >= "3.0" ? ", category: :deprecated" : "" attrs.each do |attr| @@ -391,7 +407,7 @@ module JSON # # Returns a \String containing the generated \JSON data. # - # See also JSON.fast_generate, JSON.pretty_generate. + # See also JSON.pretty_generate. # # Argument +obj+ is the Ruby object to be converted to \JSON. # @@ -536,6 +552,7 @@ module JSON :create_additions => nil, } # :call-seq: + # JSON.unsafe_load(source, options = {}) -> object # JSON.unsafe_load(source, proc = nil, options = {}) -> object # # Returns the Ruby objects created by parsing the given +source+. @@ -643,6 +660,7 @@ module JSON # when Array # obj.map! {|v| deserialize_obj v } # end + # obj # }) # pp ruby # Output: @@ -666,7 +684,12 @@ module JSON # def unsafe_load(source, proc = nil, options = nil) opts = if options.nil? - _unsafe_load_default_options + if proc && proc.is_a?(Hash) + options, proc = proc, nil + options + else + _unsafe_load_default_options + end else _unsafe_load_default_options.merge(options) end @@ -684,12 +707,17 @@ module JSON if opts[:allow_blank] && (source.nil? || source.empty?) source = 'null' end - result = parse(source, opts) - recurse_proc(result, &proc) if proc - result + + if proc + opts = opts.dup + opts[:on_load] = proc.to_proc + end + + parse(source, opts) end # :call-seq: + # JSON.load(source, options = {}) -> object # JSON.load(source, proc = nil, options = {}) -> object # # Returns the Ruby objects created by parsing the given +source+. @@ -803,6 +831,7 @@ module JSON # when Array # obj.map! {|v| deserialize_obj v } # end + # obj # }) # pp ruby # Output: @@ -825,8 +854,18 @@ module JSON # @attributes={"type"=>"Admin", "password"=>"0wn3d"}>} # def load(source, proc = nil, options = nil) + if proc && options.nil? && proc.is_a?(Hash) + options = proc + proc = nil + end + opts = if options.nil? - _load_default_options + if proc && proc.is_a?(Hash) + options, proc = proc, nil + options + else + _load_default_options + end else _load_default_options.merge(options) end @@ -841,7 +880,7 @@ module JSON end end - if opts[:allow_blank] && (source.nil? || source.empty?) + if opts[:allow_blank] && (source.nil? || (String === source && source.empty?)) source = 'null' end @@ -999,7 +1038,8 @@ module JSON # JSON.new(options = nil, &block) # # Argument +options+, if given, contains a \Hash of options for both parsing and generating. - # See {Parsing Options}[#module-JSON-label-Parsing+Options], and {Generating Options}[#module-JSON-label-Generating+Options]. + # See {Parsing Options}[rdoc-ref:JSON@Parsing+Options], + # and {Generating Options}[rdoc-ref:JSON@Generating+Options]. # # For generation, the <tt>strict: true</tt> option is always set. When a Ruby object with no native \JSON counterpart is # encountered, the block provided to the initialize method is invoked, and must return a Ruby object that has a native @@ -1028,7 +1068,7 @@ module JSON options[:as_json] = as_json if as_json @state = State.new(options).freeze - @parser_config = Ext::Parser::Config.new(ParserOptions.prepare(options)) + @parser_config = Ext::Parser::Config.new(ParserOptions.prepare(options)).freeze end # call-seq: @@ -1037,7 +1077,7 @@ module JSON # # Serialize the given object into a \JSON document. def dump(object, io = nil) - @state.generate_new(object, io) + @state.generate(object, io) end alias_method :generate, :dump @@ -1058,6 +1098,30 @@ module JSON load(File.read(path, encoding: Encoding::UTF_8)) end end + + module GeneratorMethods + # call-seq: to_json(*) + # + # Converts this object into a JSON string. + # If this object doesn't directly maps to a JSON native type, + # first convert it to a string (calling #to_s), then converts + # it to a JSON string, and returns the result. + # This is a fallback, if no special method #to_json was defined for some object. + def to_json(state = nil, *) + obj = case self + when nil, false, true, Integer, Float, Array, Hash + self + else + "#{self}" + end + + if state.nil? + JSON::State._generate_no_fallback(obj, nil, nil) + else + JSON::State.from_state(state)._generate_no_fallback(obj) + end + end + end end module ::Kernel @@ -1103,3 +1167,7 @@ module ::Kernel JSON[object, opts] end end + +class Object + include JSON::GeneratorMethods +end diff --git a/ext/json/lib/json/ext/generator/state.rb b/ext/json/lib/json/ext/generator/state.rb index d40c3b5ec3..e4f425af6a 100644 --- a/ext/json/lib/json/ext/generator/state.rb +++ b/ext/json/lib/json/ext/generator/state.rb @@ -8,20 +8,8 @@ module JSON # # Instantiates a new State object, configured by _opts_. # - # _opts_ can have the following keys: - # - # * *indent*: a string used to indent levels (default: ''), - # * *space*: a string that is put after, a : or , delimiter (default: ''), - # * *space_before*: a string that is put before a : pair delimiter (default: ''), - # * *object_nl*: a string that is put at the end of a JSON object (default: ''), - # * *array_nl*: a string that is put at the end of a JSON array (default: ''), - # * *allow_nan*: true if NaN, Infinity, and -Infinity should be - # generated, otherwise an exception is thrown, if these values are - # encountered. This options defaults to false. - # * *ascii_only*: true if only ASCII characters should be generated. This - # option defaults to false. - # * *buffer_initial_length*: sets the initial length of the generator's - # internal buffer. + # Argument +opts+, if given, contains a \Hash of options for the generation. + # See {Generating Options}[rdoc-ref:JSON@Generating+Options]. def initialize(opts = nil) if opts && !opts.empty? configure(opts) @@ -68,6 +56,11 @@ module JSON buffer_initial_length: buffer_initial_length, } + allow_duplicate_key = allow_duplicate_key? + unless allow_duplicate_key.nil? + result[:allow_duplicate_key] = allow_duplicate_key + end + instance_variables.each do |iv| iv = iv.to_s[1..-1] result[iv.to_sym] = self[iv] @@ -82,6 +75,8 @@ module JSON # # Returns the value returned by method +name+. def [](name) + ::JSON.deprecation_warning("JSON::State#[] is deprecated and will be removed in json 3.0.0") + if respond_to?(name) __send__(name) else @@ -94,6 +89,8 @@ module JSON # # Sets the attribute name to value. def []=(name, value) + ::JSON.deprecation_warning("JSON::State#[]= is deprecated and will be removed in json 3.0.0") + if respond_to?(name_writer = "#{name}=") __send__ name_writer, value else diff --git a/ext/json/lib/json/generic_object.rb b/ext/json/lib/json/generic_object.rb index ec5aa9dcb2..5c8ace354b 100644 --- a/ext/json/lib/json/generic_object.rb +++ b/ext/json/lib/json/generic_object.rb @@ -52,14 +52,6 @@ module JSON table end - def [](name) - __send__(name) - end unless method_defined?(:[]) - - def []=(name, value) - __send__("#{name}=", value) - end unless method_defined?(:[]=) - def |(other) self.class[other.to_hash.merge(to_hash)] end diff --git a/ext/json/lib/json/version.rb b/ext/json/lib/json/version.rb index f9ac3e17a9..30c0a71d2f 100644 --- a/ext/json/lib/json/version.rb +++ b/ext/json/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.13.2' + VERSION = '2.19.8' end diff --git a/ext/json/parser/depend b/ext/json/parser/depend index 1bb03d3517..d4737b1dfb 100644 --- a/ext/json/parser/depend +++ b/ext/json/parser/depend @@ -175,6 +175,8 @@ parser.o: $(hdrdir)/ruby/ruby.h parser.o: $(hdrdir)/ruby/st.h parser.o: $(hdrdir)/ruby/subst.h parser.o: $(srcdir)/../fbuffer/fbuffer.h +parser.o: $(srcdir)/../json.h parser.o: $(srcdir)/../simd/simd.h +parser.o: $(srcdir)/../vendor/ryu.h parser.o: parser.c # AUTOGENERATED DEPENDENCIES END diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb index de5d5758b4..a9d740c755 100644 --- a/ext/json/parser/extconf.rb +++ b/ext/json/parser/extconf.rb @@ -1,10 +1,16 @@ # frozen_string_literal: true require 'mkmf' +$defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0" have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0 +have_func("rb_str_to_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby -have_func("strnlen", "string.h") # Missing on Solaris 10 +have_func("ruby_xfree_sized", "ruby.h") # RUBY_VERSION >= 4.1 + +if RUBY_ENGINE == "ruby" + have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3 +end append_cflags("-std=c99") diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 1e6ee753f0..c0631728c3 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1,50 +1,22 @@ -#include "ruby.h" -#include "ruby/encoding.h" - -/* shims */ -/* This is the fallback definition from Ruby 3.4 */ - -#ifndef RBIMPL_STDBOOL_H -#if defined(__cplusplus) -# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) -# include <cstdbool> -# endif -#elif defined(HAVE_STDBOOL_H) -# include <stdbool.h> -#elif !defined(HAVE__BOOL) -typedef unsigned char _Bool; -# define bool _Bool -# define true ((_Bool)+1) -# define false ((_Bool)+0) -# define __bool_true_false_are_defined -#endif -#endif - +#include "../json.h" +#include "../vendor/ryu.h" #include "../simd/simd.h" -#ifndef RB_UNLIKELY -#define RB_UNLIKELY(expr) expr -#endif - -#ifndef RB_LIKELY -#define RB_LIKELY(expr) expr -#endif - static VALUE mJSON, eNestingError, Encoding_UTF_8; static VALUE CNaN, CInfinity, CMinusInfinity; -static ID i_chr, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_uminus, i_encode; +static ID i_new, i_try_convert, i_uminus, i_encode; -static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, - sym_decimal_class, sym_on_load, sym_allow_duplicate_key; +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, + sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load, + sym_allow_duplicate_key; static int binary_encindex; static int utf8_encindex; #ifndef HAVE_RB_HASH_BULK_INSERT // For TruffleRuby -void +static void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) { long index = 0; @@ -61,6 +33,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) #define rb_hash_new_capa(n) rb_hash_new() #endif +#ifndef HAVE_RB_STR_TO_INTERNED_STR +static VALUE rb_str_to_interned_str(VALUE str) +{ + return rb_funcall(rb_str_freeze(str), i_uminus, 0); +} +#endif /* name cache */ @@ -106,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring cache->entries[index] = rstring; } -static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +#define rstring_cache_memcmp memcmp + +#if JSON_CPU_LITTLE_ENDIAN_64BITS +#if __has_builtin(__builtin_bswap64) +#undef rstring_cache_memcmp +ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length) { - long rstring_length = RSTRING_LEN(rstring); - if (length == rstring_length) { - return memcmp(str, RSTRING_PTR(rstring), length); - } else { - return (int)(length - rstring_length); + // The libc memcmp has numerous complex optimizations, but in this particular case, + // we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to + // inline a simpler memcmp outperforms calling the libc version. + long i = 0; + + for (; i + 8 <= length; i += 8) { + uint64_t a, b; + memcpy(&a, str + i, 8); + memcpy(&b, rptr + i, 8); + if (a != b) { + a = __builtin_bswap64(a); + b = __builtin_bswap64(b); + return (a < b) ? -1 : 1; + } } + + for (; i < length; i++) { + if (str[i] != rptr[i]) { + return (str[i] < rptr[i]) ? -1 : 1; + } + } + + return 0; } +#endif +#endif -static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring) { - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } + const char *rstring_ptr; + long rstring_length; - if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; + RSTRING_GETMEM(rstring, rstring_ptr, rstring_length); + + if (length == rstring_length) { + return rstring_cache_memcmp(str, rstring_ptr, length); + } else { + return (int)(length - rstring_length); } +} +ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ int low = 0; int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; while (low <= high) { - mid = (high + low) >> 1; + int mid = (high + low) >> 1; VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, entry); + int cmp = rstring_cache_cmp(str, length, entry); - if (last_cmp == 0) { + if (cmp == 0) { return entry; - } else if (last_cmp > 0) { + } else if (cmp > 0) { low = mid + 1; } else { high = mid - 1; } } - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - VALUE rstring = build_interned_string(str, length); if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rstring); + rvalue_cache_insert_at(cache, low, rstring); } return rstring; } static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) { - if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { - // Common names aren't likely to be very long. So we just don't - // cache names above an arbitrary threshold. - return Qfalse; - } - - if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) { - // Simple heuristic, if the first character isn't a letter, - // we're much less likely to see this string again. - // We mostly want to cache strings that are likely to be repeated. - return Qfalse; - } - int low = 0; int high = cache->length - 1; - int mid = 0; - int last_cmp = 0; while (low <= high) { - mid = (high + low) >> 1; + int mid = (high + low) >> 1; VALUE entry = cache->entries[mid]; - last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); - if (last_cmp == 0) { + if (cmp == 0) { return entry; - } else if (last_cmp > 0) { + } else if (cmp > 0) { low = mid + 1; } else { high = mid - 1; } } - if (RB_UNLIKELY(memchr(str, '\\', length))) { - // We assume the overwhelming majority of names don't need to be escaped. - // But if they do, we have to fallback to the slow path. - return Qfalse; - } - VALUE rsymbol = build_symbol(str, length); if (cache->length < JSON_RVALUE_CACHE_CAPA) { - if (last_cmp > 0) { - mid += 1; - } - - rvalue_cache_insert_at(cache, mid, rsymbol); + rvalue_cache_insert_at(cache, low, rsymbol); } return rsymbol; } @@ -245,7 +211,7 @@ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalu if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { stack = rvalue_stack_spill(stack, handle, stack_ref); } else { - REALLOC_N(stack->ptr, VALUE, required); + JSON_SIZED_REALLOC_N(stack->ptr, VALUE, required, stack->capa); stack->capa = required; } return stack; @@ -275,35 +241,62 @@ static void rvalue_stack_mark(void *ptr) { rvalue_stack *stack = (rvalue_stack *)ptr; long index; - for (index = 0; index < stack->head; index++) { - rb_gc_mark(stack->ptr[index]); + if (stack && stack->ptr) { + for (index = 0; index < stack->head; index++) { + rb_gc_mark_movable(stack->ptr[index]); + } } } +static void rvalue_stack_free_buffer(rvalue_stack *stack) +{ + JSON_SIZED_FREE_N(stack->ptr, stack->capa); + stack->ptr = NULL; +} + static void rvalue_stack_free(void *ptr) { rvalue_stack *stack = (rvalue_stack *)ptr; if (stack) { - ruby_xfree(stack->ptr); - ruby_xfree(stack); + rvalue_stack_free_buffer(stack); +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + JSON_SIZED_FREE(stack); +#endif } } static size_t rvalue_stack_memsize(const void *ptr) { const rvalue_stack *stack = (const rvalue_stack *)ptr; - return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; + size_t memsize = sizeof(VALUE) * stack->capa; +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + memsize += sizeof(rvalue_stack); +#endif + return memsize; +} + +static void rvalue_stack_compact(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + long index; + if (stack && stack->ptr) { + for (index = 0; index < stack->head; index++) { + stack->ptr[index] = rb_gc_location(stack->ptr[index]); + } + } } static const rb_data_type_t JSON_Parser_rvalue_stack_type = { - "JSON::Ext::Parser/rvalue_stack", - { + .wrap_struct_name = "JSON::Ext::Parser/rvalue_stack", + .function = { .dmark = rvalue_stack_mark, .dfree = rvalue_stack_free, .dsize = rvalue_stack_memsize, + .dcompact = rvalue_stack_compact, }, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY, + // We deliberately don't declare rvalue_stack as RUBY_TYPED_WB_PROTECTED + // because it churns a lot of values so trigering write barriers every time is very costly. + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE, }; static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) @@ -325,19 +318,206 @@ static void rvalue_stack_eagerly_release(VALUE handle) if (handle) { rvalue_stack *stack; TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); - RTYPEDDATA_DATA(handle) = NULL; +#ifdef HAVE_RUBY_TYPED_EMBEDDABLE + rvalue_stack_free_buffer(stack); +#else rvalue_stack_free(stack); + RTYPEDDATA_DATA(handle) = NULL; +#endif + } +} + +/* frame stack */ + +// Iterative (non-recursive) parsing keeps an explicit stack of the containers +// currently being built, instead of relying on the C call stack. Each frame +// only needs enough bookkeeping to close its container: which kind it is, the +// rvalue_stack position where its children start (so we know how many to pop), +// and the cursor at its opening brace (used to rewind for duplicate key +// errors). Frames hold no VALUEs, so this stack needs no GC marking; it reuses +// the same stack-allocated-with-heap-spill strategy as the rvalue_stack so that +// it's freed even if parsing raises. +// +// The lifecycle helpers below (grow/push/peek/pop/spill/free/eagerly_release +// and the rb_data_type_t) deliberately mirror their rvalue_stack counterparts +// -- the element type and the absence of a mark function are the only real +// differences. Keep the two in sync: a fix to the spill/release or +// HAVE_RUBY_TYPED_EMBEDDABLE handling in one almost certainly belongs in the +// other. +#define JSON_FRAME_STACK_INITIAL_CAPA 32 + +enum json_frame_type { + JSON_FRAME_ROOT, // == JSON_PHASE_DONE + JSON_FRAME_ARRAY, // == JSON_PHASE_ARRAY_COMMA + JSON_FRAME_OBJECT, // = JSON_PHASE_OBJECT_COMMA +}; + +// Where a frame is within its container's grammar. This is the entirety of the +// parser's "what to do next" state: json_parse_any dispatches on the top +// frame's phase and holds no resume state in C locals, so a parse can stop at +// any value boundary and be resumed purely from the (persistable) frame stack. +// +// The first three phases are deliberately equal to the corresponding json_frame_type +// to simplify the transition of phase in json_value_completed. +enum json_frame_phase { + JSON_PHASE_DONE = JSON_FRAME_ROOT, // root only: the document value has been parsed + JSON_PHASE_ARRAY_COMMA = JSON_FRAME_ARRAY, // after a value: expecting ',' or the closing ']' + JSON_PHASE_OBJECT_COMMA = JSON_FRAME_OBJECT, // after a value: expecting ',' or the closing '}' + JSON_PHASE_VALUE, // expecting a value (document root, array element, or object value after ':') + JSON_PHASE_OBJECT_KEY, // expecting a '"' key (after '{' or ',') + JSON_PHASE_OBJECT_COLON, // object only: after a key, expecting ':' +}; + +typedef struct json_frame_struct { + enum json_frame_type type; + enum json_frame_phase phase; + long value_stack_head; // rvalue_stack->head when this container opened + const char *start_cursor; // object frames only (the '{'); NULL otherwise +} json_frame; + +typedef struct json_frame_stack_struct { + enum rvalue_stack_type type; // shared with rvalue_stack: is ptr stack- or heap-allocated + long capa; + long head; + json_frame *ptr; +} json_frame_stack; + +enum duplicate_key_action { + JSON_DEPRECATED = 0, + JSON_IGNORE, + JSON_RAISE, +}; + +typedef struct JSON_ParserStruct { + VALUE on_load_proc; + VALUE decimal_class; + ID decimal_method_id; + enum duplicate_key_action on_duplicate_key; + int max_nesting; + bool allow_nan; + bool allow_trailing_comma; + bool allow_control_characters; + bool allow_invalid_escape; + bool symbolize_names; + bool freeze; +} JSON_ParserConfig; + +typedef struct JSON_ParserStateStruct { + VALUE *value_stack_handle; + VALUE *frame_stack_handle; + const char *start; + const char *cursor; + const char *end; + rvalue_stack *value_stack; + json_frame_stack *frames; + rvalue_cache name_cache; + int in_array; + int current_nesting; + unsigned int emitted_deprecations; +} JSON_ParserState; + +static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref); + +static json_frame_stack *json_frame_stack_grow(json_frame_stack *stack, VALUE *handle, json_frame_stack **stack_ref) +{ + long required = stack->capa * 2; + + if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { + stack = json_frame_stack_spill(stack, handle, stack_ref); + } else { + JSON_SIZED_REALLOC_N(stack->ptr, json_frame, required, stack->capa); + stack->capa = required; } + return stack; } +static json_frame *json_frame_stack_push(JSON_ParserState *state, json_frame frame) +{ + json_frame_stack *stack = state->frames; + if (RB_UNLIKELY(stack->head >= stack->capa)) { + stack = json_frame_stack_grow(stack, state->frame_stack_handle, &state->frames); + } + + json_frame *frame_ptr = &stack->ptr[stack->head++]; + *frame_ptr = frame; + return frame_ptr; +} -#ifndef HAVE_STRNLEN -static size_t strnlen(const char *s, size_t maxlen) +static inline json_frame *json_frame_stack_peek(json_frame_stack *stack) { - char *p; - return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen); + return &stack->ptr[stack->head - 1]; } + +static inline void json_frame_stack_pop(json_frame_stack *stack) +{ + stack->head--; +} + +static void json_frame_stack_free_buffer(json_frame_stack *stack) +{ + JSON_SIZED_FREE_N(stack->ptr, stack->capa); + stack->ptr = NULL; +} + +static void json_frame_stack_free(void *ptr) +{ + json_frame_stack *stack = (json_frame_stack *)ptr; + if (stack) { + json_frame_stack_free_buffer(stack); +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + JSON_SIZED_FREE(stack); #endif + } +} + +static size_t json_frame_stack_memsize(const void *ptr) +{ + const json_frame_stack *stack = (const json_frame_stack *)ptr; + + size_t memsize = sizeof(json_frame) * stack->capa; +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + memsize += sizeof(json_frame_stack); +#endif + return memsize; +} + +static const rb_data_type_t JSON_Parser_frame_stack_type = { + .wrap_struct_name = "JSON::Ext::Parser/frame_stack", + .function = { + .dmark = NULL, + .dfree = json_frame_stack_free, + .dsize = json_frame_stack_memsize, + }, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE, +}; + +static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref) +{ + json_frame_stack *stack; + *handle = TypedData_Make_Struct(0, json_frame_stack, &JSON_Parser_frame_stack_type, stack); + *stack_ref = stack; + MEMCPY(stack, old_stack, json_frame_stack, 1); + + stack->capa = old_stack->capa << 1; + stack->ptr = ALLOC_N(json_frame, stack->capa); + stack->type = RVALUE_STACK_HEAP_ALLOCATED; + MEMCPY(stack->ptr, old_stack->ptr, json_frame, old_stack->head); + return stack; +} + +static void json_frame_stack_eagerly_release(VALUE handle) +{ + if (handle) { + json_frame_stack *stack; + TypedData_Get_Struct(handle, json_frame_stack, &JSON_Parser_frame_stack_type, stack); +#ifdef HAVE_RUBY_TYPED_EMBEDDABLE + json_frame_stack_free_buffer(stack); +#else + json_frame_stack_free(stack); + RTYPEDDATA_DATA(handle) = NULL; +#endif + } +} static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) { @@ -365,38 +545,31 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) return len; } -enum duplicate_key_action { - JSON_DEPRECATED = 0, - JSON_IGNORE, - JSON_RAISE, -}; +static inline size_t rest(JSON_ParserState *state) { + return state->end - state->cursor; +} -typedef struct JSON_ParserStruct { - VALUE on_load_proc; - VALUE decimal_class; - ID decimal_method_id; - enum duplicate_key_action on_duplicate_key; - int max_nesting; - bool allow_nan; - bool allow_trailing_comma; - bool parsing_name; - bool symbolize_names; - bool freeze; -} JSON_ParserConfig; +static inline bool eos(JSON_ParserState *state) { + return state->cursor >= state->end; +} -typedef struct JSON_ParserStateStruct { - VALUE stack_handle; - const char *start; - const char *cursor; - const char *end; - rvalue_stack *stack; - rvalue_cache name_cache; - int in_array; - int current_nesting; -} JSON_ParserState; +static inline char peek(JSON_ParserState *state) +{ + if (RB_UNLIKELY(eos(state))) { + return 0; + } + return *state->cursor; +} static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out) { + JSON_ASSERT(state->cursor <= state->end); + + // Redundant but helpful for hardening + if (RB_UNLIKELY(state->cursor > state->end)) { + state->cursor = state->end; + } + const char *cursor = state->cursor; long column = 0; long line = 1; @@ -428,14 +601,9 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state) #define PARSE_ERROR_FRAGMENT_LEN 32 -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error(const char *format, JSON_ParserState *state) +static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column) { unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3]; - long line, column; - cursor_position(state, &line, &column); const char *ptr = "EOF"; if (state->cursor && state->cursor < state->end) { @@ -467,20 +635,28 @@ static void raise_parse_error(const char *format, JSON_ParserState *state) } } - VALUE msg = rb_sprintf(format, ptr); - VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column); - RB_GC_GUARD(msg); + VALUE message = rb_enc_sprintf(enc_utf8, format, ptr); + rb_str_catf(message, " at line %ld column %ld", line, column); + return message; +} +static VALUE parse_error_new(VALUE message, long line, long column) +{ VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message); rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line)); rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column)); - rb_exc_raise(exc); + return exc; } -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at) +NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state) +{ + long line, column; + cursor_position(state, &line, &column); + VALUE message = build_parse_error_message(format, state, line, column); + rb_exc_raise(parse_error_new(message, line, column)); +} + +NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at) { state->cursor = at; raise_parse_error(format, state); @@ -505,23 +681,24 @@ static const signed char digit_values[256] = { -1, -1, -1, -1, -1, -1, -1 }; -static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p) -{ - signed char b; - uint32_t result = 0; - b = digit_values[p[0]]; - if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2); - result = (result << 4) | (unsigned char)b; - b = digit_values[p[1]]; - if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2); - result = (result << 4) | (unsigned char)b; - b = digit_values[p[2]]; - if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2); - result = (result << 4) | (unsigned char)b; - b = digit_values[p[3]]; - if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2); - result = (result << 4) | (unsigned char)b; - return result; +static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe) +{ + if (RB_UNLIKELY(sp > spe - 4)) { + raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2); + } + + const unsigned char *p = (const unsigned char *)sp; + + const signed char b0 = digit_values[p[0]]; + const signed char b1 = digit_values[p[1]]; + const signed char b2 = digit_values[p[2]]; + const signed char b3 = digit_values[p[3]]; + + if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) { + raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2); + } + + return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3; } #define GET_PARSER_CONFIG \ @@ -530,61 +707,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p static const rb_data_type_t JSON_ParserConfig_type; -static const bool whitespace[256] = { - [' '] = 1, - ['\t'] = 1, - ['\n'] = 1, - ['\r'] = 1, - ['/'] = 1, -}; - -static void +NOINLINE(static) void json_eat_comments(JSON_ParserState *state) { - if (state->cursor + 1 < state->end) { - switch (state->cursor[1]) { - case '/': { - state->cursor = memchr(state->cursor, '\n', state->end - state->cursor); - if (!state->cursor) { - state->cursor = state->end; - } else { - state->cursor++; - } - break; + const char *start = state->cursor; + state->cursor++; + + switch (peek(state)) { + case '/': { + state->cursor = memchr(state->cursor, '\n', state->end - state->cursor); + if (!state->cursor) { + state->cursor = state->end; + } else { + state->cursor++; } - case '*': { - state->cursor += 2; - while (true) { - state->cursor = memchr(state->cursor, '*', state->end - state->cursor); - if (!state->cursor) { - raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end); - } else { - state->cursor++; - if (state->cursor < state->end && *state->cursor == '/') { - state->cursor++; - break; - } - } + break; + } + case '*': { + state->cursor++; + + while (true) { + const char *next_match = memchr(state->cursor, '*', state->end - state->cursor); + if (!next_match) { + raise_parse_error_at("unterminated comment, expected closing '*/'", state, start); + } + + state->cursor = next_match + 1; + if (peek(state) == '/') { + state->cursor++; + break; } - break; } - default: - raise_parse_error("unexpected token %s", state); - break; + break; } - } else { - raise_parse_error("unexpected token %s", state); + default: + raise_parse_error_at("unexpected token %s", state, start); + break; } } -static inline void +ALWAYS_INLINE(static) void json_eat_whitespace(JSON_ParserState *state) { - while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) { - if (RB_LIKELY(*state->cursor != '/')) { - state->cursor++; - } else { - json_eat_comments(state); + while (true) { + switch (peek(state)) { + case ' ': + state->cursor++; + break; + case '\n': + state->cursor++; + + // Heuristic: if we see a newline, there is likely consecutive spaces after it. +#if JSON_CPU_LITTLE_ENDIAN_64BITS + while (rest(state) > 8) { + uint64_t chunk; + memcpy(&chunk, state->cursor, sizeof(uint64_t)); + if (chunk == 0x2020202020202020) { + state->cursor += 8; + continue; + } + + uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT; + state->cursor += consecutive_spaces; + break; + } +#endif + break; + case '\t': + case '\r': + state->cursor++; + break; + case '/': + json_eat_comments(state); + break; + + default: + return; } } } @@ -615,11 +813,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern return result; } -static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize) +static inline bool json_string_cacheable_p(const char *string, size_t length) { + // We mostly want to cache strings that are likely to be repeated. + // Simple heuristics: + // - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold. + // - If the first character isn't a letter, we're much less likely to see this string again. + return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]); +} + +static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name) +{ + bool intern = is_name || config->freeze; + bool symbolize = is_name && config->symbolize_names; size_t bufferSize = stringEnd - string; - if (is_name && state->in_array) { + if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize); @@ -635,104 +844,129 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st return build_string(string, stringEnd, intern, symbolize); } -static VALUE json_string_unescape(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize) -{ - size_t bufferSize = stringEnd - string; - const char *p = string, *pe = string, *unescape, *bufferStart; - char *buffer; - int unescape_len; - char buf[4]; +#define JSON_MAX_UNESCAPE_POSITIONS 16 +typedef struct _json_unescape_positions { + long size; + const char **positions; + unsigned long additional_backslashes; +} JSON_UnescapePositions; - if (is_name && state->in_array) { - VALUE cached_key; - if (RB_UNLIKELY(symbolize)) { - cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize); - } else { - cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize); +static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions) +{ + while (positions->size) { + positions->size--; + const char *next_position = positions->positions[0]; + positions->positions++; + if (next_position >= pe) { + return next_position; } + } - if (RB_LIKELY(cached_key)) { - return cached_key; - } + if (positions->additional_backslashes) { + positions->additional_backslashes--; + return memchr(pe, '\\', stringEnd - pe); } + return NULL; +} + +NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions) +{ + bool intern = is_name || config->freeze; + bool symbolize = is_name && config->symbolize_names; + size_t bufferSize = stringEnd - string; + const char *p = string, *pe = string, *bufferStart; + char *buffer; + VALUE result = rb_str_buf_new(bufferSize); rb_enc_associate_index(result, utf8_encindex); buffer = RSTRING_PTR(result); bufferStart = buffer; - while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) { - unescape = (char *) "?"; - unescape_len = 1; +#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe; + + while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) { if (pe > p) { MEMCPY(buffer, p, char, pe - p); buffer += pe - p; } switch (*++pe) { + case '"': + case '/': + p = pe; // nothing to unescape just need to skip the backslash + break; + case '\\': + APPEND_CHAR('\\'); + break; case 'n': - unescape = (char *) "\n"; + APPEND_CHAR('\n'); break; case 'r': - unescape = (char *) "\r"; + APPEND_CHAR('\r'); break; case 't': - unescape = (char *) "\t"; - break; - case '"': - unescape = (char *) "\""; - break; - case '\\': - unescape = (char *) "\\"; + APPEND_CHAR('\t'); break; case 'b': - unescape = (char *) "\b"; + APPEND_CHAR('\b'); break; case 'f': - unescape = (char *) "\f"; + APPEND_CHAR('\f'); break; - case 'u': - if (pe > stringEnd - 5) { - raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p); - } else { - uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe); - pe += 3; - /* To handle values above U+FFFF, we take a sequence of - * \uXXXX escapes in the U+D800..U+DBFF then - * U+DC00..U+DFFF ranges, take the low 10 bits from each - * to make a 20-bit number, then add 0x10000 to get the - * final codepoint. - * - * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling - * Surrogate Pairs in UTF-16", and 23.6 "Surrogates - * Area". - */ - if ((ch & 0xFC00) == 0xD800) { - pe++; - if (pe > stringEnd - 6) { - raise_parse_error_at("incomplete surrogate pair at %s", state, p); - } - if (pe[0] == '\\' && pe[1] == 'u') { - uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2); - ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) - | (sur & 0x3FF)); - pe += 5; - } else { - unescape = (char *) "?"; - break; + case 'u': { + uint32_t ch = unescape_unicode(state, ++pe, stringEnd); + pe += 3; + /* To handle values above U+FFFF, we take a sequence of + * \uXXXX escapes in the U+D800..U+DBFF then + * U+DC00..U+DFFF ranges, take the low 10 bits from each + * to make a 20-bit number, then add 0x10000 to get the + * final codepoint. + * + * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling + * Surrogate Pairs in UTF-16", and 23.6 "Surrogates + * Area". + */ + if ((ch & 0xFC00) == 0xD800) { + pe++; + if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) { + uint32_t sur = unescape_unicode(state, pe + 2, stringEnd); + + if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) { + raise_parse_error_at("invalid surrogate pair at %s", state, p); } + + ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF)); + pe += 5; + } else { + raise_parse_error_at("incomplete surrogate pair at %s", state, p); + break; } - unescape_len = convert_UTF32_to_UTF8(buf, ch); - unescape = buf; } + + int unescape_len = convert_UTF32_to_UTF8(buffer, ch); + buffer += unescape_len; + p = ++pe; break; + } default: - p = pe; - continue; + if ((unsigned char)*pe < 0x20) { + if (!config->allow_control_characters) { + if (*pe == '\n') { + raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1); + } + raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1); + } + } + + if (config->allow_invalid_escape) { + APPEND_CHAR(*pe); + } else { + raise_parse_error_at("invalid escape character in string: %s", state, pe - 1); + } + break; } - MEMCPY(buffer, unescape, char, unescape_len); - buffer += unescape_len; - p = ++pe; } +#undef APPEND_CHAR if (stringEnd > p) { MEMCPY(buffer, p, char, stringEnd - p); @@ -743,87 +977,99 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c if (symbolize) { result = rb_str_intern(result); } else if (intern) { - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + result = rb_str_to_interned_str(result); } return result; } #define MAX_FAST_INTEGER_SIZE 18 -static inline VALUE fast_decode_integer(const char *p, const char *pe) -{ - bool negative = false; - if (*p == '-') { - negative = true; - p++; - } +#define MAX_NUMBER_STACK_BUFFER 128 - long long memo = 0; - while (p < pe) { - memo *= 10; - memo += *p - '0'; - p++; - } +typedef VALUE (*json_number_decode_func_t)(const char *ptr); - if (negative) { - memo = -memo; +static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func) +{ + if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) { + char buffer[MAX_NUMBER_STACK_BUFFER]; + MEMCPY(buffer, start, char, len); + buffer[len] = '\0'; + return func(buffer); + } else { + VALUE buffer_v = rb_str_tmp_new(len); + char *buffer = RSTRING_PTR(buffer_v); + MEMCPY(buffer, start, char, len); + buffer[len] = '\0'; + VALUE number = func(buffer); + RB_GC_GUARD(buffer_v); + return number; } - return LL2NUM(memo); } -static VALUE json_decode_large_integer(const char *start, long len) +static VALUE json_decode_inum(const char *buffer) +{ + return rb_cstr2inum(buffer, 10); +} + +NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len) { - VALUE buffer_v; - char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1); - MEMCPY(buffer, start, char, len); - buffer[len] = '\0'; - VALUE number = rb_cstr2inum(buffer, 10); - RB_ALLOCV_END(buffer_v); - return number; + return json_decode_large_number(start, len, json_decode_inum); } -static inline VALUE -json_decode_integer(const char *start, const char *end) +static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end) { - long len = end - start; - if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { - return fast_decode_integer(start, end); + if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) { + if (negative) { + return INT64T2NUM(-((int64_t)mantissa)); } - return json_decode_large_integer(start, len); + return UINT64T2NUM(mantissa); + } + + return json_decode_large_integer(start, end - start); } -static VALUE json_decode_large_float(const char *start, long len) +static VALUE json_decode_dnum(const char *buffer) { - VALUE buffer_v; - char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1); - MEMCPY(buffer, start, char, len); - buffer[len] = '\0'; - VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1)); - RB_ALLOCV_END(buffer_v); - return number; + return DBL2NUM(rb_cstr_to_dbl(buffer, 1)); } -static VALUE json_decode_float(JSON_ParserConfig *config, const char *start, const char *end) +NOINLINE(static) VALUE json_decode_large_float(const char *start, long len) { - long len = end - start; + return json_decode_large_number(start, len, json_decode_dnum); +} +/* Ruby JSON optimized float decoder using vendored Ryu algorithm + * Accepts pre-extracted mantissa and exponent from first-pass validation + */ +static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int64_t exponent, bool negative, + const char *start, const char *end) +{ if (RB_UNLIKELY(config->decimal_class)) { - VALUE text = rb_str_new(start, len); + VALUE text = rb_str_new(start, end - start); return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text); - } else if (RB_LIKELY(len < 64)) { - char buffer[64]; - MEMCPY(buffer, start, char, len); - buffer[len] = '\0'; - return DBL2NUM(rb_cstr_to_dbl(buffer, 1)); - } else { - return json_decode_large_float(start, len); } + + if (RB_UNLIKELY(exponent > INT32_MAX)) { + return negative ? CMinusInfinity : CInfinity; + } + + if (RB_UNLIKELY(exponent < INT32_MIN)) { + return rb_float_new(negative ? -0.0 : 0.0); + } + + // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case) + // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308) + if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) { + return json_decode_large_float(start, end - start); + } + + return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, (int32_t)exponent, negative)); } static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count) { - VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count)); - rvalue_stack_pop(state->stack, count); + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->value_stack, count)); + rvalue_stack_pop(state->value_stack, count); if (config->freeze) { RB_OBJ_FREEZE(array); @@ -849,7 +1095,7 @@ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs) return Qfalse; } -static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key) +NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key) { VALUE message = rb_sprintf( "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`", @@ -860,41 +1106,52 @@ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_ RB_GC_GUARD(message); } -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key) +NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key) { VALUE message = rb_sprintf( "duplicate key %"PRIsVALUE, rb_inspect(duplicate_key) ); - raise_parse_error(RSTRING_PTR(message), state); - RB_GC_GUARD(message); + long line, column; + cursor_position(state, &line, &column); + rb_str_concat(message, build_parse_error_message("", state, line, column)) ; + rb_exc_raise(parse_error_new(message, line, column)); +} + +NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_ParserConfig *config, size_t count, const VALUE *pairs) +{ + switch (config->on_duplicate_key) { + case JSON_IGNORE: + return; + + case JSON_DEPRECATED: + // Only emit the first few deprecations to avoid spamming. + if (state->emitted_deprecations < 5) { + emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs)); + state->emitted_deprecations++; + } + return; + + case JSON_RAISE: + raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs)); + return; + } + UNREACHABLE; } static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count) { size_t entries_count = count / 2; VALUE object = rb_hash_new_capa(entries_count); - const VALUE *pairs = rvalue_stack_peek(state->stack, count); + const VALUE *pairs = rvalue_stack_peek(state->value_stack, count); rb_hash_bulk_insert(count, pairs, object); if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) { - switch (config->on_duplicate_key) { - case JSON_IGNORE: - break; - case JSON_DEPRECATED: - emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs)); - break; - case JSON_RAISE: - raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs)); - break; - } + json_on_duplicate_key(state, config, count, pairs); } - rvalue_stack_pop(state->stack, count); + rvalue_stack_pop(state->value_stack, count); if (config->freeze) { RB_OBJ_FREEZE(object); @@ -903,26 +1160,12 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi return object; } -static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name) -{ - VALUE string; - bool intern = is_name || config->freeze; - bool symbolize = is_name && config->symbolize_names; - if (escaped) { - string = json_string_unescape(state, start, end, is_name, intern, symbolize); - } else { - string = json_string_fastpath(state, start, end, is_name, intern, symbolize); - } - - return string; -} - static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value) { if (RB_UNLIKELY(config->on_load_proc)) { value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil); } - rvalue_stack_push(state->stack, value, &state->stack_handle, &state->stack); + rvalue_stack_push(state->value_stack, value, state->value_stack_handle, &state->value_stack); return value; } @@ -939,17 +1182,11 @@ static const bool string_scan_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; -#if (defined(__GNUC__ ) || defined(__clang__)) -#define FORCE_INLINE __attribute__((always_inline)) -#else -#define FORCE_INLINE -#endif - #ifdef HAVE_SIMD static SIMD_Implementation simd_impl = SIMD_NONE; #endif /* HAVE_SIMD */ -static inline bool FORCE_INLINE string_scan(JSON_ParserState *state) +ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state) { #ifdef HAVE_SIMD #if defined(HAVE_SIMD_NEON) @@ -957,7 +1194,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state) uint64_t mask = 0; if (string_scan_simd_neon(&state->cursor, state->end, &mask)) { state->cursor += trailing_zeros64(mask) >> 2; - return 1; + return true; } #elif defined(HAVE_SIMD_SSE2) @@ -965,313 +1202,574 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state) int mask = 0; if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) { state->cursor += trailing_zeros(mask); - return 1; + return true; } } #endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */ #endif /* HAVE_SIMD */ - while (state->cursor < state->end) { + while (!eos(state)) { if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) { - return 1; + return true; } - *state->cursor++; + state->cursor++; } - return 0; + + // If the string ended with an unterminated escape sequence, we might + // have gone past the end. + if (RB_UNLIKELY(state->cursor > state->end)) { + state->cursor = state->end; + } + + return false; } -static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name) +static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start) { - state->cursor++; - const char *start = state->cursor; - bool escaped = false; + const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS]; + JSON_UnescapePositions positions = { + .size = 0, + .positions = backslashes, + .additional_backslashes = 0, + }; - while (RB_UNLIKELY(string_scan(state))) { + do { switch (*state->cursor) { case '"': { - VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name); + VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions); state->cursor++; - return json_push_value(state, config, string); + return string; } case '\\': { - state->cursor++; - escaped = true; - if ((unsigned char)*state->cursor < 0x20) { - raise_parse_error("invalid ASCII control character in string: %s", state); + if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) { + backslashes[positions.size] = state->cursor; + positions.size++; + } else { + positions.additional_backslashes++; } + state->cursor++; break; } default: - raise_parse_error("invalid ASCII control character in string: %s", state); + if (!config->allow_control_characters) { + raise_parse_error("invalid ASCII control character in string: %s", state); + } break; } state->cursor++; - } + } while (string_scan(state)); raise_parse_error("unexpected end of input, expected closing \"", state); return Qfalse; } -static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) +ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name) { - json_eat_whitespace(state); - if (state->cursor >= state->end) { - raise_parse_error("unexpected end of input", state); + state->cursor++; + const char *start = state->cursor; + + if (RB_UNLIKELY(!string_scan(state))) { + raise_parse_error("unexpected end of input, expected closing \"", state); } - switch (*state->cursor) { - case 'n': - if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) { - state->cursor += 4; - return json_push_value(state, config, Qnil); - } + VALUE string; + if (RB_LIKELY(*state->cursor == '"')) { + string = json_string_fastpath(state, config, start, state->cursor, is_name); + state->cursor++; + } + else { + string = json_parse_escaped_string(state, config, is_name, start); + } - raise_parse_error("unexpected token %s", state); - break; - case 't': - if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) { - state->cursor += 4; - return json_push_value(state, config, Qtrue); - } + return string; +} - raise_parse_error("unexpected token %s", state); - break; - case 'f': - // Note: memcmp with a small power of two compile to an integer comparison - if ((state->end - state->cursor >= 5) && (memcmp(state->cursor + 1, "alse", 4) == 0)) { - state->cursor += 5; - return json_push_value(state, config, Qfalse); - } +#if JSON_CPU_LITTLE_ENDIAN_64BITS +// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/ +// Additional References: +// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html +static inline uint64_t decode_8digits_unrolled(uint64_t val) { + const uint64_t mask = 0x000000FF000000FF; + const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return val; +} - raise_parse_error("unexpected token %s", state); - break; - case 'N': - // Note: memcmp with a small power of two compile to an integer comparison - if (config->allow_nan && (state->end - state->cursor >= 3) && (memcmp(state->cursor + 1, "aN", 2) == 0)) { - state->cursor += 3; - return json_push_value(state, config, CNaN); - } +static inline uint64_t decode_4digits_unrolled(uint32_t val) { + const uint32_t mask = 0x000000FF; + const uint32_t mul1 = 100; + val -= 0x30303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = ((val & mask) * mul1) + (((val >> 16) & mask)); + return val; +} +#endif - raise_parse_error("unexpected token %s", state); - break; - case 'I': - if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) { - state->cursor += 8; - return json_push_value(state, config, CInfinity); - } +static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator) +{ + const char *start = state->cursor; - raise_parse_error("unexpected token %s", state); - break; - case '-': - // Note: memcmp with a small power of two compile to an integer comparison - if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) { - if (config->allow_nan) { - state->cursor += 9; - return json_push_value(state, config, CMinusInfinity); - } else { - raise_parse_error("unexpected token %s", state); - } - } - // Fallthrough - case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { - bool integer = true; +#if JSON_CPU_LITTLE_ENDIAN_64BITS + while (rest(state) >= sizeof(uint64_t)) { + uint64_t next_8bytes; + memcpy(&next_8bytes, state->cursor, sizeof(uint64_t)); + + // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333 + // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html + uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4); + + if (match == 0x3333333333333333) { // 8 consecutive digits + *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes); + state->cursor += 8; + continue; + } + + uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT; - // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/ - const char *start = state->cursor; + if (consecutive_digits >= 4) { + *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes); + state->cursor += 4; + consecutive_digits -= 4; + } + + while (consecutive_digits) { + *accumulator = *accumulator * 10 + (*state->cursor - '0'); + consecutive_digits--; state->cursor++; + } - while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) { - state->cursor++; - } + return (int)(state->cursor - start); + } +#endif + + char next_char; + while (rb_isdigit(next_char = peek(state))) { + *accumulator = *accumulator * 10 + (next_char - '0'); + state->cursor++; + } + return (int)(state->cursor - start); +} - long integer_length = state->cursor - start; +static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start) +{ + bool integer = true; + const char first_digit = *state->cursor; - if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) { - raise_parse_error_at("invalid number: %s", state, start); - } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) { - raise_parse_error_at("invalid number: %s", state, start); - } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) { - raise_parse_error_at("invalid number: %s", state, start); - } + // Variables for Ryu optimization - extract digits during parsing + int64_t exponent = 0; + int decimal_point_pos = -1; + uint64_t mantissa = 0; - if ((state->cursor < state->end) && (*state->cursor == '.')) { - integer = false; - state->cursor++; + // Parse integer part and extract mantissa digits + int mantissa_digits = json_parse_digits(state, &mantissa); + + if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) { + raise_parse_error_at("invalid number: %s", state, start); + } + + // Parse fractional part + if (peek(state) == '.') { + integer = false; + decimal_point_pos = mantissa_digits; // Remember position of decimal point + state->cursor++; + + int fractional_digits = json_parse_digits(state, &mantissa); + mantissa_digits += fractional_digits; + + if (RB_UNLIKELY(!fractional_digits)) { + raise_parse_error_at("invalid number: %s", state, start); + } + } + + // Parse exponent + if (rb_tolower(peek(state)) == 'e') { + integer = false; + state->cursor++; + + bool negative_exponent = false; + const char next_char = peek(state); + if (next_char == '-' || next_char == '+') { + negative_exponent = next_char == '-'; + state->cursor++; + } + + uint64_t abs_exponent = 0; + int exponent_digits = json_parse_digits(state, &abs_exponent); + + if (RB_UNLIKELY(!exponent_digits)) { + raise_parse_error_at("invalid number: %s", state, start); + } + + if (RB_UNLIKELY(exponent_digits >= 20 || abs_exponent > (uint64_t)INT64_MAX)) { + exponent = negative_exponent ? INT64_MIN : INT64_MAX; + } else { + exponent = negative_exponent ? -(int64_t)abs_exponent : (int64_t)abs_exponent; + } + } + + if (integer) { + return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor); + } + + // Adjust exponent based on decimal point position + if (decimal_point_pos >= 0) { + exponent -= (mantissa_digits - decimal_point_pos); + } + + return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor); +} + +static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config) +{ + return json_parse_number(state, config, false, state->cursor); +} + +static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config) +{ + return json_parse_number(state, config, true, state->cursor - 1); +} - if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') { - raise_parse_error("invalid number: %s", state); +// How many values (array elements, or interleaved object keys+values) have been +// pushed onto the rvalue stack since this container opened. Used to size the +// bulk decode on close, and to tell the first key/colon from later ones. +static inline long json_frame_entry_count(const json_frame *frame, const rvalue_stack *value_stack) +{ + return value_stack->head - frame->value_stack_head; +} + +// A complete value now sits on top of the rvalue stack. Advance the frame that +// was waiting for it: the root document is done, or the enclosing container +// moves on to expecting a ',' or its closing bracket. The caller passes the +// frame it already has in hand -- the one that was expecting the value -- which +// after a container close is the freshly re-exposed parent. +static inline void json_value_completed(json_frame *frame) +{ + JSON_ASSERT((int)JSON_PHASE_DONE == (int)JSON_FRAME_ROOT); + JSON_ASSERT((int)JSON_PHASE_ARRAY_COMMA == (int)JSON_FRAME_ARRAY); + JSON_ASSERT((int)JSON_PHASE_OBJECT_COMMA == (int)JSON_FRAME_OBJECT); + + frame->phase = (enum json_frame_phase) frame->type; +} + +ALWAYS_INLINE(static) bool json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset) +{ + // It is assumed that since `keyword` is always a literal, the compiler is able to constantize this + // `strlen` and several other computations in that routine, such as eliminating the `if (resumable)` branch. + + size_t len = strlen(keyword); + + // Note: memcmp with a small power of two and a literal string compile to an integer comparison / + // That's why we sometime compare starting from the first byte and sometimes from the second. + if (rest(state) >= len && (memcmp(state->cursor + offset, keyword + offset, len - offset) == 0)) { + state->cursor += len; + return true; + } + return false; +} + +// Parse an arbitrary JSON value iteratively. This is a state machine driven +// entirely by the top frame's phase so it can stop at any value boundary and +// resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the +// bottom of the stack, so the stack is never empty mid-parse and the document +// itself is just another frame whose value, once parsed, leaves its phase DONE. +static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) +{ + json_frame *frame = json_frame_stack_peek(state->frames); + + switch (frame->phase) { + case JSON_PHASE_DONE: goto JSON_PHASE_DONE; + case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA; + case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA; + case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE; + case JSON_PHASE_OBJECT_KEY: goto JSON_PHASE_OBJECT_KEY; + case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON; + } + UNREACHABLE_RETURN(Qundef); + + JSON_PHASE_DONE: { + // The root document value is parsed; it is the lone survivor on + // the rvalue stack. + return *rvalue_stack_peek(state->value_stack, 1); + } + + JSON_PHASE_VALUE: { + json_eat_whitespace(state); + + VALUE value; + switch (peek(state)) { + case 'n': + if (json_match_keyword(state, "null", 0)) { + value = Qnil; + break; } + raise_parse_error("unexpected token %s", state); - while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) { - state->cursor++; + case 't': + if (json_match_keyword(state, "true", 0)) { + value = Qtrue; + break; } - } + raise_parse_error("unexpected token %s", state); - if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) { - integer = false; - state->cursor++; - if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) { - state->cursor++; + case 'f': + if (json_match_keyword(state, "false", 1)) { + value = Qfalse; + break; } + raise_parse_error("unexpected token %s", state); - if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') { - raise_parse_error("invalid number: %s", state); + case 'N': + // Note: memcmp with a small power of two compile to an integer comparison + if (config->allow_nan && json_match_keyword(state, "NaN", 1)) { + value = CNaN; + break; } + raise_parse_error("unexpected token %s", state); - while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) { - state->cursor++; + case 'I': + if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) { + value = CInfinity; + break; } - } + raise_parse_error("unexpected token %s", state); - if (integer) { - return json_push_value(state, config, json_decode_integer(start, state->cursor)); + case '-': { + state->cursor++; + if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) { + value = CMinusInfinity; + } else { + value = json_parse_negative_number(state, config); + } + break; } - return json_push_value(state, config, json_decode_float(config, start, state->cursor)); - } - case '"': { - // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"} - return json_parse_string(state, config, false); - break; - } - case '[': { - state->cursor++; - json_eat_whitespace(state); - long stack_head = state->stack->head; - if ((state->cursor < state->end) && (*state->cursor == ']')) { + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + value = json_parse_positive_number(state, config); + break; + + case '"': + // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"} + value = json_parse_string(state, config, false); + break; + + case '[': { state->cursor++; - return json_push_value(state, config, json_decode_array(state, config, 0)); - } else { + json_eat_whitespace(state); + + if (peek(state) == ']') { + state->cursor++; + value = json_decode_array(state, config, 0); + break; + } + state->current_nesting++; if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); } state->in_array++; - json_parse_any(state, config); + + // Phase stays VALUE: the next iteration reads the first element. + frame = json_frame_stack_push(state, (json_frame){ + .type = JSON_FRAME_ARRAY, + .phase = JSON_PHASE_VALUE, + .value_stack_head = state->value_stack->head, + }); + goto JSON_PHASE_VALUE; } + case '{': { + const char *object_start_cursor = state->cursor; - while (true) { + state->cursor++; json_eat_whitespace(state); - if (state->cursor < state->end) { - if (*state->cursor == ']') { - state->cursor++; - long count = state->stack->head - stack_head; - state->current_nesting--; - state->in_array--; - return json_push_value(state, config, json_decode_array(state, config, count)); - } + if (peek(state) == '}') { + state->cursor++; + value = json_decode_object(state, config, 0); + break; + } - if (*state->cursor == ',') { - state->cursor++; - if (config->allow_trailing_comma) { - json_eat_whitespace(state); - if ((state->cursor < state->end) && (*state->cursor == ']')) { - continue; - } - } - json_parse_any(state, config); - continue; - } + state->current_nesting++; + if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { + rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); } - raise_parse_error("expected ',' or ']' after array value", state); + // Phase KEY: the next iteration reads the first key. + frame = json_frame_stack_push(state, (json_frame){ + .type = JSON_FRAME_OBJECT, + .phase = JSON_PHASE_OBJECT_KEY, + .value_stack_head = state->value_stack->head, + .start_cursor = object_start_cursor, + }); + goto JSON_PHASE_OBJECT_KEY; } - break; + + case 0: + raise_parse_error("unexpected end of input", state); + + default: + raise_parse_error("unexpected character: %s", state); } - case '{': { - const char *object_start_cursor = state->cursor; - state->cursor++; - json_eat_whitespace(state); - long stack_head = state->stack->head; + json_push_value(state, config, value); + json_value_completed(frame); - if ((state->cursor < state->end) && (*state->cursor == '}')) { - state->cursor++; - return json_push_value(state, config, json_decode_object(state, config, 0)); - } else { - state->current_nesting++; - if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) { - rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting); - } + switch (frame->phase) { + case JSON_PHASE_DONE: goto JSON_PHASE_DONE; + case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA; + case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA; + case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE; + case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef); + case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON; + } + UNREACHABLE_RETURN(Qundef); + } - if (*state->cursor != '"') { - raise_parse_error("expected object key, got %s", state); - } - json_parse_string(state, config, true); + JSON_PHASE_OBJECT_KEY: { + JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); - json_eat_whitespace(state); - if ((state->cursor >= state->end) || (*state->cursor != ':')) { - raise_parse_error("expected ':' after object key", state); - } - state->cursor++; + json_eat_whitespace(state); - json_parse_any(state, config); + if (RB_LIKELY(peek(state) == '"')) { + json_push_value(state, config, json_parse_string(state, config, true)); + frame->phase = JSON_PHASE_OBJECT_COLON; + goto JSON_PHASE_OBJECT_COLON; + } else { + // The message differs for the first key vs. a key after a + // ',': the first is the only one reached with nothing pushed + // for this object yet. + if (json_frame_entry_count(frame, state->value_stack) == 0) { + raise_parse_error("expected object key, got %s", state); + } else { + raise_parse_error("expected object key, got: %s", state); } + } + UNREACHABLE_RETURN(Qundef); + } - while (true) { - json_eat_whitespace(state); + JSON_PHASE_OBJECT_COLON: { + JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); - if (state->cursor < state->end) { - if (*state->cursor == '}') { - state->cursor++; - state->current_nesting--; - size_t count = state->stack->head - stack_head; + json_eat_whitespace(state); - // Temporary rewind cursor in case an error is raised - const char *final_cursor = state->cursor; - state->cursor = object_start_cursor; - VALUE object = json_decode_object(state, config, count); - state->cursor = final_cursor; + if (RB_LIKELY(peek(state) == ':')) { + state->cursor++; + frame->phase = JSON_PHASE_VALUE; + goto JSON_PHASE_VALUE; + } else { + // First colon (only the first pair's key is pushed, nothing + // else) vs. a later one. + if (json_frame_entry_count(frame, state->value_stack) == 1) { + raise_parse_error("expected ':' after object key", state); + } else { + raise_parse_error("expected ':' after object key, got: %s", state); + } + } + UNREACHABLE_RETURN(Qundef); + } - return json_push_value(state, config, object); - } + JSON_PHASE_ARRAY_COMMA: { + JSON_ASSERT(frame->type == JSON_FRAME_ARRAY); - if (*state->cursor == ',') { - state->cursor++; - json_eat_whitespace(state); + json_eat_whitespace(state); - if (config->allow_trailing_comma) { - if ((state->cursor < state->end) && (*state->cursor == '}')) { - continue; - } - } + const char next_char = peek(state); - if (*state->cursor != '"') { - raise_parse_error("expected object key, got: %s", state); - } - json_parse_string(state, config, true); + if (RB_LIKELY(next_char == ',')) { + state->cursor++; + if (config->allow_trailing_comma) { + json_eat_whitespace(state); + if (peek(state) == ']') { + // Trailing comma: stay in COMMA to close on the next iteration. + goto JSON_PHASE_ARRAY_COMMA; + } + } + frame->phase = JSON_PHASE_VALUE; + goto JSON_PHASE_VALUE; + } else if (next_char == ']') { + state->cursor++; + long count = json_frame_entry_count(frame, state->value_stack); + state->current_nesting--; + state->in_array--; + json_frame_stack_pop(state->frames); + json_push_value(state, config, json_decode_array(state, config, count)); + frame = json_frame_stack_peek(state->frames); + json_value_completed(frame); + + switch (frame->phase) { + case JSON_PHASE_DONE: goto JSON_PHASE_DONE; + case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA; + case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA; + case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE; + case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef); + case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON; + } + } else { + raise_parse_error("expected ',' or ']' after array value", state); + } + UNREACHABLE_RETURN(Qundef); + } - json_eat_whitespace(state); - if ((state->cursor >= state->end) || (*state->cursor != ':')) { - raise_parse_error("expected ':' after object key, got: %s", state); - } - state->cursor++; + JSON_PHASE_OBJECT_COMMA: { + JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); - json_parse_any(state, config); + json_eat_whitespace(state); + const char next_char = peek(state); - continue; - } + if (RB_LIKELY(next_char == ',')) { + state->cursor++; + + if (config->allow_trailing_comma) { + json_eat_whitespace(state); + if (peek(state) == '}') { + // Trailing comma: stay in COMMA to close on the next iteration. + goto JSON_PHASE_OBJECT_COMMA; } + } - raise_parse_error("expected ',' or '}' after object value, got: %s", state); + frame->phase = JSON_PHASE_OBJECT_KEY; + goto JSON_PHASE_OBJECT_KEY; + } else if (next_char == '}') { + state->cursor++; + state->current_nesting--; + size_t count = json_frame_entry_count(frame, state->value_stack); + + // Temporary rewind cursor in case an error is raised + const char *final_cursor = state->cursor; + state->cursor = frame->start_cursor; + VALUE object = json_decode_object(state, config, count); + state->cursor = final_cursor; + + json_push_value(state, config, object); + json_frame_stack_pop(state->frames); + frame = json_frame_stack_peek(state->frames); + json_value_completed(frame); + + switch (frame->phase) { + case JSON_PHASE_DONE: goto JSON_PHASE_DONE; + case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA; + case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA; + case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE; + case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef); + case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON; } - break; + } else { + raise_parse_error("expected ',' or '}' after object value, got: %s", state); } - - default: - raise_parse_error("unexpected character: %s", state); - break; + UNREACHABLE_RETURN(Qundef); } - raise_parse_error("unreachable: %s", state); + UNREACHABLE_RETURN(Qundef); } static void json_ensure_eof(JSON_ParserState *state) { json_eat_whitespace(state); - if (state->cursor != state->end) { + if (!eos(state)) { raise_parse_error("unexpected token at end of stream %s", state); } } @@ -1290,38 +1788,56 @@ static void json_ensure_eof(JSON_ParserState *state) static VALUE convert_encoding(VALUE source) { - int encindex = RB_ENCODING_GET(source); + StringValue(source); + int encindex = RB_ENCODING_GET(source); - if (RB_LIKELY(encindex == utf8_encindex)) { + if (RB_LIKELY(encindex == utf8_encindex)) { + return source; + } + + if (encindex == binary_encindex) { + // For historical reason, we silently reinterpret binary strings as UTF-8 + return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); + } + + source = rb_funcall(source, i_encode, 1, Encoding_UTF_8); + StringValue(source); return source; - } +} - if (encindex == binary_encindex) { - // For historical reason, we silently reinterpret binary strings as UTF-8 - return rb_enc_associate_index(rb_str_dup(source), utf8_encindex); - } +struct parser_config_init_args { + JSON_ParserConfig *config; + VALUE self; +}; - return rb_funcall(source, i_encode, 1, Encoding_UTF_8); +static void parser_config_wb_write(VALUE self, VALUE *dest, VALUE val) +{ + *dest = val; + if (self) RB_OBJ_WRITTEN(self, Qundef, val); } static int parser_config_init_i(VALUE key, VALUE val, VALUE data) { - JSON_ParserConfig *config = (JSON_ParserConfig *)data; - - if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); } - else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); } - else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { config->freeze = RTEST(val); } - else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; } - else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } - else if (key == sym_decimal_class) { + struct parser_config_init_args *args = (struct parser_config_init_args *)data; + JSON_ParserConfig *config = args->config; + VALUE self = args->self; + + if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); } + else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); } + else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); } + else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { config->freeze = RTEST(val); } + else if (key == sym_on_load) { parser_config_wb_write(self, &config->on_load_proc, RTEST(val) ? val : Qfalse); } + else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } + else if (key == sym_decimal_class) { if (RTEST(val)) { if (rb_respond_to(val, i_try_convert)) { - config->decimal_class = val; + parser_config_wb_write(self, &config->decimal_class, val); config->decimal_method_id = i_try_convert; } else if (rb_respond_to(val, i_new)) { - config->decimal_class = val; + parser_config_wb_write(self, &config->decimal_class, val); config->decimal_method_id = i_new; } else if (RB_TYPE_P(val, T_CLASS)) { VALUE name = rb_class_name(val); @@ -1330,7 +1846,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data) if (last_colon) { const char *mod_path_end = last_colon - 1; VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr); - config->decimal_class = rb_path_to_class(mod_path); + parser_config_wb_write(self, &config->decimal_class, rb_path_to_class(mod_path)); const char *method_name_beg = last_colon + 1; long before_len = method_name_beg - name_cstr; @@ -1338,7 +1854,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data) VALUE method_name = rb_str_substr(name, before_len, len); config->decimal_method_id = SYM2ID(rb_str_intern(method_name)); } else { - config->decimal_class = rb_mKernel; + parser_config_wb_write(self, &config->decimal_class, rb_mKernel); config->decimal_method_id = SYM2ID(rb_str_intern(name)); } } @@ -1348,16 +1864,21 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data) return ST_CONTINUE; } -static void parser_config_init(JSON_ParserConfig *config, VALUE opts) +static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self) { config->max_nesting = 100; + struct parser_config_init_args args = { + .config = config, + .self = self, + }; + if (!NIL_P(opts)) { Check_Type(opts, T_HASH); if (RHASH_SIZE(opts) > 0) { // We assume in most cases few keys are set so it's faster to go over // the provided keys than to check all possible keys. - rb_hash_foreach(opts, parser_config_init_i, (VALUE)config); + rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args); } } @@ -1388,36 +1909,62 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts) */ static VALUE cParserConfig_initialize(VALUE self, VALUE opts) { + rb_check_frozen(self); GET_PARSER_CONFIG; - parser_config_init(config, opts); - - RB_OBJ_WRITTEN(self, Qundef, config->decimal_class); + parser_config_init(config, opts, self); return self; } -static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource) +static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src) { - Vsource = convert_encoding(StringValue(Vsource)); - StringValue(Vsource); + VALUE Vsource = convert_encoding(src); + + // Ensure the string isn't mutated under us. + // The classic API to use is `rb_str_locktmp`, but then we'd + // need to use `rb_protect` to make sure we always unlock. + if (Vsource == src) { + Vsource = rb_str_new_frozen(Vsource); + } VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; - rvalue_stack stack = { + rvalue_stack value_stack = { .type = RVALUE_STACK_STACK_ALLOCATED, .ptr = rvalue_stack_buffer, .capa = RVALUE_STACK_INITIAL_CAPA, }; + // Seed the frame stack with the root frame, establishing the invariant that + // json_parse_any always has a top frame to dispatch on (so the stack is never + // empty mid-parse). + json_frame frame_stack_buffer[JSON_FRAME_STACK_INITIAL_CAPA]; + frame_stack_buffer[0] = (json_frame){ + .type = JSON_FRAME_ROOT, + .phase = JSON_PHASE_VALUE, + }; + json_frame_stack frames = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = frame_stack_buffer, + .capa = JSON_FRAME_STACK_INITIAL_CAPA, + .head = 1, + }; + long len; const char *start; + RSTRING_GETMEM(Vsource, start, len); + VALUE value_stack_handle = 0; + VALUE frame_stack_handle = 0; JSON_ParserState _state = { .start = start, .cursor = start, .end = start + len, - .stack = &stack, + .value_stack = &value_stack, + .value_stack_handle = &value_stack_handle, + .frames = &frames, + .frame_stack_handle = &frame_stack_handle, }; JSON_ParserState *state = &_state; @@ -1425,8 +1972,11 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource) // This may be skipped in case of exception, but // it won't cause a leak. - rvalue_stack_eagerly_release(state->stack_handle); - + rvalue_stack_eagerly_release(value_stack_handle); + json_frame_stack_eagerly_release(frame_stack_handle); + RB_GC_GUARD(value_stack_handle); + RB_GC_GUARD(frame_stack_handle); + RB_GC_GUARD(Vsource); json_ensure_eof(state); return result; @@ -1447,12 +1997,9 @@ static VALUE cParserConfig_parse(VALUE self, VALUE Vsource) static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts) { - Vsource = convert_encoding(StringValue(Vsource)); - StringValue(Vsource); - JSON_ParserConfig _config = {0}; JSON_ParserConfig *config = &_config; - parser_config_init(config, opts); + parser_config_init(config, opts, false); return cParser_parse(config, Vsource); } @@ -1460,30 +2007,35 @@ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts) static void JSON_ParserConfig_mark(void *ptr) { JSON_ParserConfig *config = ptr; - rb_gc_mark(config->on_load_proc); - rb_gc_mark(config->decimal_class); + rb_gc_mark_movable(config->on_load_proc); + rb_gc_mark_movable(config->decimal_class); } -static void JSON_ParserConfig_free(void *ptr) +static size_t JSON_ParserConfig_memsize(const void *ptr) { - JSON_ParserConfig *config = ptr; - ruby_xfree(config); +#ifdef HAVE_RUBY_TYPED_EMBEDDABLE + return 0; +#else + return sizeof(JSON_ParserConfig); +#endif } -static size_t JSON_ParserConfig_memsize(const void *ptr) +static void JSON_ParserConfig_compact(void *ptr) { - return sizeof(JSON_ParserConfig); + JSON_ParserConfig *config = ptr; + config->on_load_proc = rb_gc_location(config->on_load_proc); + config->decimal_class = rb_gc_location(config->decimal_class); } static const rb_data_type_t JSON_ParserConfig_type = { - "JSON::Ext::Parser/ParserConfig", - { - JSON_ParserConfig_mark, - JSON_ParserConfig_free, - JSON_ParserConfig_memsize, + .wrap_struct_name = "JSON::Ext::Parser/ParserConfig", + .function = { + .dmark = JSON_ParserConfig_mark, + .dfree = RUBY_DEFAULT_FREE, + .dsize = JSON_ParserConfig_memsize, + .dcompact = JSON_ParserConfig_compact, }, - 0, 0, - RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE, }; static VALUE cJSON_parser_s_allocate(VALUE klass) @@ -1527,16 +2079,14 @@ void Init_parser(void) sym_max_nesting = ID2SYM(rb_intern("max_nesting")); sym_allow_nan = ID2SYM(rb_intern("allow_nan")); sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters")); + sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape")); sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); sym_freeze = ID2SYM(rb_intern("freeze")); sym_on_load = ID2SYM(rb_intern("on_load")); sym_decimal_class = ID2SYM(rb_intern("decimal_class")); sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key")); - i_chr = rb_intern("chr"); - i_aset = rb_intern("[]="); - i_aref = rb_intern("[]"); - i_leftshift = rb_intern("<<"); i_new = rb_intern("new"); i_try_convert = rb_intern("try_convert"); i_uminus = rb_intern("-@"); diff --git a/ext/json/simd/simd.h b/ext/json/simd/simd.h index 3abbdb0209..611b41b066 100644 --- a/ext/json/simd/simd.h +++ b/ext/json/simd/simd.h @@ -1,10 +1,14 @@ +#include "../json.h" + typedef enum { SIMD_NONE, SIMD_NEON, SIMD_SSE2 } SIMD_Implementation; -#ifdef JSON_ENABLE_SIMD +#ifndef __has_builtin // Optional of course. + #define __has_builtin(x) 0 // Compatibility with non-clang compilers. +#endif #ifdef __clang__ # if __has_builtin(__builtin_ctzll) @@ -20,6 +24,8 @@ typedef enum { static inline uint32_t trailing_zeros64(uint64_t input) { + JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior + #if HAVE_BUILTIN_CTZLL return __builtin_ctzll(input); #else @@ -35,6 +41,8 @@ static inline uint32_t trailing_zeros64(uint64_t input) static inline int trailing_zeros(int input) { + JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior + #if HAVE_BUILTIN_CTZLL return __builtin_ctz(input); #else @@ -48,14 +56,36 @@ static inline int trailing_zeros(int input) #endif } -#if (defined(__GNUC__ ) || defined(__clang__)) -#define FORCE_INLINE __attribute__((always_inline)) -#else -#define FORCE_INLINE -#endif +#ifdef JSON_ENABLE_SIMD +#define SIMD_MINIMUM_THRESHOLD 4 -#define SIMD_MINIMUM_THRESHOLD 6 +ALWAYS_INLINE(static) void json_fast_memcpy16(char *dst, const char *src, size_t len) +{ + RBIMPL_ASSERT_OR_ASSUME(len < 16); + RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); // 4 +#if defined(__has_builtin) && __has_builtin(__builtin_memcpy) + // If __builtin_memcpy is available, use it to copy between SIMD_MINIMUM_THRESHOLD (4) and vec_len-1 (15) bytes. + // These copies overlap. The first copy will copy the first 8 (or 4) bytes. The second copy will copy + // the last 8 (or 4) bytes but overlap with the first copy. The overlapping bytes will be in the correct + // position in both copies. + + // Please do not attempt to replace __builtin_memcpy with memcpy without profiling and/or looking at the + // generated assembly. On clang-specifically (tested on Apple clang version 17.0.0 (clang-1700.0.13.3)), + // when using memcpy, the compiler will notice the only difference is a 4 or 8 and generate a conditional + // select instruction instead of direct loads and stores with a branch. This ends up slower than the branch + // plus two loads and stores generated when using __builtin_memcpy. + if (len >= 8) { + __builtin_memcpy(dst, src, 8); + __builtin_memcpy(dst + len - 8, src + len - 8, 8); + } else { + __builtin_memcpy(dst, src, 4); + __builtin_memcpy(dst + len - 4, src + len - 4, 4); + } +#else + MEMCPY(dst, src, char, len); +#endif +} #if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64) #include <arm_neon.h> @@ -70,14 +100,14 @@ static inline SIMD_Implementation find_simd_implementation(void) #define HAVE_SIMD_NEON 1 // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon -static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches) +ALWAYS_INLINE(static) uint64_t neon_match_mask(uint8x16_t matches) { const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4); const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0); return mask & 0x8888888888888888ull; } -static inline FORCE_INLINE uint64_t compute_chunk_mask_neon(const char *ptr) +ALWAYS_INLINE(static) uint64_t compute_chunk_mask_neon(const char *ptr) { uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr); @@ -90,7 +120,7 @@ static inline FORCE_INLINE uint64_t compute_chunk_mask_neon(const char *ptr) return neon_match_mask(needs_escape); } -static inline FORCE_INLINE int string_scan_simd_neon(const char **ptr, const char *end, uint64_t *mask) +ALWAYS_INLINE(static) int string_scan_simd_neon(const char **ptr, const char *end, uint64_t *mask) { while (*ptr + sizeof(uint8x16_t) <= end) { uint64_t chunk_mask = compute_chunk_mask_neon(*ptr); @@ -103,16 +133,6 @@ static inline FORCE_INLINE int string_scan_simd_neon(const char **ptr, const cha return 0; } -static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table) -{ - uint8x16x4_t tab; - tab.val[0] = vld1q_u8(table); - tab.val[1] = vld1q_u8(table+16); - tab.val[2] = vld1q_u8(table+32); - tab.val[3] = vld1q_u8(table+48); - return tab; -} - #endif /* ARM Neon Support.*/ #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) @@ -137,7 +157,7 @@ static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table) #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1)) #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a) -static inline TARGET_SSE2 FORCE_INLINE int compute_chunk_mask_sse2(const char *ptr) +ALWAYS_INLINE(static) TARGET_SSE2 int compute_chunk_mask_sse2(const char *ptr) { __m128i chunk = _mm_loadu_si128((__m128i const*)ptr); // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 @@ -148,7 +168,7 @@ static inline TARGET_SSE2 FORCE_INLINE int compute_chunk_mask_sse2(const char *p return _mm_movemask_epi8(needs_escape); } -static inline TARGET_SSE2 FORCE_INLINE int string_scan_simd_sse2(const char **ptr, const char *end, int *mask) +ALWAYS_INLINE(static) TARGET_SSE2 int string_scan_simd_sse2(const char **ptr, const char *end, int *mask) { while (*ptr + sizeof(__m128i) <= end) { int chunk_mask = compute_chunk_mask_sse2(*ptr); diff --git a/ext/json/vendor/fpconv.c b/ext/json/vendor/fpconv.c index 75efd46f11..6c9bc2c103 100644 --- a/ext/json/vendor/fpconv.c +++ b/ext/json/vendor/fpconv.c @@ -29,6 +29,10 @@ #include <string.h> #include <stdint.h> +#if JSON_DEBUG +#include <assert.h> +#endif + #define npowers 87 #define steppowers 8 #define firstpower -348 /* 10 ^ -348 */ @@ -320,15 +324,7 @@ static int emit_digits(char* digits, int ndigits, char* dest, int K, bool neg) { int exp = absv(K + ndigits - 1); - int max_trailing_zeros = 7; - - if(neg) { - max_trailing_zeros -= 1; - } - - /* write plain integer */ - if(K >= 0 && (exp < (ndigits + max_trailing_zeros))) { - + if(K >= 0 && exp < 15) { memcpy(dest, digits, ndigits); memset(dest + ndigits, '0', K); @@ -432,10 +428,12 @@ static int filter_special(double fp, char* dest) * * Input: * fp -> the double to convert, dest -> destination buffer. - * The generated string will never be longer than 28 characters. - * Make sure to pass a pointer to at least 28 bytes of memory. + * The generated string will never be longer than 32 characters. + * Make sure to pass a pointer to at least 32 bytes of memory. * The emitted string will not be null terminated. * + * + * * Output: * The number of written characters. * @@ -451,7 +449,7 @@ static int filter_special(double fp, char* dest) * } * */ -static int fpconv_dtoa(double d, char dest[28]) +static int fpconv_dtoa(double d, char dest[32]) { char digits[18]; @@ -474,6 +472,9 @@ static int fpconv_dtoa(double d, char dest[28]) int ndigits = grisu2(d, digits, &K); str_len += emit_digits(digits, ndigits, dest + str_len, K, neg); +#if JSON_DEBUG + assert(str_len <= 32); +#endif return str_len; } diff --git a/ext/json/vendor/ryu.h b/ext/json/vendor/ryu.h new file mode 100644 index 0000000000..f06ec814b4 --- /dev/null +++ b/ext/json/vendor/ryu.h @@ -0,0 +1,819 @@ +// Copyright 2018 Ulf Adams +// +// The contents of this file may be used under the terms of the Apache License, +// Version 2.0. +// +// Alternatively, the contents of this file may be used under the terms of +// the Boost Software License, Version 1.0. +// +// Unless required by applicable law or agreed to in writing, this software +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. +// +// --- +// +// Apache License +// Version 2.0, January 2004 +// http://www.apache.org/licenses/ +// +// TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +// +// 1. Definitions. +// +// "License" shall mean the terms and conditions for use, reproduction, +// and distribution as defined by Sections 1 through 9 of this document. +// +// "Licensor" shall mean the copyright owner or entity authorized by +// the copyright owner that is granting the License. +// +// "Legal Entity" shall mean the union of the acting entity and all +// other entities that control, are controlled by, or are under common +// control with that entity. For the purposes of this definition, +// "control" means (i) the power, direct or indirect, to cause the +// direction or management of such entity, whether by contract or +// otherwise, or (ii) ownership of fifty percent (50%) or more of the +// outstanding shares, or (iii) beneficial ownership of such entity. +// +// "You" (or "Your") shall mean an individual or Legal Entity +// exercising permissions granted by this License. +// +// "Source" form shall mean the preferred form for making modifications, +// including but not limited to software source code, documentation +// source, and configuration files. +// +// "Object" form shall mean any form resulting from mechanical +// transformation or translation of a Source form, including but +// not limited to compiled object code, generated documentation, +// and conversions to other media types. +// +// "Work" shall mean the work of authorship, whether in Source or +// Object form, made available under the License, as indicated by a +// copyright notice that is included in or attached to the work +// (an example is provided in the Appendix below). +// +// "Derivative Works" shall mean any work, whether in Source or Object +// form, that is based on (or derived from) the Work and for which the +// editorial revisions, annotations, elaborations, or other modifications +// represent, as a whole, an original work of authorship. For the purposes +// of this License, Derivative Works shall not include works that remain +// separable from, or merely link (or bind by name) to the interfaces of, +// the Work and Derivative Works thereof. +// +// "Contribution" shall mean any work of authorship, including +// the original version of the Work and any modifications or additions +// to that Work or Derivative Works thereof, that is intentionally +// submitted to Licensor for inclusion in the Work by the copyright owner +// or by an individual or Legal Entity authorized to submit on behalf of +// the copyright owner. For the purposes of this definition, "submitted" +// means any form of electronic, verbal, or written communication sent +// to the Licensor or its representatives, including but not limited to +// communication on electronic mailing lists, source code control systems, +// and issue tracking systems that are managed by, or on behalf of, the +// Licensor for the purpose of discussing and improving the Work, but +// excluding communication that is conspicuously marked or otherwise +// designated in writing by the copyright owner as "Not a Contribution." +// +// "Contributor" shall mean Licensor and any individual or Legal Entity +// on behalf of whom a Contribution has been received by Licensor and +// subsequently incorporated within the Work. +// +// 2. Grant of Copyright License. Subject to the terms and conditions of +// this License, each Contributor hereby grants to You a perpetual, +// worldwide, non-exclusive, no-charge, royalty-free, irrevocable +// copyright license to reproduce, prepare Derivative Works of, +// publicly display, publicly perform, sublicense, and distribute the +// Work and such Derivative Works in Source or Object form. +// +// 3. Grant of Patent License. Subject to the terms and conditions of +// this License, each Contributor hereby grants to You a perpetual, +// worldwide, non-exclusive, no-charge, royalty-free, irrevocable +// (except as stated in this section) patent license to make, have made, +// use, offer to sell, sell, import, and otherwise transfer the Work, +// where such license applies only to those patent claims licensable +// by such Contributor that are necessarily infringed by their +// Contribution(s) alone or by combination of their Contribution(s) +// with the Work to which such Contribution(s) was submitted. If You +// institute patent litigation against any entity (including a +// cross-claim or counterclaim in a lawsuit) alleging that the Work +// or a Contribution incorporated within the Work constitutes direct +// or contributory patent infringement, then any patent licenses +// granted to You under this License for that Work shall terminate +// as of the date such litigation is filed. +// +// 4. Redistribution. You may reproduce and distribute copies of the +// Work or Derivative Works thereof in any medium, with or without +// modifications, and in Source or Object form, provided that You +// meet the following conditions: +// +// (a) You must give any other recipients of the Work or +// Derivative Works a copy of this License; and +// +// (b) You must cause any modified files to carry prominent notices +// stating that You changed the files; and +// +// (c) You must retain, in the Source form of any Derivative Works +// that You distribute, all copyright, patent, trademark, and +// attribution notices from the Source form of the Work, +// excluding those notices that do not pertain to any part of +// the Derivative Works; and +// +// (d) If the Work includes a "NOTICE" text file as part of its +// distribution, then any Derivative Works that You distribute must +// include a readable copy of the attribution notices contained +// within such NOTICE file, excluding those notices that do not +// pertain to any part of the Derivative Works, in at least one +// of the following places: within a NOTICE text file distributed +// as part of the Derivative Works; within the Source form or +// documentation, if provided along with the Derivative Works; or, +// within a display generated by the Derivative Works, if and +// wherever such third-party notices normally appear. The contents +// of the NOTICE file are for informational purposes only and +// do not modify the License. You may add Your own attribution +// notices within Derivative Works that You distribute, alongside +// or as an addendum to the NOTICE text from the Work, provided +// that such additional attribution notices cannot be construed +// as modifying the License. +// +// You may add Your own copyright statement to Your modifications and +// may provide additional or different license terms and conditions +// for use, reproduction, or distribution of Your modifications, or +// for any such Derivative Works as a whole, provided Your use, +// reproduction, and distribution of the Work otherwise complies with +// the conditions stated in this License. +// +// 5. Submission of Contributions. Unless You explicitly state otherwise, +// any Contribution intentionally submitted for inclusion in the Work +// by You to the Licensor shall be under the terms and conditions of +// this License, without any additional terms or conditions. +// Notwithstanding the above, nothing herein shall supersede or modify +// the terms of any separate license agreement you may have executed +// with Licensor regarding such Contributions. +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor, +// except as required for reasonable and customary use in describing the +// origin of the Work and reproducing the content of the NOTICE file. +// +// 7. Disclaimer of Warranty. Unless required by applicable law or +// agreed to in writing, Licensor provides the Work (and each +// Contributor provides its Contributions) on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied, including, without limitation, any warranties or conditions +// of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +// PARTICULAR PURPOSE. You are solely responsible for determining the +// appropriateness of using or redistributing the Work and assume any +// risks associated with Your exercise of permissions under this License. +// +// 8. Limitation of Liability. In no event and under no legal theory, +// whether in tort (including negligence), contract, or otherwise, +// unless required by applicable law (such as deliberate and grossly +// negligent acts) or agreed to in writing, shall any Contributor be +// liable to You for damages, including any direct, indirect, special, +// incidental, or consequential damages of any character arising as a +// result of this License or out of the use or inability to use the +// Work (including but not limited to damages for loss of goodwill, +// work stoppage, computer failure or malfunction, or any and all +// other commercial damages or losses), even if such Contributor +// has been advised of the possibility of such damages. +// +// 9. Accepting Warranty or Additional Liability. While redistributing +// the Work or Derivative Works thereof, You may choose to offer, +// and charge a fee for, acceptance of support, warranty, indemnity, +// or other liability obligations and/or rights consistent with this +// License. However, in accepting such obligations, You may act only +// on Your own behalf and on Your sole responsibility, not on behalf +// of any other Contributor, and only if You agree to indemnify, +// defend, and hold each Contributor harmless for any liability +// incurred by, or claims asserted against, such Contributor by reason +// of your accepting any such warranty or additional liability. +// +// END OF TERMS AND CONDITIONS +// +// APPENDIX: How to apply the Apache License to your work. +// +// To apply the Apache License to your work, attach the following +// boilerplate notice, with the fields enclosed by brackets "[]" +// replaced with your own identifying information. (Don't include +// the brackets!) The text should be enclosed in the appropriate +// comment syntax for the file format. We also recommend that a +// file or class name and description of purpose be included on the +// same "printed page" as the copyright notice for easier +// identification within third-party archives. +// +// Copyright [yyyy] [name of copyright owner] +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// --- +// +// Boost Software License - Version 1.0 - August 17th, 2003 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// --- +// Minimal Ryu implementation adapted for Ruby JSON gem by Josef Šimánek +// Optimized for pre-extracted mantissa/exponent from JSON parsing +// This is a stripped-down version containing only what's needed for +// converting decimal mantissa+exponent to IEEE 754 double precision. + +#ifndef RYU_H +#define RYU_H + +#include <stdint.h> +#include <stdbool.h> +#include <string.h> + +// Detect __builtin_clzll availability (for floor_log2) +// Note: MSVC doesn't have __builtin_clzll, so we provide a fallback +#ifdef __clang__ + #if __has_builtin(__builtin_clzll) + #define RYU_HAVE_BUILTIN_CLZLL 1 + #else + #define RYU_HAVE_BUILTIN_CLZLL 0 + #endif +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define RYU_HAVE_BUILTIN_CLZLL 1 +#else + #define RYU_HAVE_BUILTIN_CLZLL 0 +#endif + +// Count leading zeros (for floor_log2) +static inline uint32_t ryu_leading_zeros64(uint64_t input) +{ +#if RYU_HAVE_BUILTIN_CLZLL + return __builtin_clzll(input); +#else + // Fallback: binary search for the highest set bit + // This works on MSVC and other compilers without __builtin_clzll + if (input == 0) return 64; + uint32_t n = 0; + if (input <= 0x00000000FFFFFFFFULL) { n += 32; input <<= 32; } + if (input <= 0x0000FFFFFFFFFFFFULL) { n += 16; input <<= 16; } + if (input <= 0x00FFFFFFFFFFFFFFULL) { n += 8; input <<= 8; } + if (input <= 0x0FFFFFFFFFFFFFFFULL) { n += 4; input <<= 4; } + if (input <= 0x3FFFFFFFFFFFFFFFULL) { n += 2; input <<= 2; } + if (input <= 0x7FFFFFFFFFFFFFFFULL) { n += 1; } + return n; +#endif +} + +// These tables are generated by PrintDoubleLookupTable. +#define DOUBLE_POW5_INV_BITCOUNT 125 +#define DOUBLE_POW5_BITCOUNT 125 + +#define DOUBLE_POW5_INV_TABLE_SIZE 342 +#define DOUBLE_POW5_TABLE_SIZE 326 + +static const uint64_t DOUBLE_POW5_INV_SPLIT[DOUBLE_POW5_INV_TABLE_SIZE][2] = { + { 1u, 2305843009213693952u }, { 11068046444225730970u, 1844674407370955161u }, + { 5165088340638674453u, 1475739525896764129u }, { 7821419487252849886u, 1180591620717411303u }, + { 8824922364862649494u, 1888946593147858085u }, { 7059937891890119595u, 1511157274518286468u }, + { 13026647942995916322u, 1208925819614629174u }, { 9774590264567735146u, 1934281311383406679u }, + { 11509021026396098440u, 1547425049106725343u }, { 16585914450600699399u, 1237940039285380274u }, + { 15469416676735388068u, 1980704062856608439u }, { 16064882156130220778u, 1584563250285286751u }, + { 9162556910162266299u, 1267650600228229401u }, { 7281393426775805432u, 2028240960365167042u }, + { 16893161185646375315u, 1622592768292133633u }, { 2446482504291369283u, 1298074214633706907u }, + { 7603720821608101175u, 2076918743413931051u }, { 2393627842544570617u, 1661534994731144841u }, + { 16672297533003297786u, 1329227995784915872u }, { 11918280793837635165u, 2126764793255865396u }, + { 5845275820328197809u, 1701411834604692317u }, { 15744267100488289217u, 1361129467683753853u }, + { 3054734472329800808u, 2177807148294006166u }, { 17201182836831481939u, 1742245718635204932u }, + { 6382248639981364905u, 1393796574908163946u }, { 2832900194486363201u, 2230074519853062314u }, + { 5955668970331000884u, 1784059615882449851u }, { 1075186361522890384u, 1427247692705959881u }, + { 12788344622662355584u, 2283596308329535809u }, { 13920024512871794791u, 1826877046663628647u }, + { 3757321980813615186u, 1461501637330902918u }, { 10384555214134712795u, 1169201309864722334u }, + { 5547241898389809503u, 1870722095783555735u }, { 4437793518711847602u, 1496577676626844588u }, + { 10928932444453298728u, 1197262141301475670u }, { 17486291911125277965u, 1915619426082361072u }, + { 6610335899416401726u, 1532495540865888858u }, { 12666966349016942027u, 1225996432692711086u }, + { 12888448528943286597u, 1961594292308337738u }, { 17689456452638449924u, 1569275433846670190u }, + { 14151565162110759939u, 1255420347077336152u }, { 7885109000409574610u, 2008672555323737844u }, + { 9997436015069570011u, 1606938044258990275u }, { 7997948812055656009u, 1285550435407192220u }, + { 12796718099289049614u, 2056880696651507552u }, { 2858676849947419045u, 1645504557321206042u }, + { 13354987924183666206u, 1316403645856964833u }, { 17678631863951955605u, 2106245833371143733u }, + { 3074859046935833515u, 1684996666696914987u }, { 13527933681774397782u, 1347997333357531989u }, + { 10576647446613305481u, 2156795733372051183u }, { 15840015586774465031u, 1725436586697640946u }, + { 8982663654677661702u, 1380349269358112757u }, { 18061610662226169046u, 2208558830972980411u }, + { 10759939715039024913u, 1766847064778384329u }, { 12297300586773130254u, 1413477651822707463u }, + { 15986332124095098083u, 2261564242916331941u }, { 9099716884534168143u, 1809251394333065553u }, + { 14658471137111155161u, 1447401115466452442u }, { 4348079280205103483u, 1157920892373161954u }, + { 14335624477811986218u, 1852673427797059126u }, { 7779150767507678651u, 1482138742237647301u }, + { 2533971799264232598u, 1185710993790117841u }, { 15122401323048503126u, 1897137590064188545u }, + { 12097921058438802501u, 1517710072051350836u }, { 5988988032009131678u, 1214168057641080669u }, + { 16961078480698431330u, 1942668892225729070u }, { 13568862784558745064u, 1554135113780583256u }, + { 7165741412905085728u, 1243308091024466605u }, { 11465186260648137165u, 1989292945639146568u }, + { 16550846638002330379u, 1591434356511317254u }, { 16930026125143774626u, 1273147485209053803u }, + { 4951948911778577463u, 2037035976334486086u }, { 272210314680951647u, 1629628781067588869u }, + { 3907117066486671641u, 1303703024854071095u }, { 6251387306378674625u, 2085924839766513752u }, + { 16069156289328670670u, 1668739871813211001u }, { 9165976216721026213u, 1334991897450568801u }, + { 7286864317269821294u, 2135987035920910082u }, { 16897537898041588005u, 1708789628736728065u }, + { 13518030318433270404u, 1367031702989382452u }, { 6871453250525591353u, 2187250724783011924u }, + { 9186511415162383406u, 1749800579826409539u }, { 11038557946871817048u, 1399840463861127631u }, + { 10282995085511086630u, 2239744742177804210u }, { 8226396068408869304u, 1791795793742243368u }, + { 13959814484210916090u, 1433436634993794694u }, { 11267656730511734774u, 2293498615990071511u }, + { 5324776569667477496u, 1834798892792057209u }, { 7949170070475892320u, 1467839114233645767u }, + { 17427382500606444826u, 1174271291386916613u }, { 5747719112518849781u, 1878834066219066582u }, + { 15666221734240810795u, 1503067252975253265u }, { 12532977387392648636u, 1202453802380202612u }, + { 5295368560860596524u, 1923926083808324180u }, { 4236294848688477220u, 1539140867046659344u }, + { 7078384693692692099u, 1231312693637327475u }, { 11325415509908307358u, 1970100309819723960u }, + { 9060332407926645887u, 1576080247855779168u }, { 14626963555825137356u, 1260864198284623334u }, + { 12335095245094488799u, 2017382717255397335u }, { 9868076196075591040u, 1613906173804317868u }, + { 15273158586344293478u, 1291124939043454294u }, { 13369007293925138595u, 2065799902469526871u }, + { 7005857020398200553u, 1652639921975621497u }, { 16672732060544291412u, 1322111937580497197u }, + { 11918976037903224966u, 2115379100128795516u }, { 5845832015580669650u, 1692303280103036413u }, + { 12055363241948356366u, 1353842624082429130u }, { 841837113407818570u, 2166148198531886609u }, + { 4362818505468165179u, 1732918558825509287u }, { 14558301248600263113u, 1386334847060407429u }, + { 12225235553534690011u, 2218135755296651887u }, { 2401490813343931363u, 1774508604237321510u }, + { 1921192650675145090u, 1419606883389857208u }, { 17831303500047873437u, 2271371013423771532u }, + { 6886345170554478103u, 1817096810739017226u }, { 1819727321701672159u, 1453677448591213781u }, + { 16213177116328979020u, 1162941958872971024u }, { 14873036941900635463u, 1860707134196753639u }, + { 15587778368262418694u, 1488565707357402911u }, { 8780873879868024632u, 1190852565885922329u }, + { 2981351763563108441u, 1905364105417475727u }, { 13453127855076217722u, 1524291284333980581u }, + { 7073153469319063855u, 1219433027467184465u }, { 11317045550910502167u, 1951092843947495144u }, + { 12742985255470312057u, 1560874275157996115u }, { 10194388204376249646u, 1248699420126396892u }, + { 1553625868034358140u, 1997919072202235028u }, { 8621598323911307159u, 1598335257761788022u }, + { 17965325103354776697u, 1278668206209430417u }, { 13987124906400001422u, 2045869129935088668u }, + { 121653480894270168u, 1636695303948070935u }, { 97322784715416134u, 1309356243158456748u }, + { 14913111714512307107u, 2094969989053530796u }, { 8241140556867935363u, 1675975991242824637u }, + { 17660958889720079260u, 1340780792994259709u }, { 17189487779326395846u, 2145249268790815535u }, + { 13751590223461116677u, 1716199415032652428u }, { 18379969808252713988u, 1372959532026121942u }, + { 14650556434236701088u, 2196735251241795108u }, { 652398703163629901u, 1757388200993436087u }, + { 11589965406756634890u, 1405910560794748869u }, { 7475898206584884855u, 2249456897271598191u }, + { 2291369750525997561u, 1799565517817278553u }, { 9211793429904618695u, 1439652414253822842u }, + { 18428218302589300235u, 2303443862806116547u }, { 7363877012587619542u, 1842755090244893238u }, + { 13269799239553916280u, 1474204072195914590u }, { 10615839391643133024u, 1179363257756731672u }, + { 2227947767661371545u, 1886981212410770676u }, { 16539753473096738529u, 1509584969928616540u }, + { 13231802778477390823u, 1207667975942893232u }, { 6413489186596184024u, 1932268761508629172u }, + { 16198837793502678189u, 1545815009206903337u }, { 5580372605318321905u, 1236652007365522670u }, + { 8928596168509315048u, 1978643211784836272u }, { 18210923379033183008u, 1582914569427869017u }, + { 7190041073742725760u, 1266331655542295214u }, { 436019273762630246u, 2026130648867672343u }, + { 7727513048493924843u, 1620904519094137874u }, { 9871359253537050198u, 1296723615275310299u }, + { 4726128361433549347u, 2074757784440496479u }, { 7470251503888749801u, 1659806227552397183u }, + { 13354898832594820487u, 1327844982041917746u }, { 13989140502667892133u, 2124551971267068394u }, + { 14880661216876224029u, 1699641577013654715u }, { 11904528973500979224u, 1359713261610923772u }, + { 4289851098633925465u, 2175541218577478036u }, { 18189276137874781665u, 1740432974861982428u }, + { 3483374466074094362u, 1392346379889585943u }, { 1884050330976640656u, 2227754207823337509u }, + { 5196589079523222848u, 1782203366258670007u }, { 15225317707844309248u, 1425762693006936005u }, + { 5913764258841343181u, 2281220308811097609u }, { 8420360221814984868u, 1824976247048878087u }, + { 17804334621677718864u, 1459980997639102469u }, { 17932816512084085415u, 1167984798111281975u }, + { 10245762345624985047u, 1868775676978051161u }, { 4507261061758077715u, 1495020541582440929u }, + { 7295157664148372495u, 1196016433265952743u }, { 7982903447895485668u, 1913626293225524389u }, + { 10075671573058298858u, 1530901034580419511u }, { 4371188443704728763u, 1224720827664335609u }, + { 14372599139411386667u, 1959553324262936974u }, { 15187428126271019657u, 1567642659410349579u }, + { 15839291315758726049u, 1254114127528279663u }, { 3206773216762499739u, 2006582604045247462u }, + { 13633465017635730761u, 1605266083236197969u }, { 14596120828850494932u, 1284212866588958375u }, + { 4907049252451240275u, 2054740586542333401u }, { 236290587219081897u, 1643792469233866721u }, + { 14946427728742906810u, 1315033975387093376u }, { 16535586736504830250u, 2104054360619349402u }, + { 5849771759720043554u, 1683243488495479522u }, { 15747863852001765813u, 1346594790796383617u }, + { 10439186904235184007u, 2154551665274213788u }, { 15730047152871967852u, 1723641332219371030u }, + { 12584037722297574282u, 1378913065775496824u }, { 9066413911450387881u, 2206260905240794919u }, + { 10942479943902220628u, 1765008724192635935u }, { 8753983955121776503u, 1412006979354108748u }, + { 10317025513452932081u, 2259211166966573997u }, { 874922781278525018u, 1807368933573259198u }, + { 8078635854506640661u, 1445895146858607358u }, { 13841606313089133175u, 1156716117486885886u }, + { 14767872471458792434u, 1850745787979017418u }, { 746251532941302978u, 1480596630383213935u }, + { 597001226353042382u, 1184477304306571148u }, { 15712597221132509104u, 1895163686890513836u }, + { 8880728962164096960u, 1516130949512411069u }, { 10793931984473187891u, 1212904759609928855u }, + { 17270291175157100626u, 1940647615375886168u }, { 2748186495899949531u, 1552518092300708935u }, + { 2198549196719959625u, 1242014473840567148u }, { 18275073973719576693u, 1987223158144907436u }, + { 10930710364233751031u, 1589778526515925949u }, { 12433917106128911148u, 1271822821212740759u }, + { 8826220925580526867u, 2034916513940385215u }, { 7060976740464421494u, 1627933211152308172u }, + { 16716827836597268165u, 1302346568921846537u }, { 11989529279587987770u, 2083754510274954460u }, + { 9591623423670390216u, 1667003608219963568u }, { 15051996368420132820u, 1333602886575970854u }, + { 13015147745246481542u, 2133764618521553367u }, { 3033420566713364587u, 1707011694817242694u }, + { 6116085268112601993u, 1365609355853794155u }, { 9785736428980163188u, 2184974969366070648u }, + { 15207286772667951197u, 1747979975492856518u }, { 1097782973908629988u, 1398383980394285215u }, + { 1756452758253807981u, 2237414368630856344u }, { 5094511021344956708u, 1789931494904685075u }, + { 4075608817075965366u, 1431945195923748060u }, { 6520974107321544586u, 2291112313477996896u }, + { 1527430471115325346u, 1832889850782397517u }, { 12289990821117991246u, 1466311880625918013u }, + { 17210690286378213644u, 1173049504500734410u }, { 9090360384495590213u, 1876879207201175057u }, + { 18340334751822203140u, 1501503365760940045u }, { 14672267801457762512u, 1201202692608752036u }, + { 16096930852848599373u, 1921924308174003258u }, { 1809498238053148529u, 1537539446539202607u }, + { 12515645034668249793u, 1230031557231362085u }, { 1578287981759648052u, 1968050491570179337u }, + { 12330676829633449412u, 1574440393256143469u }, { 13553890278448669853u, 1259552314604914775u }, + { 3239480371808320148u, 2015283703367863641u }, { 17348979556414297411u, 1612226962694290912u }, + { 6500486015647617283u, 1289781570155432730u }, { 10400777625036187652u, 2063650512248692368u }, + { 15699319729512770768u, 1650920409798953894u }, { 16248804598352126938u, 1320736327839163115u }, + { 7551343283653851484u, 2113178124542660985u }, { 6041074626923081187u, 1690542499634128788u }, + { 12211557331022285596u, 1352433999707303030u }, { 1091747655926105338u, 2163894399531684849u }, + { 4562746939482794594u, 1731115519625347879u }, { 7339546366328145998u, 1384892415700278303u }, + { 8053925371383123274u, 2215827865120445285u }, { 6443140297106498619u, 1772662292096356228u }, + { 12533209867169019542u, 1418129833677084982u }, { 5295740528502789974u, 2269007733883335972u }, + { 15304638867027962949u, 1815206187106668777u }, { 4865013464138549713u, 1452164949685335022u }, + { 14960057215536570740u, 1161731959748268017u }, { 9178696285890871890u, 1858771135597228828u }, + { 14721654658196518159u, 1487016908477783062u }, { 4398626097073393881u, 1189613526782226450u }, + { 7037801755317430209u, 1903381642851562320u }, { 5630241404253944167u, 1522705314281249856u }, + { 814844308661245011u, 1218164251424999885u }, { 1303750893857992017u, 1949062802279999816u }, + { 15800395974054034906u, 1559250241823999852u }, { 5261619149759407279u, 1247400193459199882u }, + { 12107939454356961969u, 1995840309534719811u }, { 5997002748743659252u, 1596672247627775849u }, + { 8486951013736837725u, 1277337798102220679u }, { 2511075177753209390u, 2043740476963553087u }, + { 13076906586428298482u, 1634992381570842469u }, { 14150874083884549109u, 1307993905256673975u }, + { 4194654460505726958u, 2092790248410678361u }, { 18113118827372222859u, 1674232198728542688u }, + { 3422448617672047318u, 1339385758982834151u }, { 16543964232501006678u, 2143017214372534641u }, + { 9545822571258895019u, 1714413771498027713u }, { 15015355686490936662u, 1371531017198422170u }, + { 5577825024675947042u, 2194449627517475473u }, { 11840957649224578280u, 1755559702013980378u }, + { 16851463748863483271u, 1404447761611184302u }, { 12204946739213931940u, 2247116418577894884u }, + { 13453306206113055875u, 1797693134862315907u }, { 3383947335406624054u, 1438154507889852726u }, + { 16482362180876329456u, 2301047212623764361u }, { 9496540929959153242u, 1840837770099011489u }, + { 11286581558709232917u, 1472670216079209191u }, { 5339916432225476010u, 1178136172863367353u }, + { 4854517476818851293u, 1885017876581387765u }, { 3883613981455081034u, 1508014301265110212u }, + { 14174937629389795797u, 1206411441012088169u }, { 11611853762797942306u, 1930258305619341071u }, + { 5600134195496443521u, 1544206644495472857u }, { 15548153800622885787u, 1235365315596378285u }, + { 6430302007287065643u, 1976584504954205257u }, { 16212288050055383484u, 1581267603963364205u }, + { 12969830440044306787u, 1265014083170691364u }, { 9683682259845159889u, 2024022533073106183u }, + { 15125643437359948558u, 1619218026458484946u }, { 8411165935146048523u, 1295374421166787957u }, + { 17147214310975587960u, 2072599073866860731u }, { 10028422634038560045u, 1658079259093488585u }, + { 8022738107230848036u, 1326463407274790868u }, { 9147032156827446534u, 2122341451639665389u }, + { 11006974540203867551u, 1697873161311732311u }, { 5116230817421183718u, 1358298529049385849u }, + { 15564666937357714594u, 2173277646479017358u }, { 1383687105660440706u, 1738622117183213887u }, + { 12174996128754083534u, 1390897693746571109u }, { 8411947361780802685u, 2225436309994513775u }, + { 6729557889424642148u, 1780349047995611020u }, { 5383646311539713719u, 1424279238396488816u }, + { 1235136468979721303u, 2278846781434382106u }, { 15745504434151418335u, 1823077425147505684u }, + { 16285752362063044992u, 1458461940118004547u }, { 5649904260166615347u, 1166769552094403638u }, + { 5350498001524674232u, 1866831283351045821u }, { 591049586477829062u, 1493465026680836657u }, + { 11540886113407994219u, 1194772021344669325u }, { 18673707743239135u, 1911635234151470921u }, + { 14772334225162232601u, 1529308187321176736u }, { 8128518565387875758u, 1223446549856941389u }, + { 1937583260394870242u, 1957514479771106223u }, { 8928764237799716840u, 1566011583816884978u }, + { 14521709019723594119u, 1252809267053507982u }, { 8477339172590109297u, 2004494827285612772u }, + { 17849917782297818407u, 1603595861828490217u }, { 6901236596354434079u, 1282876689462792174u }, + { 18420676183650915173u, 2052602703140467478u }, { 3668494502695001169u, 1642082162512373983u }, + { 10313493231639821582u, 1313665730009899186u }, { 9122891541139893884u, 2101865168015838698u }, + { 14677010862395735754u, 1681492134412670958u }, { 673562245690857633u, 1345193707530136767u } +}; + +static const uint64_t DOUBLE_POW5_SPLIT[DOUBLE_POW5_TABLE_SIZE][2] = { + { 0u, 1152921504606846976u }, { 0u, 1441151880758558720u }, + { 0u, 1801439850948198400u }, { 0u, 2251799813685248000u }, + { 0u, 1407374883553280000u }, { 0u, 1759218604441600000u }, + { 0u, 2199023255552000000u }, { 0u, 1374389534720000000u }, + { 0u, 1717986918400000000u }, { 0u, 2147483648000000000u }, + { 0u, 1342177280000000000u }, { 0u, 1677721600000000000u }, + { 0u, 2097152000000000000u }, { 0u, 1310720000000000000u }, + { 0u, 1638400000000000000u }, { 0u, 2048000000000000000u }, + { 0u, 1280000000000000000u }, { 0u, 1600000000000000000u }, + { 0u, 2000000000000000000u }, { 0u, 1250000000000000000u }, + { 0u, 1562500000000000000u }, { 0u, 1953125000000000000u }, + { 0u, 1220703125000000000u }, { 0u, 1525878906250000000u }, + { 0u, 1907348632812500000u }, { 0u, 1192092895507812500u }, + { 0u, 1490116119384765625u }, { 4611686018427387904u, 1862645149230957031u }, + { 9799832789158199296u, 1164153218269348144u }, { 12249790986447749120u, 1455191522836685180u }, + { 15312238733059686400u, 1818989403545856475u }, { 14528612397897220096u, 2273736754432320594u }, + { 13692068767113150464u, 1421085471520200371u }, { 12503399940464050176u, 1776356839400250464u }, + { 15629249925580062720u, 2220446049250313080u }, { 9768281203487539200u, 1387778780781445675u }, + { 7598665485932036096u, 1734723475976807094u }, { 274959820560269312u, 2168404344971008868u }, + { 9395221924704944128u, 1355252715606880542u }, { 2520655369026404352u, 1694065894508600678u }, + { 12374191248137781248u, 2117582368135750847u }, { 14651398557727195136u, 1323488980084844279u }, + { 13702562178731606016u, 1654361225106055349u }, { 3293144668132343808u, 2067951531382569187u }, + { 18199116482078572544u, 1292469707114105741u }, { 8913837547316051968u, 1615587133892632177u }, + { 15753982952572452864u, 2019483917365790221u }, { 12152082354571476992u, 1262177448353618888u }, + { 15190102943214346240u, 1577721810442023610u }, { 9764256642163156992u, 1972152263052529513u }, + { 17631875447420442880u, 1232595164407830945u }, { 8204786253993389888u, 1540743955509788682u }, + { 1032610780636961552u, 1925929944387235853u }, { 2951224747111794922u, 1203706215242022408u }, + { 3689030933889743652u, 1504632769052528010u }, { 13834660704216955373u, 1880790961315660012u }, + { 17870034976990372916u, 1175494350822287507u }, { 17725857702810578241u, 1469367938527859384u }, + { 3710578054803671186u, 1836709923159824231u }, { 26536550077201078u, 2295887403949780289u }, + { 11545800389866720434u, 1434929627468612680u }, { 14432250487333400542u, 1793662034335765850u }, + { 8816941072311974870u, 2242077542919707313u }, { 17039803216263454053u, 1401298464324817070u }, + { 12076381983474541759u, 1751623080406021338u }, { 5872105442488401391u, 2189528850507526673u }, + { 15199280947623720629u, 1368455531567204170u }, { 9775729147674874978u, 1710569414459005213u }, + { 16831347453020981627u, 2138211768073756516u }, { 1296220121283337709u, 1336382355046097823u }, + { 15455333206886335848u, 1670477943807622278u }, { 10095794471753144002u, 2088097429759527848u }, + { 6309871544845715001u, 1305060893599704905u }, { 12499025449484531656u, 1631326116999631131u }, + { 11012095793428276666u, 2039157646249538914u }, { 11494245889320060820u, 1274473528905961821u }, + { 532749306367912313u, 1593091911132452277u }, { 5277622651387278295u, 1991364888915565346u }, + { 7910200175544436838u, 1244603055572228341u }, { 14499436237857933952u, 1555753819465285426u }, + { 8900923260467641632u, 1944692274331606783u }, { 12480606065433357876u, 1215432671457254239u }, + { 10989071563364309441u, 1519290839321567799u }, { 9124653435777998898u, 1899113549151959749u }, + { 8008751406574943263u, 1186945968219974843u }, { 5399253239791291175u, 1483682460274968554u }, + { 15972438586593889776u, 1854603075343710692u }, { 759402079766405302u, 1159126922089819183u }, + { 14784310654990170340u, 1448908652612273978u }, { 9257016281882937117u, 1811135815765342473u }, + { 16182956370781059300u, 2263919769706678091u }, { 7808504722524468110u, 1414949856066673807u }, + { 5148944884728197234u, 1768687320083342259u }, { 1824495087482858639u, 2210859150104177824u }, + { 1140309429676786649u, 1381786968815111140u }, { 1425386787095983311u, 1727233711018888925u }, + { 6393419502297367043u, 2159042138773611156u }, { 13219259225790630210u, 1349401336733506972u }, + { 16524074032238287762u, 1686751670916883715u }, { 16043406521870471799u, 2108439588646104644u }, + { 803757039314269066u, 1317774742903815403u }, { 14839754354425000045u, 1647218428629769253u }, + { 4714634887749086344u, 2059023035787211567u }, { 9864175832484260821u, 1286889397367007229u }, + { 16941905809032713930u, 1608611746708759036u }, { 2730638187581340797u, 2010764683385948796u }, + { 10930020904093113806u, 1256727927116217997u }, { 18274212148543780162u, 1570909908895272496u }, + { 4396021111970173586u, 1963637386119090621u }, { 5053356204195052443u, 1227273366324431638u }, + { 15540067292098591362u, 1534091707905539547u }, { 14813398096695851299u, 1917614634881924434u }, + { 13870059828862294966u, 1198509146801202771u }, { 12725888767650480803u, 1498136433501503464u }, + { 15907360959563101004u, 1872670541876879330u }, { 14553786618154326031u, 1170419088673049581u }, + { 4357175217410743827u, 1463023860841311977u }, { 10058155040190817688u, 1828779826051639971u }, + { 7961007781811134206u, 2285974782564549964u }, { 14199001900486734687u, 1428734239102843727u }, + { 13137066357181030455u, 1785917798878554659u }, { 11809646928048900164u, 2232397248598193324u }, + { 16604401366885338411u, 1395248280373870827u }, { 16143815690179285109u, 1744060350467338534u }, + { 10956397575869330579u, 2180075438084173168u }, { 6847748484918331612u, 1362547148802608230u }, + { 17783057643002690323u, 1703183936003260287u }, { 17617136035325974999u, 2128979920004075359u }, + { 17928239049719816230u, 1330612450002547099u }, { 17798612793722382384u, 1663265562503183874u }, + { 13024893955298202172u, 2079081953128979843u }, { 5834715712847682405u, 1299426220705612402u }, + { 16516766677914378815u, 1624282775882015502u }, { 11422586310538197711u, 2030353469852519378u }, + { 11750802462513761473u, 1268970918657824611u }, { 10076817059714813937u, 1586213648322280764u }, + { 12596021324643517422u, 1982767060402850955u }, { 5566670318688504437u, 1239229412751781847u }, + { 2346651879933242642u, 1549036765939727309u }, { 7545000868343941206u, 1936295957424659136u }, + { 4715625542714963254u, 1210184973390411960u }, { 5894531928393704067u, 1512731216738014950u }, + { 16591536947346905892u, 1890914020922518687u }, { 17287239619732898039u, 1181821263076574179u }, + { 16997363506238734644u, 1477276578845717724u }, { 2799960309088866689u, 1846595723557147156u }, + { 10973347230035317489u, 1154122327223216972u }, { 13716684037544146861u, 1442652909029021215u }, + { 12534169028502795672u, 1803316136286276519u }, { 11056025267201106687u, 2254145170357845649u }, + { 18439230838069161439u, 1408840731473653530u }, { 13825666510731675991u, 1761050914342066913u }, + { 3447025083132431277u, 2201313642927583642u }, { 6766076695385157452u, 1375821026829739776u }, + { 8457595869231446815u, 1719776283537174720u }, { 10571994836539308519u, 2149720354421468400u }, + { 6607496772837067824u, 1343575221513417750u }, { 17482743002901110588u, 1679469026891772187u }, + { 17241742735199000331u, 2099336283614715234u }, { 15387775227926763111u, 1312085177259197021u }, + { 5399660979626290177u, 1640106471573996277u }, { 11361262242960250625u, 2050133089467495346u }, + { 11712474920277544544u, 1281333180917184591u }, { 10028907631919542777u, 1601666476146480739u }, + { 7924448521472040567u, 2002083095183100924u }, { 14176152362774801162u, 1251301934489438077u }, + { 3885132398186337741u, 1564127418111797597u }, { 9468101516160310080u, 1955159272639746996u }, + { 15140935484454969608u, 1221974545399841872u }, { 479425281859160394u, 1527468181749802341u }, + { 5210967620751338397u, 1909335227187252926u }, { 17091912818251750210u, 1193334516992033078u }, + { 12141518985959911954u, 1491668146240041348u }, { 15176898732449889943u, 1864585182800051685u }, + { 11791404716994875166u, 1165365739250032303u }, { 10127569877816206054u, 1456707174062540379u }, + { 8047776328842869663u, 1820883967578175474u }, { 836348374198811271u, 2276104959472719343u }, + { 7440246761515338900u, 1422565599670449589u }, { 13911994470321561530u, 1778206999588061986u }, + { 8166621051047176104u, 2222758749485077483u }, { 2798295147690791113u, 1389224218428173427u }, + { 17332926989895652603u, 1736530273035216783u }, { 17054472718942177850u, 2170662841294020979u }, + { 8353202440125167204u, 1356664275808763112u }, { 10441503050156459005u, 1695830344760953890u }, + { 3828506775840797949u, 2119787930951192363u }, { 86973725686804766u, 1324867456844495227u }, + { 13943775212390669669u, 1656084321055619033u }, { 3594660960206173375u, 2070105401319523792u }, + { 2246663100128858359u, 1293815875824702370u }, { 12031700912015848757u, 1617269844780877962u }, + { 5816254103165035138u, 2021587305976097453u }, { 5941001823691840913u, 1263492066235060908u }, + { 7426252279614801142u, 1579365082793826135u }, { 4671129331091113523u, 1974206353492282669u }, + { 5225298841145639904u, 1233878970932676668u }, { 6531623551432049880u, 1542348713665845835u }, + { 3552843420862674446u, 1927935892082307294u }, { 16055585193321335241u, 1204959932551442058u }, + { 10846109454796893243u, 1506199915689302573u }, { 18169322836923504458u, 1882749894611628216u }, + { 11355826773077190286u, 1176718684132267635u }, { 9583097447919099954u, 1470898355165334544u }, + { 11978871809898874942u, 1838622943956668180u }, { 14973589762373593678u, 2298278679945835225u }, + { 2440964573842414192u, 1436424174966147016u }, { 3051205717303017741u, 1795530218707683770u }, + { 13037379183483547984u, 2244412773384604712u }, { 8148361989677217490u, 1402757983365377945u }, + { 14797138505523909766u, 1753447479206722431u }, { 13884737113477499304u, 2191809349008403039u }, + { 15595489723564518921u, 1369880843130251899u }, { 14882676136028260747u, 1712351053912814874u }, + { 9379973133180550126u, 2140438817391018593u }, { 17391698254306313589u, 1337774260869386620u }, + { 3292878744173340370u, 1672217826086733276u }, { 4116098430216675462u, 2090272282608416595u }, + { 266718509671728212u, 1306420176630260372u }, { 333398137089660265u, 1633025220787825465u }, + { 5028433689789463235u, 2041281525984781831u }, { 10060300083759496378u, 1275800953740488644u }, + { 12575375104699370472u, 1594751192175610805u }, { 1884160825592049379u, 1993438990219513507u }, + { 17318501580490888525u, 1245899368887195941u }, { 7813068920331446945u, 1557374211108994927u }, + { 5154650131986920777u, 1946717763886243659u }, { 915813323278131534u, 1216698602428902287u }, + { 14979824709379828129u, 1520873253036127858u }, { 9501408849870009354u, 1901091566295159823u }, + { 12855909558809837702u, 1188182228934474889u }, { 2234828893230133415u, 1485227786168093612u }, + { 2793536116537666769u, 1856534732710117015u }, { 8663489100477123587u, 1160334207943823134u }, + { 1605989338741628675u, 1450417759929778918u }, { 11230858710281811652u, 1813022199912223647u }, + { 9426887369424876662u, 2266277749890279559u }, { 12809333633531629769u, 1416423593681424724u }, + { 16011667041914537212u, 1770529492101780905u }, { 6179525747111007803u, 2213161865127226132u }, + { 13085575628799155685u, 1383226165704516332u }, { 16356969535998944606u, 1729032707130645415u }, + { 15834525901571292854u, 2161290883913306769u }, { 2979049660840976177u, 1350806802445816731u }, + { 17558870131333383934u, 1688508503057270913u }, { 8113529608884566205u, 2110635628821588642u }, + { 9682642023980241782u, 1319147268013492901u }, { 16714988548402690132u, 1648934085016866126u }, + { 11670363648648586857u, 2061167606271082658u }, { 11905663298832754689u, 1288229753919426661u }, + { 1047021068258779650u, 1610287192399283327u }, { 15143834390605638274u, 2012858990499104158u }, + { 4853210475701136017u, 1258036869061940099u }, { 1454827076199032118u, 1572546086327425124u }, + { 1818533845248790147u, 1965682607909281405u }, { 3442426662494187794u, 1228551629943300878u }, + { 13526405364972510550u, 1535689537429126097u }, { 3072948650933474476u, 1919611921786407622u }, + { 15755650962115585259u, 1199757451116504763u }, { 15082877684217093670u, 1499696813895630954u }, + { 9630225068416591280u, 1874621017369538693u }, { 8324733676974063502u, 1171638135855961683u }, + { 5794231077790191473u, 1464547669819952104u }, { 7242788847237739342u, 1830684587274940130u }, + { 18276858095901949986u, 2288355734093675162u }, { 16034722328366106645u, 1430222333808546976u }, + { 1596658836748081690u, 1787777917260683721u }, { 6607509564362490017u, 2234722396575854651u }, + { 1823850468512862308u, 1396701497859909157u }, { 6891499104068465790u, 1745876872324886446u }, + { 17837745916940358045u, 2182346090406108057u }, { 4231062170446641922u, 1363966306503817536u }, + { 5288827713058302403u, 1704957883129771920u }, { 6611034641322878003u, 2131197353912214900u }, + { 13355268687681574560u, 1331998346195134312u }, { 16694085859601968200u, 1664997932743917890u }, + { 11644235287647684442u, 2081247415929897363u }, { 4971804045566108824u, 1300779634956185852u }, + { 6214755056957636030u, 1625974543695232315u }, { 3156757802769657134u, 2032468179619040394u }, + { 6584659645158423613u, 1270292612261900246u }, { 17454196593302805324u, 1587865765327375307u }, + { 17206059723201118751u, 1984832206659219134u }, { 6142101308573311315u, 1240520129162011959u }, + { 3065940617289251240u, 1550650161452514949u }, { 8444111790038951954u, 1938312701815643686u }, + { 665883850346957067u, 1211445438634777304u }, { 832354812933696334u, 1514306798293471630u }, + { 10263815553021896226u, 1892883497866839537u }, { 17944099766707154901u, 1183052186166774710u }, + { 13206752671529167818u, 1478815232708468388u }, { 16508440839411459773u, 1848519040885585485u }, + { 12623618533845856310u, 1155324400553490928u }, { 15779523167307320387u, 1444155500691863660u }, + { 1277659885424598868u, 1805194375864829576u }, { 1597074856780748586u, 2256492969831036970u }, + { 5609857803915355770u, 1410308106144398106u }, { 16235694291748970521u, 1762885132680497632u }, + { 1847873790976661535u, 2203606415850622041u }, { 12684136165428883219u, 1377254009906638775u }, + { 11243484188358716120u, 1721567512383298469u }, { 219297180166231438u, 2151959390479123087u }, + { 7054589765244976505u, 1344974619049451929u }, { 13429923224983608535u, 1681218273811814911u }, + { 12175718012802122765u, 2101522842264768639u }, { 14527352785642408584u, 1313451776415480399u }, + { 13547504963625622826u, 1641814720519350499u }, { 12322695186104640628u, 2052268400649188124u }, + { 16925056528170176201u, 1282667750405742577u }, { 7321262604930556539u, 1603334688007178222u }, + { 18374950293017971482u, 2004168360008972777u }, { 4566814905495150320u, 1252605225005607986u }, + { 14931890668723713708u, 1565756531257009982u }, { 9441491299049866327u, 1957195664071262478u }, + { 1289246043478778550u, 1223247290044539049u }, { 6223243572775861092u, 1529059112555673811u }, + { 3167368447542438461u, 1911323890694592264u }, { 1979605279714024038u, 1194577431684120165u }, + { 7086192618069917952u, 1493221789605150206u }, { 18081112809442173248u, 1866527237006437757u }, + { 13606538515115052232u, 1166579523129023598u }, { 7784801107039039482u, 1458224403911279498u }, + { 507629346944023544u, 1822780504889099373u }, { 5246222702107417334u, 2278475631111374216u }, + { 3278889188817135834u, 1424047269444608885u }, { 8710297504448807696u, 1780059086805761106u } +}; + +// IEEE 754 double precision constants +#define DOUBLE_MANTISSA_BITS 52 +#define DOUBLE_EXPONENT_BITS 11 +#define DOUBLE_EXPONENT_BIAS 1023 + +// Helper: floor(log2(value)) using ryu_leading_zeros64 +static inline uint32_t floor_log2(const uint64_t value) { + return 63 - ryu_leading_zeros64(value); +} + +// Helper: log2(5^e) approximation +static inline int32_t log2pow5(const int32_t e) { + return (int32_t) ((((uint32_t) e) * 1217359) >> 19); +} + +// Helper: ceil(log2(5^e)) +static inline int32_t ceil_log2pow5(const int32_t e) { + return log2pow5(e) + 1; +} + +// Helper: max of two int32 +static inline int32_t max32(int32_t a, int32_t b) { + return a < b ? b : a; +} + +// Helper: convert uint64 bits to double +static inline double int64Bits2Double(uint64_t bits) { + double f; + memcpy(&f, &bits, sizeof(double)); + return f; +} + +// Check if value is multiple of 2^p +static inline bool multipleOfPowerOf2(const uint64_t value, const uint32_t p) { + return (value & ((1ull << p) - 1)) == 0; +} + +// Count how many times value is divisible by 5 +// Uses modular inverse to avoid expensive division +static inline uint32_t pow5Factor(uint64_t value) { + const uint64_t m_inv_5 = 14757395258967641293u; // 5 * m_inv_5 = 1 (mod 2^64) + const uint64_t n_div_5 = 3689348814741910323u; // 2^64 / 5 + uint32_t count = 0; + for (;;) { + value *= m_inv_5; + if (value > n_div_5) + break; + ++count; + } + return count; +} + +// Check if value is multiple of 5^p +// Optimized: uses modular inverse instead of division +static inline bool multipleOfPowerOf5(const uint64_t value, const uint32_t p) { + return pow5Factor(value) >= p; +} + +// 128-bit multiplication with shift +// This is the core operation for converting decimal to binary +#if defined(__SIZEOF_INT128__) +// Use native 128-bit integers if available (GCC/Clang) +static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) { + const unsigned __int128 b0 = ((unsigned __int128) m) * mul[0]; + const unsigned __int128 b2 = ((unsigned __int128) m) * mul[1]; + return (uint64_t) (((b0 >> 64) + b2) >> (j - 64)); +} +#else +// Fallback for systems without 128-bit integers +static inline uint64_t umul128(const uint64_t a, const uint64_t b, uint64_t* const productHi) { + const uint32_t aLo = (uint32_t)a; + const uint32_t aHi = (uint32_t)(a >> 32); + const uint32_t bLo = (uint32_t)b; + const uint32_t bHi = (uint32_t)(b >> 32); + + const uint64_t b00 = (uint64_t)aLo * bLo; + const uint64_t b01 = (uint64_t)aLo * bHi; + const uint64_t b10 = (uint64_t)aHi * bLo; + const uint64_t b11 = (uint64_t)aHi * bHi; + + const uint32_t b00Lo = (uint32_t)b00; + const uint32_t b00Hi = (uint32_t)(b00 >> 32); + + const uint64_t mid1 = b10 + b00Hi; + const uint32_t mid1Lo = (uint32_t)(mid1); + const uint32_t mid1Hi = (uint32_t)(mid1 >> 32); + + const uint64_t mid2 = b01 + mid1Lo; + const uint32_t mid2Lo = (uint32_t)(mid2); + const uint32_t mid2Hi = (uint32_t)(mid2 >> 32); + + const uint64_t pHi = b11 + mid1Hi + mid2Hi; + const uint64_t pLo = ((uint64_t)mid2Lo << 32) | b00Lo; + + *productHi = pHi; + return pLo; +} + +static inline uint64_t shiftright128(const uint64_t lo, const uint64_t hi, const uint32_t dist) { + return (hi << (64 - dist)) | (lo >> dist); +} + +static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) { + uint64_t high1; + const uint64_t low1 = umul128(m, mul[1], &high1); + uint64_t high0; + umul128(m, mul[0], &high0); + const uint64_t sum = high0 + low1; + if (sum < high0) { + ++high1; + } + return shiftright128(sum, high1, j - 64); +} +#endif + +// Main conversion function: decimal mantissa+exponent to IEEE 754 double +// Optimized for JSON parsing with fast paths for edge cases +static inline double ryu_s2d_from_parts(uint64_t m10, int m10digits, int32_t e10, bool signedM) { + // Fast path: handle zero explicitly (e.g., "0.0", "0e0") + if (m10 == 0) { + return int64Bits2Double(((uint64_t) signedM) << 63); + } + + // Fast path: handle overflow/underflow early + if (m10digits + e10 <= -324) { + // Underflow to zero + return int64Bits2Double(((uint64_t) signedM) << 63); + } + + if (m10digits + e10 >= 310) { + // Overflow to infinity + return int64Bits2Double((((uint64_t) signedM) << 63) | 0x7ff0000000000000ULL); + } + + // Convert decimal to binary: m10 * 10^e10 = m2 * 2^e2 + int32_t e2; + uint64_t m2; + bool trailingZeros; + + if (e10 >= 0) { + // Positive exponent: multiply by 5^e10 and adjust binary exponent + e2 = floor_log2(m10) + e10 + log2pow5(e10) - (DOUBLE_MANTISSA_BITS + 1); + int j = e2 - e10 - ceil_log2pow5(e10) + DOUBLE_POW5_BITCOUNT; + m2 = mulShift64(m10, DOUBLE_POW5_SPLIT[e10], j); + trailingZeros = e2 < e10 || (e2 - e10 < 64 && multipleOfPowerOf2(m10, e2 - e10)); + } else { + // Negative exponent: divide by 5^(-e10) + e2 = floor_log2(m10) + e10 - ceil_log2pow5(-e10) - (DOUBLE_MANTISSA_BITS + 1); + int j = e2 - e10 + ceil_log2pow5(-e10) - 1 + DOUBLE_POW5_INV_BITCOUNT; + m2 = mulShift64(m10, DOUBLE_POW5_INV_SPLIT[-e10], j); + trailingZeros = multipleOfPowerOf5(m10, -e10); + } + + // Compute IEEE 754 exponent + uint32_t ieee_e2 = (uint32_t) max32(0, e2 + DOUBLE_EXPONENT_BIAS + floor_log2(m2)); + + if (ieee_e2 > 0x7fe) { + // Overflow to infinity + return int64Bits2Double((((uint64_t) signedM) << 63) | 0x7ff0000000000000ULL); + } + + // Compute shift amount for rounding + int32_t shift = (ieee_e2 == 0 ? 1 : ieee_e2) - e2 - DOUBLE_EXPONENT_BIAS - DOUBLE_MANTISSA_BITS; + + // IEEE 754 round-to-even (banker's rounding) + trailingZeros &= (m2 & ((1ull << (shift - 1)) - 1)) == 0; + uint64_t lastRemovedBit = (m2 >> (shift - 1)) & 1; + bool roundUp = (lastRemovedBit != 0) && (!trailingZeros || (((m2 >> shift) & 1) != 0)); + + uint64_t ieee_m2 = (m2 >> shift) + roundUp; + ieee_m2 &= (1ull << DOUBLE_MANTISSA_BITS) - 1; + + if (ieee_m2 == 0 && roundUp) { + ieee_e2++; + } + + // Pack sign, exponent, and mantissa into IEEE 754 format + // Match original Ryu: group sign+exponent, then shift and add mantissa + uint64_t ieee = (((((uint64_t) signedM) << DOUBLE_EXPONENT_BITS) | (uint64_t)ieee_e2) << DOUBLE_MANTISSA_BITS) | ieee_m2; + return int64Bits2Double(ieee); +} + +#endif // RYU_H |
