summary | refs | log | tree | commit | diff
path: root/ext/json
diff options
context:
space:
mode:
Diffstat (limited to 'ext/json')
-rw-r--r-- ext/json/fbuffer/fbuffer.h 126
-rw-r--r-- ext/json/generator/depend 1
-rw-r--r-- ext/json/generator/extconf.rb 5
-rw-r--r-- ext/json/generator/generator.c 910
-rw-r--r-- ext/json/json.h 134
-rw-r--r-- ext/json/lib/json.rb 59
-rw-r--r-- ext/json/lib/json/common.rb 132
-rw-r--r-- ext/json/lib/json/ext/generator/state.rb 25
-rw-r--r-- ext/json/lib/json/generic_object.rb 8
-rw-r--r-- ext/json/lib/json/version.rb 2
-rw-r--r-- ext/json/parser/depend 2
-rw-r--r-- ext/json/parser/extconf.rb 8
-rw-r--r-- ext/json/parser/parser.c 1832
-rw-r--r-- ext/json/simd/simd.h 64
-rw-r--r-- ext/json/vendor/fpconv.c 25
-rw-r--r-- ext/json/vendor/ryu.h 819
16 files changed, 2846 insertions, 1306 deletions
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h
index d32371476c..b4f5266ca5 100644
--- a/ext/json/fbuffer/fbuffer.h
+++ b/ext/json/fbuffer/fbuffer.h
@@ -1,47 +1,9 @@
#ifndef _FBUFFER_H_
#define _FBUFFER_H_
-#include "ruby.h"
-#include "ruby/encoding.h"
+#include "../json.h"
#include "../vendor/jeaiii-ltoa.h"
-/* shims */
-/* This is the fallback definition from Ruby 3.4 */
-
-#ifndef RBIMPL_STDBOOL_H
-#if defined(__cplusplus)
-# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
-# include <cstdbool>
-# endif
-#elif defined(HAVE_STDBOOL_H)
-# include <stdbool.h>
-#elif !defined(HAVE__BOOL)
-typedef unsigned char _Bool;
-# define bool _Bool
-# define true ((_Bool)+1)
-# define false ((_Bool)+0)
-# define __bool_true_false_are_defined
-#endif
-#endif
-
-#ifndef RB_UNLIKELY
-#define RB_UNLIKELY(expr) expr
-#endif
-
-#ifndef RB_LIKELY
-#define RB_LIKELY(expr) expr
-#endif
-
-#ifndef MAYBE_UNUSED
-# define MAYBE_UNUSED(x) x
-#endif
-
-#ifdef RUBY_DEBUG
-#ifndef JSON_DEBUG
-#define JSON_DEBUG RUBY_DEBUG
-#endif
-#endif
-
enum fbuffer_type {
FBUFFER_HEAP_ALLOCATED = 0,
FBUFFER_STACK_ALLOCATED = 1,
@@ -49,11 +11,11 @@ enum fbuffer_type {
typedef struct FBufferStruct {
enum fbuffer_type type;
- unsigned long initial_length;
- unsigned long len;
- unsigned long capa;
-#ifdef JSON_DEBUG
- unsigned long requested;
+ size_t initial_length;
+ size_t len;
+ size_t capa;
+#if JSON_DEBUG
+ size_t requested;
#endif
char *ptr;
VALUE io;
@@ -70,12 +32,12 @@ typedef struct FBufferStruct {
static void fbuffer_free(FBuffer *fb);
static void fbuffer_clear(FBuffer *fb);
-static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len);
+static void fbuffer_append(FBuffer *fb, const char *newstr, size_t len);
static void fbuffer_append_long(FBuffer *fb, long number);
static inline void fbuffer_append_char(FBuffer *fb, char newchr);
static VALUE fbuffer_finalize(FBuffer *fb);
-static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size)
+static void fbuffer_stack_init(FBuffer *fb, size_t initial_length, char *stack_buffer, size_t stack_buffer_size)
{
fb->initial_length = (initial_length > 0) ? initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT;
if (stack_buffer) {
@@ -83,14 +45,14 @@ static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *
fb->ptr = stack_buffer;
fb->capa = stack_buffer_size;
}
-#ifdef JSON_DEBUG
+#if JSON_DEBUG
fb->requested = 0;
#endif
}
-static inline void fbuffer_consumed(FBuffer *fb, unsigned long consumed)
+static inline void fbuffer_consumed(FBuffer *fb, size_t consumed)
{
-#ifdef JSON_DEBUG
+#if JSON_DEBUG
if (consumed > fb->requested) {
rb_bug("fbuffer: Out of bound write");
}
@@ -102,7 +64,7 @@ static inline void fbuffer_consumed(FBuffer *fb, unsigned long consumed)
static void fbuffer_free(FBuffer *fb)
{
if (fb->ptr && fb->type == FBUFFER_HEAP_ALLOCATED) {
- ruby_xfree(fb->ptr);
+ JSON_SIZED_FREE_N(fb->ptr, fb->capa);
}
}
@@ -117,7 +79,7 @@ static void fbuffer_flush(FBuffer *fb)
fbuffer_clear(fb);
}
-static void fbuffer_realloc(FBuffer *fb, unsigned long required)
+static void fbuffer_realloc(FBuffer *fb, size_t required)
{
if (required > fb->capa) {
if (fb->type == FBUFFER_STACK_ALLOCATED) {
@@ -126,13 +88,13 @@ static void fbuffer_realloc(FBuffer *fb, unsigned long required)
fb->type = FBUFFER_HEAP_ALLOCATED;
MEMCPY(fb->ptr, old_buffer, char, fb->len);
} else {
- REALLOC_N(fb->ptr, char, required);
+ JSON_SIZED_REALLOC_N(fb->ptr, char, required, fb->capa);
}
fb->capa = required;
}
}
-static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
+static void fbuffer_do_inc_capa(FBuffer *fb, size_t requested)
{
if (RB_UNLIKELY(fb->io)) {
if (fb->capa < FBUFFER_IO_BUFFER_SIZE) {
@@ -146,7 +108,7 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
}
}
- unsigned long required;
+ size_t required;
if (RB_UNLIKELY(!fb->ptr)) {
fb->ptr = ALLOC_N(char, fb->initial_length);
@@ -158,9 +120,9 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
fbuffer_realloc(fb, required);
}
-static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested)
+static inline void fbuffer_inc_capa(FBuffer *fb, size_t requested)
{
-#ifdef JSON_DEBUG
+#if JSON_DEBUG
fb->requested = requested;
#endif
@@ -169,19 +131,33 @@ static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested)
}
}
-static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len)
+static inline size_t fbuffer_size_mul_or_raise(size_t a, size_t b)
+{
+ size_t result = a * b;
+ if (RB_UNLIKELY(a != 0 && (result / a) != b)) {
+ rb_raise(rb_eArgError, "Buffer overflow, the resulting document is too large to be generated");
+ }
+ return result;
+}
+
+static inline void fbuffer_append_reserved(FBuffer *fb, const char *newstr, size_t len)
+{
+ MEMCPY(fb->ptr + fb->len, newstr, char, len);
+ fbuffer_consumed(fb, len);
+}
+
+static inline void fbuffer_append(FBuffer *fb, const char *newstr, size_t len)
{
if (len > 0) {
fbuffer_inc_capa(fb, len);
- MEMCPY(fb->ptr + fb->len, newstr, char, len);
- fbuffer_consumed(fb, len);
+ fbuffer_append_reserved(fb, newstr, len);
}
}
/* Appends a character into a buffer. The buffer needs to have sufficient capacity, via fbuffer_inc_capa(...). */
static inline void fbuffer_append_reserved_char(FBuffer *fb, char chr)
{
-#ifdef JSON_DEBUG
+#if JSON_DEBUG
if (fb->requested < 1) {
rb_bug("fbuffer: unreserved write");
}
@@ -194,12 +170,29 @@ static inline void fbuffer_append_reserved_char(FBuffer *fb, char chr)
static void fbuffer_append_str(FBuffer *fb, VALUE str)
{
- const char *newstr = StringValuePtr(str);
- unsigned long len = RSTRING_LEN(str);
+ const char *ptr;
+ size_t len;
+ RSTRING_GETMEM(str, ptr, len);
+ fbuffer_append(fb, ptr, len);
RB_GC_GUARD(str);
+}
- fbuffer_append(fb, newstr, len);
+static void fbuffer_append_str_repeat(FBuffer *fb, VALUE str, size_t repeat)
+{
+ const char *ptr;
+ size_t len;
+ RSTRING_GETMEM(str, ptr, len);
+
+ fbuffer_inc_capa(fb, fbuffer_size_mul_or_raise(repeat, len));
+ while (repeat) {
+#if JSON_DEBUG
+ fb->requested = len;
+#endif
+ fbuffer_append_reserved(fb, ptr, len);
+ repeat--;
+ }
+ RB_GC_GUARD(str);
}
static inline void fbuffer_append_char(FBuffer *fb, char newchr)
@@ -257,14 +250,11 @@ static VALUE fbuffer_finalize(FBuffer *fb)
{
if (fb->io) {
fbuffer_flush(fb);
- fbuffer_free(fb);
rb_io_flush(fb->io);
return fb->io;
} else {
- VALUE result = rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb));
- fbuffer_free(fb);
- return result;
+ return rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb));
}
}
-#endif
+#endif // _FBUFFER_H_
diff --git a/ext/json/generator/depend b/ext/json/generator/depend
index aee4ab94eb..3ba4acfdd2 100644
--- a/ext/json/generator/depend
+++ b/ext/json/generator/depend
@@ -178,6 +178,7 @@ generator.o: $(hdrdir)/ruby/ruby.h
generator.o: $(hdrdir)/ruby/st.h
generator.o: $(hdrdir)/ruby/subst.h
generator.o: $(srcdir)/../fbuffer/fbuffer.h
+generator.o: $(srcdir)/../json.h
generator.o: $(srcdir)/../simd/simd.h
generator.o: $(srcdir)/../vendor/fpconv.c
generator.o: $(srcdir)/../vendor/jeaiii-ltoa.h
diff --git a/ext/json/generator/extconf.rb b/ext/json/generator/extconf.rb
index fb9afd07f7..33af03ea30 100644
--- a/ext/json/generator/extconf.rb
+++ b/ext/json/generator/extconf.rb
@@ -5,8 +5,11 @@ if RUBY_ENGINE == 'truffleruby'
File.write('Makefile', dummy_makefile("").join)
else
append_cflags("-std=c99")
+ have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3
+ have_func("ruby_xfree_sized", "ruby.h") # RUBY_VERSION >= 4.1
+
$defs << "-DJSON_GENERATOR"
- $defs << "-DJSON_DEBUG" if ENV["JSON_DEBUG"]
+ $defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0"
if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
load __dir__ + "/../simd/conf.rb"
diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c
index 9c6ed93049..82853633ba 100644
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@@ -1,4 +1,4 @@
-#include "ruby.h"
+#include "../json.h"
#include "../fbuffer/fbuffer.h"
#include "../vendor/fpconv.c"
@@ -9,6 +9,12 @@
/* ruby api and some helpers */
+enum duplicate_key_action {
+ JSON_DEPRECATED = 0,
+ JSON_IGNORE,
+ JSON_RAISE,
+};
+
typedef struct JSON_Generator_StateStruct {
VALUE indent;
VALUE space;
@@ -21,20 +27,19 @@ typedef struct JSON_Generator_StateStruct {
long depth;
long buffer_initial_length;
+ enum duplicate_key_action on_duplicate_key;
+
+ bool as_json_single_arg;
bool allow_nan;
bool ascii_only;
bool script_safe;
bool strict;
} JSON_Generator_State;
-#ifndef RB_UNLIKELY
-#define RB_UNLIKELY(cond) (cond)
-#endif
-
static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
-static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
-static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
+static ID i_to_s, i_to_json, i_new, i_encode;
+static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
@@ -55,8 +60,11 @@ struct generate_json_data {
JSON_Generator_State *state;
VALUE obj;
generator_func func;
+ long depth;
};
+static SIMD_Implementation simd_impl;
+
static VALUE cState_from_state_s(VALUE self, VALUE opts);
static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
@@ -66,9 +74,6 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
-#ifdef RUBY_INTEGER_UNIFICATION
-static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
-#endif
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
@@ -76,23 +81,18 @@ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *d
static int usascii_encindex, utf8_encindex, binary_encindex;
-#ifdef RBIMPL_ATTR_NORETURN
-RBIMPL_ATTR_NORETURN()
-#endif
-static void raise_generator_error_str(VALUE invalid_object, VALUE str)
+NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str)
{
+ rb_enc_associate_index(str, utf8_encindex);
VALUE exc = rb_exc_new_str(eGeneratorError, str);
rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
rb_exc_raise(exc);
}
-#ifdef RBIMPL_ATTR_NORETURN
-RBIMPL_ATTR_NORETURN()
-#endif
#ifdef RBIMPL_ATTR_FORMAT
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
#endif
-static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
+NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
@@ -127,13 +127,7 @@ typedef struct _search_state {
#endif /* HAVE_SIMD */
} search_state;
-#if (defined(__GNUC__ ) || defined(__clang__))
-#define FORCE_INLINE __attribute__((always_inline))
-#else
-#define FORCE_INLINE
-#endif
-
-static inline FORCE_INLINE void search_flush(search_state *search)
+ALWAYS_INLINE(static) void search_flush(search_state *search)
{
// Do not remove this conditional without profiling, specifically escape-heavy text.
// escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
@@ -160,8 +154,6 @@ static const unsigned char escape_table_basic[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
-static unsigned char (*search_escape_basic_impl)(search_state *);
-
static inline unsigned char search_escape_basic(search_state *search)
{
while (search->ptr < search->end) {
@@ -176,7 +168,7 @@ static inline unsigned char search_escape_basic(search_state *search)
return 0;
}
-static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
+ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search)
{
const unsigned char ch = (unsigned char)*search->ptr;
switch (ch) {
@@ -217,11 +209,39 @@ static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
* Everything else (should be UTF-8) is just passed through and
* appended to the result.
*/
+
+
+#if defined(HAVE_SIMD_NEON)
+static inline unsigned char search_escape_basic_neon(search_state *search);
+#elif defined(HAVE_SIMD_SSE2)
+static inline unsigned char search_escape_basic_sse2(search_state *search);
+#endif
+
+static inline unsigned char search_escape_basic(search_state *search);
+
static inline void convert_UTF8_to_JSON(search_state *search)
{
- while (search_escape_basic_impl(search)) {
+#ifdef HAVE_SIMD
+#if defined(HAVE_SIMD_NEON)
+ while (search_escape_basic_neon(search)) {
+ escape_UTF8_char_basic(search);
+ }
+#elif defined(HAVE_SIMD_SSE2)
+ if (simd_impl == SIMD_SSE2) {
+ while (search_escape_basic_sse2(search)) {
+ escape_UTF8_char_basic(search);
+ }
+ return;
+ }
+ while (search_escape_basic(search)) {
+ escape_UTF8_char_basic(search);
+ }
+#endif
+#else
+ while (search_escape_basic(search)) {
escape_UTF8_char_basic(search);
}
+#endif /* HAVE_SIMD */
}
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
@@ -263,8 +283,10 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
#ifdef HAVE_SIMD
-static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
+ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
{
+ RBIMPL_ASSERT_OR_ASSUME(len < vec_len);
+
// Flush the buffer so everything up until the last 'len' characters are unflushed.
search_flush(search);
@@ -274,19 +296,25 @@ static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsi
char *s = (buf->ptr + buf->len);
// Pad the buffer with dummy characters that won't need escaping.
- // This seem wateful at first sight, but memset of vector length is very fast.
- memset(s, 'X', vec_len);
+ // This seems wasteful at first sight, but memset of vector length is very fast.
+ // This is a space as it can be directly represented as an immediate on AArch64.
+ memset(s, ' ', vec_len);
// Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
// to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
- MEMCPY(s, search->ptr, char, len);
+ if (vec_len == 16) {
+ RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD);
+ json_fast_memcpy16(s, search->ptr, len);
+ } else {
+ MEMCPY(s, search->ptr, char, len);
+ }
return s;
}
#ifdef HAVE_SIMD_NEON
-static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
+ALWAYS_INLINE(static) unsigned char neon_next_match(search_state *search)
{
uint64_t mask = search->matches_mask;
uint32_t index = trailing_zeros64(mask) >> 2;
@@ -400,7 +428,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
#ifdef HAVE_SIMD_SSE2
-static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
+ALWAYS_INLINE(static) unsigned char sse2_next_match(search_state *search)
{
int mask = search->matches_mask;
int index = trailing_zeros(mask);
@@ -424,7 +452,7 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
#define TARGET_SSE2
#endif
-static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
+ALWAYS_INLINE(static) TARGET_SSE2 unsigned char search_escape_basic_sse2(search_state *search)
{
if (RB_UNLIKELY(search->has_matches)) {
// There are more matches if search->matches_mask > 0.
@@ -672,233 +700,6 @@ static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned
}
}
-/*
- * Document-module: JSON::Ext::Generator
- *
- * This is the JSON generator implemented as a C extension. It can be
- * configured to be used by setting
- *
- * JSON.generator = JSON::Ext::Generator
- *
- * with the method generator= in JSON.
- *
- */
-
-/* Explanation of the following: that's the only way to not pollute
- * standard library's docs with GeneratorMethods::<ClassName> which
- * are uninformative and take a large place in a list of classes
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods
- * :nodoc:
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods::Array
- * :nodoc:
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods::Bignum
- * :nodoc:
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods::FalseClass
- * :nodoc:
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods::Fixnum
- * :nodoc:
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods::Float
- * :nodoc:
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods::Hash
- * :nodoc:
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods::Integer
- * :nodoc:
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods::NilClass
- * :nodoc:
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods::Object
- * :nodoc:
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods::String
- * :nodoc:
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods::String::Extend
- * :nodoc:
- */
-
-/*
- * Document-module: JSON::Ext::Generator::GeneratorMethods::TrueClass
- * :nodoc:
- */
-
-/*
- * call-seq: to_json(state = nil)
- *
- * Returns a JSON string containing a JSON object, that is generated from
- * this Hash instance.
- * _state_ is a JSON::State object, that can also be used to configure the
- * produced JSON string output further.
- */
-static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
-{
- rb_check_arity(argc, 0, 1);
- VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_object, Qfalse);
-}
-
-/*
- * call-seq: to_json(state = nil)
- *
- * Returns a JSON string containing a JSON array, that is generated from
- * this Array instance.
- * _state_ is a JSON::State object, that can also be used to configure the
- * produced JSON string output further.
- */
-static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
-{
- rb_check_arity(argc, 0, 1);
- VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
-}
-
-#ifdef RUBY_INTEGER_UNIFICATION
-/*
- * call-seq: to_json(*)
- *
- * Returns a JSON string representation for this Integer number.
- */
-static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
-{
- rb_check_arity(argc, 0, 1);
- VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_integer, Qfalse);
-}
-
-#else
-/*
- * call-seq: to_json(*)
- *
- * Returns a JSON string representation for this Integer number.
- */
-static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self)
-{
- rb_check_arity(argc, 0, 1);
- VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_fixnum, Qfalse);
-}
-
-/*
- * call-seq: to_json(*)
- *
- * Returns a JSON string representation for this Integer number.
- */
-static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self)
-{
- rb_check_arity(argc, 0, 1);
- VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_bignum, Qfalse);
-}
-#endif
-
-/*
- * call-seq: to_json(*)
- *
- * Returns a JSON string representation for this Float number.
- */
-static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
-{
- rb_check_arity(argc, 0, 1);
- VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
-}
-
-/*
- * call-seq: to_json(*)
- *
- * This string should be encoded with UTF-8 A call to this method
- * returns a JSON string encoded with UTF16 big endian characters as
- * \u????.
- */
-static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
-{
- rb_check_arity(argc, 0, 1);
- VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
- return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
-}
-
-/*
- * call-seq: to_json(*)
- *
- * Returns a JSON string for true: 'true'.
- */
-static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
-{
- rb_check_arity(argc, 0, 1);
- return rb_utf8_str_new("true", 4);
-}
-
-/*
- * call-seq: to_json(*)
- *
- * Returns a JSON string for false: 'false'.
- */
-static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
-{
- rb_check_arity(argc, 0, 1);
- return rb_utf8_str_new("false", 5);
-}
-
-/*
- * call-seq: to_json(*)
- *
- * Returns a JSON string for nil: 'null'.
- */
-static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self)
-{
- rb_check_arity(argc, 0, 1);
- return rb_utf8_str_new("null", 4);
-}
-
-/*
- * call-seq: to_json(*)
- *
- * Converts this object to a string (calling #to_s), converts
- * it to a JSON string, and returns the result. This is a fallback, if no
- * special method #to_json was defined for some object.
- */
-static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
-{
- VALUE state;
- VALUE string = rb_funcall(self, i_to_s, 0);
- rb_scan_args(argc, argv, "01", &state);
- Check_Type(string, T_STRING);
- state = cState_from_state_s(cState, state);
- return cState_partial_generate(state, string, generate_json_string, Qfalse);
-}
-
static void State_mark(void *ptr)
{
JSON_Generator_State *state = ptr;
@@ -921,32 +722,24 @@ static void State_compact(void *ptr)
state->as_json = rb_gc_location(state->as_json);
}
-static void State_free(void *ptr)
-{
- JSON_Generator_State *state = ptr;
- ruby_xfree(state);
-}
-
static size_t State_memsize(const void *ptr)
{
+#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
+ return 0;
+#else
return sizeof(JSON_Generator_State);
-}
-
-#ifndef HAVE_RB_EXT_RACTOR_SAFE
-# undef RUBY_TYPED_FROZEN_SHAREABLE
-# define RUBY_TYPED_FROZEN_SHAREABLE 0
#endif
+}
static const rb_data_type_t JSON_Generator_State_type = {
- "JSON/Generator/State",
- {
+ .wrap_struct_name = "JSON/Generator/State",
+ .function = {
.dmark = State_mark,
- .dfree = State_free,
+ .dfree = RUBY_DEFAULT_FREE,
.dsize = State_memsize,
.dcompact = State_compact,
},
- 0, 0,
- RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
+ .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
};
static void state_init(JSON_Generator_State *state)
@@ -978,18 +771,24 @@ static void vstate_spill(struct generate_json_data *data)
RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
}
-static inline VALUE vstate_get(struct generate_json_data *data)
+static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj)
{
if (RB_UNLIKELY(!data->vstate)) {
vstate_spill(data);
}
- return data->vstate;
+ GET_STATE(data->vstate);
+ state->depth = data->depth;
+ VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate);
+ // no need to restore state->depth, vstate is just a temporary State
+ return tmp;
}
-struct hash_foreach_arg {
- struct generate_json_data *data;
- int iter;
-};
+static VALUE
+json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
+{
+ VALUE proc_args[2] = {object, is_key};
+ return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
+}
static VALUE
convert_string_subclass(VALUE key)
@@ -1006,6 +805,159 @@ convert_string_subclass(VALUE key)
return key_to_s;
}
+static bool enc_utf8_compatible_p(int enc_idx)
+{
+ if (enc_idx == usascii_encindex) return true;
+ if (enc_idx == utf8_encindex) return true;
+ return false;
+}
+
+static VALUE encode_json_string_try(VALUE str)
+{
+ return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
+}
+
+static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
+{
+ raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
+ return Qundef;
+}
+
+static inline int json_str_coderange(VALUE str) {
+ int coderange = RB_ENC_CODERANGE(str);
+ if (coderange == RUBY_ENC_CODERANGE_UNKNOWN) {
+ coderange = rb_enc_str_coderange(str);
+ }
+ return coderange;
+}
+
+static inline bool valid_json_string_p(VALUE str)
+{
+ int coderange = json_str_coderange(str);
+
+ if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) {
+ return true;
+ }
+
+ if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) {
+ return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str));
+ }
+
+ return false;
+}
+
+NOINLINE(static) VALUE convert_invalid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key)
+{
+ if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) {
+ VALUE coerced_str = json_call_as_json(data->state, str, Qfalse);
+ if (coerced_str != str) {
+ if (RB_TYPE_P(coerced_str, T_STRING)) {
+ if (!valid_json_string_p(coerced_str)) {
+ raise_generator_error(str, "source sequence is illegal/malformed utf-8");
+ }
+ } else {
+ // as_json could return another type than T_STRING
+ if (is_key) {
+ raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str));
+ }
+ }
+
+ return coerced_str;
+ }
+ }
+
+ if (RB_ENCODING_GET_INLINED(str) == binary_encindex) {
+ VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
+ switch (rb_enc_str_coderange(utf8_string)) {
+ case ENC_CODERANGE_7BIT:
+ return utf8_string;
+ case ENC_CODERANGE_VALID:
+ // For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
+ // TODO: Raise in 3.0.0
+ rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
+ return utf8_string;
+ break;
+ }
+ }
+
+ return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
+}
+
+ALWAYS_INLINE(static) VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key)
+{
+ if (RB_LIKELY(valid_json_string_p(str))) {
+ return str;
+ }
+ else {
+ return convert_invalid_encoding(data, str, as_json_called, is_key);
+ }
+}
+
+static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
+{
+ fbuffer_append_char(buffer, '"');
+
+ long len;
+ search_state search;
+ search.buffer = buffer;
+ RSTRING_GETMEM(obj, search.ptr, len);
+ search.cursor = search.ptr;
+ search.end = search.ptr + len;
+
+#ifdef HAVE_SIMD
+ search.matches_mask = 0;
+ search.has_matches = false;
+ search.chunk_base = NULL;
+ search.chunk_end = NULL;
+#endif /* HAVE_SIMD */
+
+ switch (json_str_coderange(obj)) {
+ case ENC_CODERANGE_7BIT:
+ case ENC_CODERANGE_VALID:
+ if (RB_UNLIKELY(data->state->ascii_only)) {
+ convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
+ } else if (RB_UNLIKELY(data->state->script_safe)) {
+ convert_UTF8_to_script_safe_JSON(&search);
+ } else {
+ convert_UTF8_to_JSON(&search);
+ }
+ break;
+ default:
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
+ break;
+ }
+ fbuffer_append_char(buffer, '"');
+}
+
+static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
+{
+ obj = ensure_valid_encoding(data, obj, false, false);
+ raw_generate_json_string(buffer, data, obj);
+}
+
+struct hash_foreach_arg {
+ VALUE hash;
+ struct generate_json_data *data;
+ int first_key_type;
+ bool first;
+ bool mixed_keys_encountered;
+};
+
+NOINLINE(static) void
+json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
+{
+ if (arg->mixed_keys_encountered) {
+ return;
+ }
+ arg->mixed_keys_encountered = true;
+
+ JSON_Generator_State *state = arg->data->state;
+ if (state->on_duplicate_key != JSON_IGNORE) {
+ VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
+ rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
+ }
+}
+
static int
json_object_i(VALUE key, VALUE val, VALUE _arg)
{
@@ -1015,22 +967,34 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
FBuffer *buffer = data->buffer;
JSON_Generator_State *state = data->state;
- long depth = state->depth;
- int j;
+ long depth = data->depth;
+ int key_type = rb_type(key);
+
+ if (arg->first) {
+ arg->first = false;
+ arg->first_key_type = key_type;
+ }
+ else {
+ fbuffer_append_char(buffer, ',');
+ }
- if (arg->iter > 0) fbuffer_append_char(buffer, ',');
if (RB_UNLIKELY(data->state->object_nl)) {
fbuffer_append_str(buffer, data->state->object_nl);
}
if (RB_UNLIKELY(data->state->indent)) {
- for (j = 0; j < depth; j++) {
- fbuffer_append_str(buffer, data->state->indent);
- }
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
}
VALUE key_to_s;
- switch (rb_type(key)) {
+ bool as_json_called = false;
+
+ start:
+ switch (key_type) {
case T_STRING:
+ if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
+ json_inspect_hash_with_mixed_keys(arg);
+ }
+
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
key_to_s = key;
} else {
@@ -1038,15 +1002,31 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
}
break;
case T_SYMBOL:
+ if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
+ json_inspect_hash_with_mixed_keys(arg);
+ }
+
key_to_s = rb_sym2str(key);
break;
default:
+ if (data->state->strict) {
+ if (RTEST(data->state->as_json) && !as_json_called) {
+ key = json_call_as_json(data->state, key, Qtrue);
+ key_type = rb_type(key);
+ as_json_called = true;
+ goto start;
+ } else {
+ raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
+ }
+ }
key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
break;
}
+ key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true);
+
if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
- generate_json_string(buffer, data, key_to_s);
+ raw_generate_json_string(buffer, data, key_to_s);
} else {
generate_json(buffer, data, key_to_s);
}
@@ -1055,46 +1035,43 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
generate_json(buffer, data, val);
- arg->iter++;
return ST_CONTINUE;
}
static inline long increase_depth(struct generate_json_data *data)
{
JSON_Generator_State *state = data->state;
- long depth = ++state->depth;
+ long depth = ++data->depth;
if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
+ rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --data->depth);
}
return depth;
}
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
- int j;
long depth = increase_depth(data);
if (RHASH_SIZE(obj) == 0) {
fbuffer_append(buffer, "{}", 2);
- --data->state->depth;
+ --data->depth;
return;
}
fbuffer_append_char(buffer, '{');
struct hash_foreach_arg arg = {
+ .hash = obj,
.data = data,
- .iter = 0,
+ .first = true,
};
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
- depth = --data->state->depth;
+ depth = --data->depth;
if (RB_UNLIKELY(data->state->object_nl)) {
fbuffer_append_str(buffer, data->state->object_nl);
if (RB_UNLIKELY(data->state->indent)) {
- for (j = 0; j < depth; j++) {
- fbuffer_append_str(buffer, data->state->indent);
- }
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
}
}
fbuffer_append_char(buffer, '}');
@@ -1102,125 +1079,41 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
- int i, j;
long depth = increase_depth(data);
if (RARRAY_LEN(obj) == 0) {
fbuffer_append(buffer, "[]", 2);
- --data->state->depth;
+ --data->depth;
return;
}
fbuffer_append_char(buffer, '[');
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
- for (i = 0; i < RARRAY_LEN(obj); i++) {
+ for (int i = 0; i < RARRAY_LEN(obj); i++) {
if (i > 0) {
fbuffer_append_char(buffer, ',');
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
}
if (RB_UNLIKELY(data->state->indent)) {
- for (j = 0; j < depth; j++) {
- fbuffer_append_str(buffer, data->state->indent);
- }
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
}
generate_json(buffer, data, RARRAY_AREF(obj, i));
}
- data->state->depth = --depth;
+ data->depth = --depth;
if (RB_UNLIKELY(data->state->array_nl)) {
fbuffer_append_str(buffer, data->state->array_nl);
if (RB_UNLIKELY(data->state->indent)) {
- for (j = 0; j < depth; j++) {
- fbuffer_append_str(buffer, data->state->indent);
- }
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
}
}
fbuffer_append_char(buffer, ']');
}
-static inline int enc_utf8_compatible_p(int enc_idx)
-{
- if (enc_idx == usascii_encindex) return 1;
- if (enc_idx == utf8_encindex) return 1;
- return 0;
-}
-
-static VALUE encode_json_string_try(VALUE str)
-{
- return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
-}
-
-static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
-{
- raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
- return Qundef;
-}
-
-static inline VALUE ensure_valid_encoding(VALUE str)
-{
- int encindex = RB_ENCODING_GET(str);
- VALUE utf8_string;
- if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
- if (encindex == binary_encindex) {
- utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
- switch (rb_enc_str_coderange(utf8_string)) {
- case ENC_CODERANGE_7BIT:
- return utf8_string;
- case ENC_CODERANGE_VALID:
- // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
- // TODO: Raise in 3.0.0
- rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
- return utf8_string;
- break;
- }
- }
-
- str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
- }
- return str;
-}
-
-static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
-{
- obj = ensure_valid_encoding(obj);
-
- fbuffer_append_char(buffer, '"');
-
- long len;
- search_state search;
- search.buffer = buffer;
- RSTRING_GETMEM(obj, search.ptr, len);
- search.cursor = search.ptr;
- search.end = search.ptr + len;
-
-#ifdef HAVE_SIMD
- search.matches_mask = 0;
- search.has_matches = false;
- search.chunk_base = NULL;
-#endif /* HAVE_SIMD */
-
- switch (rb_enc_str_coderange(obj)) {
- case ENC_CODERANGE_7BIT:
- case ENC_CODERANGE_VALID:
- if (RB_UNLIKELY(data->state->ascii_only)) {
- convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
- } else if (RB_UNLIKELY(data->state->script_safe)) {
- convert_UTF8_to_script_safe_JSON(&search);
- } else {
- convert_UTF8_to_JSON(&search);
- }
- break;
- default:
- raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
- break;
- }
- fbuffer_append_char(buffer, '"');
-}
-
static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
VALUE tmp;
if (rb_respond_to(obj, i_to_json)) {
- tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
+ tmp = json_call_to_json(data, obj);
Check_Type(tmp, T_STRING);
fbuffer_append_str(buffer, tmp);
} else {
@@ -1262,19 +1155,9 @@ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *dat
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
VALUE tmp = rb_funcall(obj, i_to_s, 0);
- fbuffer_append_str(buffer, tmp);
+ fbuffer_append_str(buffer, StringValue(tmp));
}
-#ifdef RUBY_INTEGER_UNIFICATION
-static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
-{
- if (FIXNUM_P(obj))
- generate_json_fixnum(buffer, data, obj);
- else
- generate_json_bignum(buffer, data, obj);
-}
-#endif
-
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
{
double value = RFLOAT_VALUE(obj);
@@ -1283,11 +1166,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
/* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
if (!allow_nan) {
if (data->state->strict && data->state->as_json) {
- VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
+ VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
if (casted_obj != obj) {
increase_depth(data);
generate_json(buffer, data, casted_obj);
- data->state->depth--;
+ data->depth--;
return;
}
}
@@ -1300,12 +1183,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
}
/* This implementation writes directly into the buffer. We reserve
- * the 28 characters that fpconv_dtoa states as its maximum.
+ * the 32 characters that fpconv_dtoa states as its maximum.
*/
- fbuffer_inc_capa(buffer, 28);
+ fbuffer_inc_capa(buffer, 32);
char* d = buffer->ptr + buffer->len;
int len = fpconv_dtoa(value, d);
-
/* fpconv_dtoa converts a float to its shortest string representation,
* but it adds a ".0" if this is a plain integer.
*/
@@ -1319,7 +1201,7 @@ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *d
fbuffer_append_str(buffer, fragment);
}
-static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
+static inline void generate_json_general(FBuffer *buffer, struct generate_json_data *data, VALUE obj, bool fallback)
{
bool as_json_called = false;
start:
@@ -1346,22 +1228,31 @@ start:
generate_json_bignum(buffer, data, obj);
break;
case T_HASH:
- if (klass != rb_cHash) goto general;
+ if (fallback && klass != rb_cHash) goto general;
generate_json_object(buffer, data, obj);
break;
case T_ARRAY:
- if (klass != rb_cArray) goto general;
+ if (fallback && klass != rb_cArray) goto general;
generate_json_array(buffer, data, obj);
break;
case T_STRING:
- if (klass != rb_cString) goto general;
- generate_json_string(buffer, data, obj);
+ if (fallback && klass != rb_cString) goto general;
+
+ if (RB_LIKELY(valid_json_string_p(obj))) {
+ raw_generate_json_string(buffer, data, obj);
+ } else if (as_json_called) {
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
+ } else {
+ obj = ensure_valid_encoding(data, obj, false, false);
+ as_json_called = true;
+ goto start;
+ }
break;
case T_SYMBOL:
generate_json_symbol(buffer, data, obj);
break;
case T_FLOAT:
- if (klass != rb_cFloat) goto general;
+ if (fallback && klass != rb_cFloat) goto general;
generate_json_float(buffer, data, obj);
break;
case T_STRUCT:
@@ -1372,7 +1263,7 @@ start:
general:
if (data->state->strict) {
if (RTEST(data->state->as_json) && !as_json_called) {
- obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
+ obj = json_call_as_json(data->state, obj, Qfalse);
as_json_called = true;
goto start;
} else {
@@ -1385,26 +1276,34 @@ start:
}
}
+static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
+{
+ generate_json_general(buffer, data, obj, true);
+}
+
+static void generate_json_no_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
+{
+ generate_json_general(buffer, data, obj, false);
+}
+
static VALUE generate_json_try(VALUE d)
{
struct generate_json_data *data = (struct generate_json_data *)d;
data->func(data->buffer, data, data->obj);
- return Qnil;
+ return fbuffer_finalize(data->buffer);
}
-static VALUE generate_json_rescue(VALUE d, VALUE exc)
+static VALUE generate_json_ensure(VALUE d)
{
struct generate_json_data *data = (struct generate_json_data *)d;
fbuffer_free(data->buffer);
- rb_exc_raise(exc);
-
return Qundef;
}
-static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io)
+static inline VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io)
{
GET_STATE(self);
@@ -1416,14 +1315,13 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
struct generate_json_data data = {
.buffer = &buffer,
- .vstate = self,
+ .vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json
.state = state,
+ .depth = state->depth,
.obj = obj,
.func = func
};
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
-
- return fbuffer_finalize(&buffer);
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
}
/* call-seq:
@@ -1439,10 +1337,16 @@ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
rb_check_arity(argc, 1, 2);
VALUE obj = argv[0];
VALUE io = argc > 1 ? argv[1] : Qnil;
- VALUE result = cState_partial_generate(self, obj, generate_json, io);
- GET_STATE(self);
- (void)state;
- return result;
+ return cState_partial_generate(self, obj, generate_json, io);
+}
+
+/* :nodoc: */
+static VALUE cState_generate_no_fallback(int argc, VALUE *argv, VALUE self)
+{
+ rb_check_arity(argc, 1, 2);
+ VALUE obj = argv[0];
+ VALUE io = argc > 1 ? argv[1] : Qnil;
+ return cState_partial_generate(self, obj, generate_json_no_fallback, io);
}
static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
@@ -1467,12 +1371,14 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig)
if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State");
MEMCPY(objState, origState, JSON_Generator_State, 1);
- objState->indent = origState->indent;
- objState->space = origState->space;
- objState->space_before = origState->space_before;
- objState->object_nl = origState->object_nl;
- objState->array_nl = origState->array_nl;
- objState->as_json = origState->as_json;
+
+ RB_OBJ_WRITTEN(obj, Qundef, objState->indent);
+ RB_OBJ_WRITTEN(obj, Qundef, objState->space);
+ RB_OBJ_WRITTEN(obj, Qundef, objState->space_before);
+ RB_OBJ_WRITTEN(obj, Qundef, objState->object_nl);
+ RB_OBJ_WRITTEN(obj, Qundef, objState->array_nl);
+ RB_OBJ_WRITTEN(obj, Qundef, objState->as_json);
+
return obj;
}
@@ -1523,6 +1429,7 @@ static VALUE string_config(VALUE config)
*/
static VALUE cState_indent_set(VALUE self, VALUE indent)
{
+ rb_check_frozen(self);
GET_STATE(self);
RB_OBJ_WRITE(self, &state->indent, string_config(indent));
return Qnil;
@@ -1548,6 +1455,7 @@ static VALUE cState_space(VALUE self)
*/
static VALUE cState_space_set(VALUE self, VALUE space)
{
+ rb_check_frozen(self);
GET_STATE(self);
RB_OBJ_WRITE(self, &state->space, string_config(space));
return Qnil;
@@ -1571,6 +1479,7 @@ static VALUE cState_space_before(VALUE self)
*/
static VALUE cState_space_before_set(VALUE self, VALUE space_before)
{
+ rb_check_frozen(self);
GET_STATE(self);
RB_OBJ_WRITE(self, &state->space_before, string_config(space_before));
return Qnil;
@@ -1596,6 +1505,7 @@ static VALUE cState_object_nl(VALUE self)
*/
static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
{
+ rb_check_frozen(self);
GET_STATE(self);
RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl));
return Qnil;
@@ -1619,6 +1529,7 @@ static VALUE cState_array_nl(VALUE self)
*/
static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
{
+ rb_check_frozen(self);
GET_STATE(self);
RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl));
return Qnil;
@@ -1642,6 +1553,7 @@ static VALUE cState_as_json(VALUE self)
*/
static VALUE cState_as_json_set(VALUE self, VALUE as_json)
{
+ rb_check_frozen(self);
GET_STATE(self);
RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
return Qnil;
@@ -1673,7 +1585,21 @@ static VALUE cState_max_nesting(VALUE self)
static long long_config(VALUE num)
{
- return RTEST(num) ? FIX2LONG(num) : 0;
+ return RTEST(num) ? NUM2LONG(num) : 0;
+}
+
+// depth must be within 0..INT_MAX; reject anything else early with a clear error.
+static long depth_config(VALUE num)
+{
+ if (!RTEST(num)) return 0;
+ long d = NUM2LONG(num);
+ if (RB_UNLIKELY(d < 0)) {
+ rb_raise(rb_eArgError, "depth must be >= 0 (got %ld)", d);
+ }
+ if (RB_UNLIKELY(d > INT_MAX)) {
+ rb_raise(rb_eArgError, "depth is too large (got %ld)", d);
+ }
+ return d;
}
/*
@@ -1684,6 +1610,7 @@ static long long_config(VALUE num)
*/
static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
{
+ rb_check_frozen(self);
GET_STATE(self);
state->max_nesting = long_config(depth);
return Qnil;
@@ -1709,6 +1636,7 @@ static VALUE cState_script_safe(VALUE self)
*/
static VALUE cState_script_safe_set(VALUE self, VALUE enable)
{
+ rb_check_frozen(self);
GET_STATE(self);
state->script_safe = RTEST(enable);
return Qnil;
@@ -1740,6 +1668,7 @@ static VALUE cState_strict(VALUE self)
*/
static VALUE cState_strict_set(VALUE self, VALUE enable)
{
+ rb_check_frozen(self);
GET_STATE(self);
state->strict = RTEST(enable);
return Qnil;
@@ -1764,6 +1693,7 @@ static VALUE cState_allow_nan_p(VALUE self)
*/
static VALUE cState_allow_nan_set(VALUE self, VALUE enable)
{
+ rb_check_frozen(self);
GET_STATE(self);
state->allow_nan = RTEST(enable);
return Qnil;
@@ -1788,11 +1718,25 @@ static VALUE cState_ascii_only_p(VALUE self)
*/
static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
{
+ rb_check_frozen(self);
GET_STATE(self);
state->ascii_only = RTEST(enable);
return Qnil;
}
+static VALUE cState_allow_duplicate_key_p(VALUE self)
+{
+ GET_STATE(self);
+ switch (state->on_duplicate_key) {
+ case JSON_IGNORE:
+ return Qtrue;
+ case JSON_DEPRECATED:
+ return Qnil;
+ default:
+ return Qfalse;
+ }
+}
+
/*
* call-seq: depth
*
@@ -1812,8 +1756,9 @@ static VALUE cState_depth(VALUE self)
*/
static VALUE cState_depth_set(VALUE self, VALUE depth)
{
+ rb_check_frozen(self);
GET_STATE(self);
- state->depth = long_config(depth);
+ state->depth = depth_config(depth);
return Qnil;
}
@@ -1845,6 +1790,7 @@ static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_
*/
static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length)
{
+ rb_check_frozen(self);
GET_STATE(self);
buffer_initial_length_set(state, buffer_initial_length);
return Qnil;
@@ -1877,13 +1823,15 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
- else if (key == sym_depth) { state->depth = long_config(val); }
+ else if (key == sym_depth) { state->depth = depth_config(val); }
else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); }
else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
else if (key == sym_strict) { state->strict = RTEST(val); }
+ else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
else if (key == sym_as_json) {
VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
+ state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
state_write_value(data, &state->as_json, proc);
}
return ST_CONTINUE;
@@ -1909,12 +1857,13 @@ static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE con
static VALUE cState_configure(VALUE self, VALUE opts)
{
+ rb_check_frozen(self);
GET_STATE(self);
configure_state(state, self, opts);
return self;
}
-static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
+static VALUE cState_m_do_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io, generator_func func)
{
JSON_Generator_State state = {0};
state_init(&state);
@@ -1930,17 +1879,23 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
.buffer = &buffer,
.vstate = Qfalse,
.state = &state,
+ .depth = state.depth,
.obj = obj,
- .func = generate_json,
+ .func = func,
};
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
+}
- return fbuffer_finalize(&buffer);
+static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
+{
+ return cState_m_do_generate(klass, obj, opts, io, generate_json);
+}
+
+static VALUE cState_m_generate_no_fallback(VALUE klass, VALUE obj, VALUE opts, VALUE io)
+{
+ return cState_m_do_generate(klass, obj, opts, io, generate_json_no_fallback);
}
-/*
- *
- */
void Init_generator(void)
{
#ifdef HAVE_RB_EXT_RACTOR_SAFE
@@ -2005,45 +1960,12 @@ void Init_generator(void)
rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
rb_define_method(cState, "generate", cState_generate, -1);
- rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
+ rb_define_method(cState, "_generate_no_fallback", cState_generate_no_fallback, -1);
- rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
-
- VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
-
- VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object");
- rb_define_method(mObject, "to_json", mObject_to_json, -1);
-
- VALUE mHash = rb_define_module_under(mGeneratorMethods, "Hash");
- rb_define_method(mHash, "to_json", mHash_to_json, -1);
-
- VALUE mArray = rb_define_module_under(mGeneratorMethods, "Array");
- rb_define_method(mArray, "to_json", mArray_to_json, -1);
-
-#ifdef RUBY_INTEGER_UNIFICATION
- VALUE mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
- rb_define_method(mInteger, "to_json", mInteger_to_json, -1);
-#else
- VALUE mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
- rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1);
+ rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
- VALUE mBignum = rb_define_module_under(mGeneratorMethods, "Bignum");
- rb_define_method(mBignum, "to_json", mBignum_to_json, -1);
-#endif
- VALUE mFloat = rb_define_module_under(mGeneratorMethods, "Float");
- rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
-
- VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
- rb_define_method(mString, "to_json", mString_to_json, -1);
-
- VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
- rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
-
- VALUE mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
- rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1);
-
- VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
- rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
+ rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
+ rb_define_singleton_method(cState, "_generate_no_fallback", cState_m_generate_no_fallback, 3);
rb_global_variable(&Encoding_UTF_8);
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
@@ -2051,10 +1973,6 @@ void Init_generator(void)
i_to_s = rb_intern("to_s");
i_to_json = rb_intern("to_json");
i_new = rb_intern("new");
- i_pack = rb_intern("pack");
- i_unpack = rb_intern("unpack");
- i_create_id = rb_intern("create_id");
- i_extend = rb_intern("extend");
i_encode = rb_intern("encode");
sym_indent = ID2SYM(rb_intern("indent"));
@@ -2071,6 +1989,7 @@ void Init_generator(void)
sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
sym_strict = ID2SYM(rb_intern("strict"));
sym_as_json = ID2SYM(rb_intern("as_json"));
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
usascii_encindex = rb_usascii_encindex();
utf8_encindex = rb_utf8_encindex();
@@ -2078,22 +1997,5 @@ void Init_generator(void)
rb_require("json/ext/generator/state");
-
- switch (find_simd_implementation()) {
-#ifdef HAVE_SIMD
-#ifdef HAVE_SIMD_NEON
- case SIMD_NEON:
- search_escape_basic_impl = search_escape_basic_neon;
- break;
-#endif /* HAVE_SIMD_NEON */
-#ifdef HAVE_SIMD_SSE2
- case SIMD_SSE2:
- search_escape_basic_impl = search_escape_basic_sse2;
- break;
-#endif /* HAVE_SIMD_SSE2 */
-#endif /* HAVE_SIMD */
- default:
- search_escape_basic_impl = search_escape_basic;
- break;
- }
+ simd_impl = find_simd_implementation();
}
diff --git a/ext/json/json.h b/ext/json/json.h
new file mode 100644
index 0000000000..cf9420d4dd
--- /dev/null
+++ b/ext/json/json.h
@@ -0,0 +1,134 @@
+#ifndef _JSON_H_
+#define _JSON_H_
+
+#include "ruby.h"
+#include "ruby/encoding.h"
+#include <stdint.h>
+
+#ifndef RBIMPL_ASSERT_OR_ASSUME
+# define RBIMPL_ASSERT_OR_ASSUME(x)
+#endif
+
+#if defined(RUBY_DEBUG) && RUBY_DEBUG
+# define JSON_ASSERT RUBY_ASSERT
+#else
+# ifdef JSON_DEBUG
+# include <assert.h>
+# define JSON_ASSERT(x) assert(x)
+# else
+# define JSON_ASSERT(x)
+# endif
+#endif
+
+/* shims */
+
+#if SIZEOF_UINT64_T == SIZEOF_LONG_LONG
+# define INT64T2NUM(x) LL2NUM(x)
+# define UINT64T2NUM(x) ULL2NUM(x)
+#elif SIZEOF_UINT64_T == SIZEOF_LONG
+# define INT64T2NUM(x) LONG2NUM(x)
+# define UINT64T2NUM(x) ULONG2NUM(x)
+#else
+# error No uint64_t conversion
+#endif
+
+/* This is the fallback definition from Ruby 3.4 */
+#ifndef RBIMPL_STDBOOL_H
+#if defined(__cplusplus)
+# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
+# include <cstdbool>
+# endif
+#elif defined(HAVE_STDBOOL_H)
+# include <stdbool.h>
+#elif !defined(HAVE__BOOL)
+typedef unsigned char _Bool;
+# define bool _Bool
+# define true ((_Bool)+1)
+# define false ((_Bool)+0)
+# define __bool_true_false_are_defined
+#endif
+#endif
+
+#ifndef HAVE_RUBY_XFREE_SIZED
+static inline void ruby_xfree_sized(void *ptr, size_t oldsize)
+{
+ ruby_xfree(ptr);
+}
+
+static inline void *ruby_xrealloc2_sized(void *ptr, size_t new_elems, size_t elem_size, size_t old_elems)
+{
+ return ruby_xrealloc2(ptr, new_elems, elem_size);
+}
+#endif
+
+# define JSON_SIZED_REALLOC_N(v, T, m, n) \
+ ((v) = (T *)ruby_xrealloc2_sized((void *)(v), (m), sizeof(T), (n)))
+
+# define JSON_SIZED_FREE(v) ruby_xfree_sized((void *)(v), sizeof(*(v)))
+# define JSON_SIZED_FREE_N(v, n) ruby_xfree_sized((void *)(v), sizeof(*(v)) * (n))
+
+#ifndef HAVE_RB_EXT_RACTOR_SAFE
+# undef RUBY_TYPED_FROZEN_SHAREABLE
+# define RUBY_TYPED_FROZEN_SHAREABLE 0
+#endif
+
+#ifdef RUBY_TYPED_EMBEDDABLE
+# define HAVE_RUBY_TYPED_EMBEDDABLE 1
+#else
+# ifdef HAVE_CONST_RUBY_TYPED_EMBEDDABLE
+# define RUBY_TYPED_EMBEDDABLE RUBY_TYPED_EMBEDDABLE
+# define HAVE_RUBY_TYPED_EMBEDDABLE 1
+# else
+# define RUBY_TYPED_EMBEDDABLE 0
+# endif
+#endif
+
+#ifndef NORETURN
+#if defined(__has_attribute) && __has_attribute(noreturn)
+#define NORETURN(x) __attribute__((noreturn)) x
+#else
+#define NORETURN(x) x
+#endif
+#endif
+
+#ifndef NOINLINE
+#if defined(__has_attribute) && __has_attribute(noinline)
+#define NOINLINE(x) __attribute__((noinline)) x
+#else
+#define NOINLINE(x) x
+#endif
+#endif
+
+#ifndef ALWAYS_INLINE
+#if defined(__has_attribute) && __has_attribute(always_inline)
+#define ALWAYS_INLINE(x) inline __attribute__((always_inline)) x
+#else
+#define ALWAYS_INLINE(x) inline x
+#endif
+#endif
+
+#ifndef RB_UNLIKELY
+#define RB_UNLIKELY(expr) expr
+#endif
+
+#ifndef RB_LIKELY
+#define RB_LIKELY(expr) expr
+#endif
+
+#ifndef MAYBE_UNUSED
+# define MAYBE_UNUSED(x) x
+#endif
+
+#ifdef RUBY_DEBUG
+#ifndef JSON_DEBUG
+#define JSON_DEBUG RUBY_DEBUG
+#endif
+#endif
+
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && INTPTR_MAX == INT64_MAX
+#define JSON_CPU_LITTLE_ENDIAN_64BITS 1
+#else
+#define JSON_CPU_LITTLE_ENDIAN_64BITS 0
+#endif
+
+#endif // _JSON_H_
diff --git a/ext/json/lib/json.rb b/ext/json/lib/json.rb
index 0ebff2f948..26d601926f 100644
--- a/ext/json/lib/json.rb
+++ b/ext/json/lib/json.rb
@@ -6,6 +6,15 @@ require 'json/common'
#
# \JSON is a lightweight data-interchange format.
#
+# \JSON is easy for us humans to read and write,
+# and equally simple for machines to read (parse) and write (generate).
+#
+# \JSON is language-independent, making it an ideal interchange format
+# for applications in differing programming languages
+# and on differing operating systems.
+#
+# == \JSON Values
+#
# A \JSON value is one of the following:
# - Double-quoted text: <tt>"foo"</tt>.
# - Number: +1+, +1.0+, +2.0e2+.
@@ -173,6 +182,30 @@ require 'json/common'
# When enabled:
# JSON.parse('[1,]', allow_trailing_comma: true) # => [1]
#
+# ---
+#
+# Option +allow_control_characters+ (boolean) specifies whether to allow
+# unescaped ASCII control characters, such as newlines, in strings;
+# defaults to +false+.
+#
+# With the default, +false+:
+# JSON.parse(%{"Hello\nWorld"}) # invalid ASCII control character in string (JSON::ParserError)
+#
+# When enabled:
+# JSON.parse(%{"Hello\nWorld"}, allow_control_characters: true) # => "Hello\nWorld"
+#
+# ---
+#
+# Option +allow_invalid_escape+ (boolean) specifies whether to ignore backslashes that are followed
+# by an invalid escape character in strings;
+# defaults to +false+.
+#
+# With the default, +false+:
+# JSON.parse('"Hell\o"') # invalid escape character in string (JSON::ParserError)
+#
+# When enabled:
+# JSON.parse('"Hell\o"', allow_invalid_escape: true) # => "Hello"
+#
# ====== Output Options
#
# Option +freeze+ (boolean) specifies whether the returned objects will be frozen;
@@ -302,8 +335,27 @@ require 'json/common'
# JSON.generate(JSON::MinusInfinity)
#
# Allow:
-# ruby = [Float::NaN, Float::Infinity, Float::MinusInfinity]
-# JSON.generate(ruby, allow_nan: true) # => '[NaN,Infinity,-Infinity]'
+# ruby = [Float::NAN, Float::INFINITY, JSON::NaN, JSON::Infinity, JSON::MinusInfinity]
+# JSON.generate(ruby, allow_nan: true) # => '[NaN,Infinity,NaN,Infinity,-Infinity]'
+#
+# ---
+#
+# Option +allow_duplicate_key+ (boolean) specifies whether
+# hashes with duplicate keys should be allowed or produce an error;
+# by default, a deprecation warning is emitted.
+#
+# With the default (not set):
+# Warning[:deprecated] = true
+# JSON.generate({ foo: 1, "foo" => 2 })
+# # warning: detected duplicate key "foo" in {foo: 1, "foo" => 2}.
+# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`
+# # => '{"foo":1,"foo":2}'
+#
+# With <tt>false</tt>:
+# JSON.generate({ foo: 1, "foo" => 2 }, allow_duplicate_key: false)
+# # detected duplicate key "foo" in {foo: 1, "foo" => 2} (JSON::GeneratorError)
+#
+# In version 3.0, <tt>false</tt> will become the default.
#
# ---
#
@@ -384,6 +436,9 @@ require 'json/common'
#
# == \JSON Additions
#
+# Note that the JSON Additions feature is deprecated, and must only be
+# used with trusted data.
+#
# When you "round trip" a non-\String object from Ruby to \JSON and back,
# you have a new \String, instead of the object you began with:
# ruby0 = Range.new(0, 2)
diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb
index e99d152a88..230bf08012 100644
--- a/ext/json/lib/json/common.rb
+++ b/ext/json/lib/json/common.rb
@@ -71,9 +71,14 @@ module JSON
end
when object_class
if opts[:create_additions] != false
- if class_name = object[JSON.create_id]
- klass = JSON.deep_const_get(class_name)
- if (klass.respond_to?(:json_creatable?) && klass.json_creatable?) || klass.respond_to?(:json_create)
+ if class_path = object[JSON.create_id]
+ klass = begin
+ Object.const_get(class_path)
+ rescue NameError => e
+ raise ArgumentError, "can't get const #{class_path}: #{e}"
+ end
+
+ if klass.respond_to?(:json_creatable?) ? klass.json_creatable? : klass.respond_to?(:json_create)
create_additions_warning if create_additions.nil?
object = klass.json_create(object)
end
@@ -97,7 +102,7 @@ module JSON
class << self
def deprecation_warning(message, uplevel = 3) # :nodoc:
- gem_root = File.expand_path("../../../", __FILE__) + "/"
+ gem_root = File.expand_path("..", __dir__) + "/"
caller_locations(uplevel, 10).each do |frame|
if frame.path.nil? || frame.path.start_with?(gem_root) || frame.path.end_with?("/truffle/cext_ruby.rb", ".c")
uplevel += 1
@@ -147,29 +152,21 @@ module JSON
const_set :Parser, parser
end
- # Return the constant located at _path_. The format of _path_ has to be
- # either ::A::B::C or A::B::C. In any case, A has to be located at the top
- # level (absolute namespace path?). If there doesn't exist a constant at
- # the given path, an ArgumentError is raised.
- def deep_const_get(path) # :nodoc:
- Object.const_get(path)
- rescue NameError => e
- raise ArgumentError, "can't get const #{path}: #{e}"
- end
-
# Set the module _generator_ to be used by JSON.
def generator=(generator) # :nodoc:
old, $VERBOSE = $VERBOSE, nil
@generator = generator
- generator_methods = generator::GeneratorMethods
- for const in generator_methods.constants
- klass = const_get(const)
- modul = generator_methods.const_get(const)
- klass.class_eval do
- instance_methods(false).each do |m|
- m.to_s == 'to_json' and remove_method m
+ if generator.const_defined?(:GeneratorMethods)
+ generator_methods = generator::GeneratorMethods
+ for const in generator_methods.constants
+ klass = const_get(const)
+ modul = generator_methods.const_get(const)
+ klass.class_eval do
+ instance_methods(false).each do |m|
+ m.to_s == 'to_json' and remove_method m
+ end
+ include modul
end
- include modul
end
end
self.state = generator::State
@@ -186,6 +183,25 @@ module JSON
private
+ # Called from the extension when a hash has both string and symbol keys
+ def on_mixed_keys_hash(hash, do_raise)
+ set = {}
+ hash.each_key do |key|
+ key_str = key.to_s
+
+ if set[key_str]
+ message = "detected duplicate key #{key_str.inspect} in #{hash.inspect}"
+ if do_raise
+ raise GeneratorError, message
+ else
+ deprecation_warning("#{message}.\nThis will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`")
+ end
+ else
+ set[key_str] = true
+ end
+ end
+ end
+
def deprecated_singleton_attr_accessor(*attrs)
args = RUBY_VERSION >= "3.0" ? ", category: :deprecated" : ""
attrs.each do |attr|
@@ -391,7 +407,7 @@ module JSON
#
# Returns a \String containing the generated \JSON data.
#
- # See also JSON.fast_generate, JSON.pretty_generate.
+ # See also JSON.pretty_generate.
#
# Argument +obj+ is the Ruby object to be converted to \JSON.
#
@@ -536,6 +552,7 @@ module JSON
:create_additions => nil,
}
# :call-seq:
+ # JSON.unsafe_load(source, options = {}) -> object
# JSON.unsafe_load(source, proc = nil, options = {}) -> object
#
# Returns the Ruby objects created by parsing the given +source+.
@@ -643,6 +660,7 @@ module JSON
# when Array
# obj.map! {|v| deserialize_obj v }
# end
+ # obj
# })
# pp ruby
# Output:
@@ -666,7 +684,12 @@ module JSON
#
def unsafe_load(source, proc = nil, options = nil)
opts = if options.nil?
- _unsafe_load_default_options
+ if proc && proc.is_a?(Hash)
+ options, proc = proc, nil
+ options
+ else
+ _unsafe_load_default_options
+ end
else
_unsafe_load_default_options.merge(options)
end
@@ -684,12 +707,17 @@ module JSON
if opts[:allow_blank] && (source.nil? || source.empty?)
source = 'null'
end
- result = parse(source, opts)
- recurse_proc(result, &proc) if proc
- result
+
+ if proc
+ opts = opts.dup
+ opts[:on_load] = proc.to_proc
+ end
+
+ parse(source, opts)
end
# :call-seq:
+ # JSON.load(source, options = {}) -> object
# JSON.load(source, proc = nil, options = {}) -> object
#
# Returns the Ruby objects created by parsing the given +source+.
@@ -803,6 +831,7 @@ module JSON
# when Array
# obj.map! {|v| deserialize_obj v }
# end
+ # obj
# })
# pp ruby
# Output:
@@ -825,8 +854,18 @@ module JSON
# @attributes={"type"=>"Admin", "password"=>"0wn3d"}>}
#
def load(source, proc = nil, options = nil)
+ if proc && options.nil? && proc.is_a?(Hash)
+ options = proc
+ proc = nil
+ end
+
opts = if options.nil?
- _load_default_options
+ if proc && proc.is_a?(Hash)
+ options, proc = proc, nil
+ options
+ else
+ _load_default_options
+ end
else
_load_default_options.merge(options)
end
@@ -841,7 +880,7 @@ module JSON
end
end
- if opts[:allow_blank] && (source.nil? || source.empty?)
+ if opts[:allow_blank] && (source.nil? || (String === source && source.empty?))
source = 'null'
end
@@ -999,7 +1038,8 @@ module JSON
# JSON.new(options = nil, &block)
#
# Argument +options+, if given, contains a \Hash of options for both parsing and generating.
- # See {Parsing Options}[#module-JSON-label-Parsing+Options], and {Generating Options}[#module-JSON-label-Generating+Options].
+ # See {Parsing Options}[rdoc-ref:JSON@Parsing+Options],
+ # and {Generating Options}[rdoc-ref:JSON@Generating+Options].
#
# For generation, the <tt>strict: true</tt> option is always set. When a Ruby object with no native \JSON counterpart is
# encountered, the block provided to the initialize method is invoked, and must return a Ruby object that has a native
@@ -1028,7 +1068,7 @@ module JSON
options[:as_json] = as_json if as_json
@state = State.new(options).freeze
- @parser_config = Ext::Parser::Config.new(ParserOptions.prepare(options))
+ @parser_config = Ext::Parser::Config.new(ParserOptions.prepare(options)).freeze
end
# call-seq:
@@ -1037,7 +1077,7 @@ module JSON
#
# Serialize the given object into a \JSON document.
def dump(object, io = nil)
- @state.generate_new(object, io)
+ @state.generate(object, io)
end
alias_method :generate, :dump
@@ -1058,6 +1098,30 @@ module JSON
load(File.read(path, encoding: Encoding::UTF_8))
end
end
+
+  module GeneratorMethods
+    # call-seq: to_json(*)
+    #
+    # Converts this object into a JSON string.
+    # If this object doesn't directly map to a JSON native type,
+    # it is first converted to a string (by calling #to_s), and that
+    # string is then converted to a JSON string and returned.
+    # This is the fallback used when no specialized #to_json method was defined for an object.
+    def to_json(state = nil, *)
+      native = case self
+      when nil, false, true, Integer, Float, Array, Hash then true
+      else false
+      end
+      obj = native ? self : "#{self}"
+
+      return JSON::State._generate_no_fallback(obj, nil, nil) if state.nil?
+
+      JSON::State.from_state(state)._generate_no_fallback(obj)
+    end
+  end
end
module ::Kernel
@@ -1103,3 +1167,7 @@ module ::Kernel
JSON[object, opts]
end
end
+
+# Mix JSON::GeneratorMethods into Object so that its fallback #to_json is
+# available on every object that does not define a more specific one.
+class Object
+ include JSON::GeneratorMethods
+end
diff --git a/ext/json/lib/json/ext/generator/state.rb b/ext/json/lib/json/ext/generator/state.rb
index d40c3b5ec3..e4f425af6a 100644
--- a/ext/json/lib/json/ext/generator/state.rb
+++ b/ext/json/lib/json/ext/generator/state.rb
@@ -8,20 +8,8 @@ module JSON
#
# Instantiates a new State object, configured by _opts_.
#
- # _opts_ can have the following keys:
- #
- # * *indent*: a string used to indent levels (default: ''),
- # * *space*: a string that is put after, a : or , delimiter (default: ''),
- # * *space_before*: a string that is put before a : pair delimiter (default: ''),
- # * *object_nl*: a string that is put at the end of a JSON object (default: ''),
- # * *array_nl*: a string that is put at the end of a JSON array (default: ''),
- # * *allow_nan*: true if NaN, Infinity, and -Infinity should be
- # generated, otherwise an exception is thrown, if these values are
- # encountered. This options defaults to false.
- # * *ascii_only*: true if only ASCII characters should be generated. This
- # option defaults to false.
- # * *buffer_initial_length*: sets the initial length of the generator's
- # internal buffer.
+ # Argument +opts+, if given, contains a \Hash of options for the generation.
+ # See {Generating Options}[rdoc-ref:JSON@Generating+Options].
def initialize(opts = nil)
if opts && !opts.empty?
configure(opts)
@@ -68,6 +56,11 @@ module JSON
buffer_initial_length: buffer_initial_length,
}
+ allow_duplicate_key = allow_duplicate_key?
+ unless allow_duplicate_key.nil?
+ result[:allow_duplicate_key] = allow_duplicate_key
+ end
+
instance_variables.each do |iv|
iv = iv.to_s[1..-1]
result[iv.to_sym] = self[iv]
@@ -82,6 +75,8 @@ module JSON
#
# Returns the value returned by method +name+.
def [](name)
+ ::JSON.deprecation_warning("JSON::State#[] is deprecated and will be removed in json 3.0.0")
+
if respond_to?(name)
__send__(name)
else
@@ -94,6 +89,8 @@ module JSON
#
# Sets the attribute name to value.
def []=(name, value)
+ ::JSON.deprecation_warning("JSON::State#[]= is deprecated and will be removed in json 3.0.0")
+
if respond_to?(name_writer = "#{name}=")
__send__ name_writer, value
else
diff --git a/ext/json/lib/json/generic_object.rb b/ext/json/lib/json/generic_object.rb
index ec5aa9dcb2..5c8ace354b 100644
--- a/ext/json/lib/json/generic_object.rb
+++ b/ext/json/lib/json/generic_object.rb
@@ -52,14 +52,6 @@ module JSON
table
end
- def [](name)
- __send__(name)
- end unless method_defined?(:[])
-
- def []=(name, value)
- __send__("#{name}=", value)
- end unless method_defined?(:[]=)
-
def |(other)
self.class[other.to_hash.merge(to_hash)]
end
diff --git a/ext/json/lib/json/version.rb b/ext/json/lib/json/version.rb
index f9ac3e17a9..30c0a71d2f 100644
--- a/ext/json/lib/json/version.rb
+++ b/ext/json/lib/json/version.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: true
module JSON
- VERSION = '2.13.2'
+ VERSION = '2.19.8'
end
diff --git a/ext/json/parser/depend b/ext/json/parser/depend
index 1bb03d3517..d4737b1dfb 100644
--- a/ext/json/parser/depend
+++ b/ext/json/parser/depend
@@ -175,6 +175,8 @@ parser.o: $(hdrdir)/ruby/ruby.h
parser.o: $(hdrdir)/ruby/st.h
parser.o: $(hdrdir)/ruby/subst.h
parser.o: $(srcdir)/../fbuffer/fbuffer.h
+parser.o: $(srcdir)/../json.h
parser.o: $(srcdir)/../simd/simd.h
+parser.o: $(srcdir)/../vendor/ryu.h
parser.o: parser.c
# AUTOGENERATED DEPENDENCIES END
diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb
index de5d5758b4..a9d740c755 100644
--- a/ext/json/parser/extconf.rb
+++ b/ext/json/parser/extconf.rb
@@ -1,10 +1,16 @@
# frozen_string_literal: true
require 'mkmf'
+$defs << "-DJSON_DEBUG" if ENV.fetch("JSON_DEBUG", "0") != "0"
have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0
+have_func("rb_str_to_interned_str", "ruby.h") # RUBY_VERSION >= 3.0
have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2
have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby
-have_func("strnlen", "string.h") # Missing on Solaris 10
+have_func("ruby_xfree_sized", "ruby.h") # RUBY_VERSION >= 4.1
+
+if RUBY_ENGINE == "ruby"
+ have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3
+end
append_cflags("-std=c99")
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 1e6ee753f0..c0631728c3 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -1,50 +1,22 @@
-#include "ruby.h"
-#include "ruby/encoding.h"
-
-/* shims */
-/* This is the fallback definition from Ruby 3.4 */
-
-#ifndef RBIMPL_STDBOOL_H
-#if defined(__cplusplus)
-# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
-# include <cstdbool>
-# endif
-#elif defined(HAVE_STDBOOL_H)
-# include <stdbool.h>
-#elif !defined(HAVE__BOOL)
-typedef unsigned char _Bool;
-# define bool _Bool
-# define true ((_Bool)+1)
-# define false ((_Bool)+0)
-# define __bool_true_false_are_defined
-#endif
-#endif
-
+#include "../json.h"
+#include "../vendor/ryu.h"
#include "../simd/simd.h"
-#ifndef RB_UNLIKELY
-#define RB_UNLIKELY(expr) expr
-#endif
-
-#ifndef RB_LIKELY
-#define RB_LIKELY(expr) expr
-#endif
-
static VALUE mJSON, eNestingError, Encoding_UTF_8;
static VALUE CNaN, CInfinity, CMinusInfinity;
-static ID i_chr, i_aset, i_aref,
- i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
+static ID i_new, i_try_convert, i_uminus, i_encode;
-static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
- sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
+static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
+ sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
+ sym_allow_duplicate_key;
static int binary_encindex;
static int utf8_encindex;
#ifndef HAVE_RB_HASH_BULK_INSERT
// For TruffleRuby
-void
+static void
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
{
long index = 0;
@@ -61,6 +33,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
#define rb_hash_new_capa(n) rb_hash_new()
#endif
+#ifndef HAVE_RB_STR_TO_INTERNED_STR
+// Fallback for builds where extconf.rb did not detect rb_str_to_interned_str.
+// Freezes `str`, then calls the method identified by i_uminus on it with no
+// arguments and returns the result.
+// NOTE(review): i_uminus is presumably the ID of String#-@ -- confirm where it is initialized.
+static VALUE rb_str_to_interned_str(VALUE str)
+{
+ return rb_funcall(rb_str_freeze(str), i_uminus, 0);
+}
+#endif
/* name cache */
@@ -106,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
cache->entries[index] = rstring;
}
-static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
+#define rstring_cache_memcmp memcmp
+
+#if JSON_CPU_LITTLE_ENDIAN_64BITS
+#if __has_builtin(__builtin_bswap64)
+#undef rstring_cache_memcmp
+ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
{
- long rstring_length = RSTRING_LEN(rstring);
- if (length == rstring_length) {
- return memcmp(str, RSTRING_PTR(rstring), length);
- } else {
- return (int)(length - rstring_length);
+ // The libc memcmp has numerous complex optimizations, but in this particular case,
+ // we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
+ // inline a simpler memcmp outperforms calling the libc version.
+ long i = 0;
+
+ for (; i + 8 <= length; i += 8) {
+ uint64_t a, b;
+ memcpy(&a, str + i, 8);
+ memcpy(&b, rptr + i, 8);
+ if (a != b) {
+ a = __builtin_bswap64(a);
+ b = __builtin_bswap64(b);
+ return (a < b) ? -1 : 1;
+ }
}
+
+ for (; i < length; i++) {
+ if (str[i] != rptr[i]) {
+ return (str[i] < rptr[i]) ? -1 : 1;
+ }
+ }
+
+ return 0;
}
+#endif
+#endif
-static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
+ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
{
- if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
- // Common names aren't likely to be very long. So we just don't
- // cache names above an arbitrary threshold.
- return Qfalse;
- }
+ const char *rstring_ptr;
+ long rstring_length;
- if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
- // Simple heuristic, if the first character isn't a letter,
- // we're much less likely to see this string again.
- // We mostly want to cache strings that are likely to be repeated.
- return Qfalse;
+ RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
+
+ if (length == rstring_length) {
+ return rstring_cache_memcmp(str, rstring_ptr, length);
+ } else {
+ return (int)(length - rstring_length);
}
+}
+ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
+{
int low = 0;
int high = cache->length - 1;
- int mid = 0;
- int last_cmp = 0;
while (low <= high) {
- mid = (high + low) >> 1;
+ int mid = (high + low) >> 1;
VALUE entry = cache->entries[mid];
- last_cmp = rstring_cache_cmp(str, length, entry);
+ int cmp = rstring_cache_cmp(str, length, entry);
- if (last_cmp == 0) {
+ if (cmp == 0) {
return entry;
- } else if (last_cmp > 0) {
+ } else if (cmp > 0) {
low = mid + 1;
} else {
high = mid - 1;
}
}
- if (RB_UNLIKELY(memchr(str, '\\', length))) {
- // We assume the overwhelming majority of names don't need to be escaped.
- // But if they do, we have to fallback to the slow path.
- return Qfalse;
- }
-
VALUE rstring = build_interned_string(str, length);
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
- if (last_cmp > 0) {
- mid += 1;
- }
-
- rvalue_cache_insert_at(cache, mid, rstring);
+ rvalue_cache_insert_at(cache, low, rstring);
}
return rstring;
}
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
{
- if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
- // Common names aren't likely to be very long. So we just don't
- // cache names above an arbitrary threshold.
- return Qfalse;
- }
-
- if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
- // Simple heuristic, if the first character isn't a letter,
- // we're much less likely to see this string again.
- // We mostly want to cache strings that are likely to be repeated.
- return Qfalse;
- }
-
int low = 0;
int high = cache->length - 1;
- int mid = 0;
- int last_cmp = 0;
while (low <= high) {
- mid = (high + low) >> 1;
+ int mid = (high + low) >> 1;
VALUE entry = cache->entries[mid];
- last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
+ int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
- if (last_cmp == 0) {
+ if (cmp == 0) {
return entry;
- } else if (last_cmp > 0) {
+ } else if (cmp > 0) {
low = mid + 1;
} else {
high = mid - 1;
}
}
- if (RB_UNLIKELY(memchr(str, '\\', length))) {
- // We assume the overwhelming majority of names don't need to be escaped.
- // But if they do, we have to fallback to the slow path.
- return Qfalse;
- }
-
VALUE rsymbol = build_symbol(str, length);
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
- if (last_cmp > 0) {
- mid += 1;
- }
-
- rvalue_cache_insert_at(cache, mid, rsymbol);
+ rvalue_cache_insert_at(cache, low, rsymbol);
}
return rsymbol;
}
@@ -245,7 +211,7 @@ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalu
if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
stack = rvalue_stack_spill(stack, handle, stack_ref);
} else {
- REALLOC_N(stack->ptr, VALUE, required);
+ JSON_SIZED_REALLOC_N(stack->ptr, VALUE, required, stack->capa);
stack->capa = required;
}
return stack;
@@ -275,35 +241,62 @@ static void rvalue_stack_mark(void *ptr)
{
rvalue_stack *stack = (rvalue_stack *)ptr;
long index;
- for (index = 0; index < stack->head; index++) {
- rb_gc_mark(stack->ptr[index]);
+ if (stack && stack->ptr) {
+ for (index = 0; index < stack->head; index++) {
+ rb_gc_mark_movable(stack->ptr[index]);
+ }
}
}
+// Releases the VALUE buffer referenced by `stack->ptr` and clears the pointer.
+// The rvalue_stack struct itself is not freed here; that is left to the caller.
+// NOTE(review): stack->capa is handed to JSON_SIZED_FREE_N, presumably as the
+// element count for a sized free -- confirm against the macro definition.
+static void rvalue_stack_free_buffer(rvalue_stack *stack)
+{
+ JSON_SIZED_FREE_N(stack->ptr, stack->capa);
+ stack->ptr = NULL;
+}
+
static void rvalue_stack_free(void *ptr)
{
rvalue_stack *stack = (rvalue_stack *)ptr;
if (stack) {
- ruby_xfree(stack->ptr);
- ruby_xfree(stack);
+ rvalue_stack_free_buffer(stack);
+#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
+ JSON_SIZED_FREE(stack);
+#endif
}
}
static size_t rvalue_stack_memsize(const void *ptr)
{
const rvalue_stack *stack = (const rvalue_stack *)ptr;
- return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
+ size_t memsize = sizeof(VALUE) * stack->capa;
+#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
+ memsize += sizeof(rvalue_stack);
+#endif
+ return memsize;
+}
+
+// dcompact callback: refreshes every live slot of the value stack with the
+// address returned by rb_gc_location. Does nothing when the stack or its
+// buffer is NULL.
+static void rvalue_stack_compact(void *ptr)
+{
+    rvalue_stack *stack = (rvalue_stack *)ptr;
+    if (!stack || !stack->ptr) {
+        return;
+    }
+
+    for (long i = 0; i < stack->head; i++) {
+        VALUE *slot = &stack->ptr[i];
+        *slot = rb_gc_location(*slot);
+    }
+}
static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
- "JSON::Ext::Parser/rvalue_stack",
- {
+ .wrap_struct_name = "JSON::Ext::Parser/rvalue_stack",
+ .function = {
.dmark = rvalue_stack_mark,
.dfree = rvalue_stack_free,
.dsize = rvalue_stack_memsize,
+ .dcompact = rvalue_stack_compact,
},
- 0, 0,
- RUBY_TYPED_FREE_IMMEDIATELY,
+ // We deliberately don't declare rvalue_stack as RUBY_TYPED_WB_PROTECTED
+ // because it churns a lot of values so triggering write barriers every time is very costly.
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
};
static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
@@ -325,19 +318,206 @@ static void rvalue_stack_eagerly_release(VALUE handle)
if (handle) {
rvalue_stack *stack;
TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
- RTYPEDDATA_DATA(handle) = NULL;
+#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
+ rvalue_stack_free_buffer(stack);
+#else
rvalue_stack_free(stack);
+ RTYPEDDATA_DATA(handle) = NULL;
+#endif
+ }
+}
+
+/* frame stack */
+
+// Iterative (non-recursive) parsing keeps an explicit stack of the containers
+// currently being built, instead of relying on the C call stack. Each frame
+// only needs enough bookkeeping to close its container: which kind it is, the
+// rvalue_stack position where its children start (so we know how many to pop),
+// and the cursor at its opening brace (used to rewind for duplicate key
+// errors). Frames hold no VALUEs, so this stack needs no GC marking; it reuses
+// the same stack-allocated-with-heap-spill strategy as the rvalue_stack so that
+// it's freed even if parsing raises.
+//
+// The lifecycle helpers below (grow/push/peek/pop/spill/free/eagerly_release
+// and the rb_data_type_t) deliberately mirror their rvalue_stack counterparts
+// -- the element type and the absence of a mark function are the only real
+// differences. Keep the two in sync: a fix to the spill/release or
+// HAVE_RUBY_TYPED_EMBEDDABLE handling in one almost certainly belongs in the
+// other.
+#define JSON_FRAME_STACK_INITIAL_CAPA 32
+
+// Kind of container a frame stands for. The enumerator values are reused by
+// the first three json_frame_phase constants (noted on each line).
+enum json_frame_type {
+ JSON_FRAME_ROOT, // == JSON_PHASE_DONE
+ JSON_FRAME_ARRAY, // == JSON_PHASE_ARRAY_COMMA
+ JSON_FRAME_OBJECT, // == JSON_PHASE_OBJECT_COMMA
+};
+
+// Where a frame is within its container's grammar. This is the entirety of the
+// parser's "what to do next" state: json_parse_any dispatches on the top
+// frame's phase and holds no resume state in C locals, so a parse can stop at
+// any value boundary and be resumed purely from the (persistable) frame stack.
+//
+// The first three phases deliberately have the same values as the corresponding
+// json_frame_type enumerators, which simplifies the phase transition in
+// json_value_completed.
+enum json_frame_phase {
+ JSON_PHASE_DONE = JSON_FRAME_ROOT, // root only: the document value has been parsed
+ JSON_PHASE_ARRAY_COMMA = JSON_FRAME_ARRAY, // after a value: expecting ',' or the closing ']'
+ JSON_PHASE_OBJECT_COMMA = JSON_FRAME_OBJECT, // after a value: expecting ',' or the closing '}'
+ JSON_PHASE_VALUE, // expecting a value (document root, array element, or object value after ':')
+ JSON_PHASE_OBJECT_KEY, // expecting a '"' key (after '{' or ',')
+ JSON_PHASE_OBJECT_COLON, // object only: after a key, expecting ':'
+};
+
+// One entry of the frame stack: the bookkeeping kept for a single container
+// (or the document root) while it is being parsed. Holds no VALUEs.
+typedef struct json_frame_struct {
+ enum json_frame_type type; // which kind of container this frame is
+ enum json_frame_phase phase; // what the parser expects next inside this container
+ long value_stack_head; // rvalue_stack->head when this container opened
+ const char *start_cursor; // object frames only (the '{'); NULL otherwise
+} json_frame;
+
+// Growable array of json_frame used as a stack.
+typedef struct json_frame_stack_struct {
+ enum rvalue_stack_type type; // shared with rvalue_stack: is ptr stack- or heap-allocated
+ long capa; // number of json_frame slots available in ptr
+ long head; // number of frames in use; ptr[head - 1] is the top, ptr[head] the next free slot
+ json_frame *ptr; // frame storage; set to NULL once the buffer has been freed
+} json_frame_stack;
+
+// Policy stored in JSON_ParserConfig.on_duplicate_key.
+// JSON_DEPRECATED is explicitly 0, so a zero-initialized config selects it.
+// NOTE(review): going by the names, presumably DEPRECATED warns, IGNORE accepts
+// silently and RAISE raises on a duplicate object key -- confirm against the
+// code that handles duplicate keys.
+enum duplicate_key_action {
+ JSON_DEPRECATED = 0,
+ JSON_IGNORE,
+ JSON_RAISE,
+};
+
+// Parser configuration.
+// NOTE(review): most fields share their name with one of the sym_* option
+// symbols declared at the top of this file and are presumably filled from the
+// matching option -- confirm in the config initialization code.
+typedef struct JSON_ParserStruct {
+ VALUE on_load_proc;
+ VALUE decimal_class;
+ ID decimal_method_id;
+ enum duplicate_key_action on_duplicate_key;
+ int max_nesting;
+ bool allow_nan;
+ bool allow_trailing_comma;
+ bool allow_control_characters;
+ bool allow_invalid_escape;
+ bool symbolize_names; // object names are built as symbols (see json_string_fastpath)
+ bool freeze; // strings are interned even when they are not object names (see json_string_fastpath)
+} JSON_ParserConfig;
+
+// Mutable state of one parse.
+typedef struct JSON_ParserStateStruct {
+ VALUE *value_stack_handle; // NOTE(review): presumably receives the wrapper object if value_stack spills to the heap -- confirm
+ VALUE *frame_stack_handle; // passed to json_frame_stack_grow, which stores the wrapper object here on spill
+ const char *start;
+ const char *cursor; // current read position; peek() reads *cursor
+ const char *end; // eos() is true once cursor >= end
+ rvalue_stack *value_stack;
+ json_frame_stack *frames; // updated through json_frame_stack_grow when the frame stack moves
+ rvalue_cache name_cache; // cache of object names, consulted by json_string_fastpath
+ int in_array; // when non-zero, json_string_fastpath may use name_cache for object names
+ int current_nesting;
+ unsigned int emitted_deprecations;
+} JSON_ParserState;
+
+static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref);
+
+// Doubles the capacity of the frame stack and returns the stack to use from
+// now on. A stack-allocated stack is handed to json_frame_stack_spill, which
+// creates a heap copy and stores it through `handle` and `stack_ref`; a
+// heap-allocated one has its buffer reallocated in place.
+static json_frame_stack *json_frame_stack_grow(json_frame_stack *stack, VALUE *handle, json_frame_stack **stack_ref)
+{
+ long required = stack->capa * 2;
+
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
+ // `required` is not used on this path: the spill helper computes the doubled capacity itself.
+ stack = json_frame_stack_spill(stack, handle, stack_ref);
+ } else {
+ JSON_SIZED_REALLOC_N(stack->ptr, json_frame, required, stack->capa);
+ stack->capa = required;
}
+ return stack;
}
+// Appends a copy of `frame` on top of state->frames, growing the stack first
+// when it is full, and returns a pointer to the stored copy.
+static json_frame *json_frame_stack_push(JSON_ParserState *state, json_frame frame)
+{
+    json_frame_stack *frames = state->frames;
+    const bool full = frames->head >= frames->capa;
+    if (RB_UNLIKELY(full)) {
+        // Growing may replace the stack object; state->frames is updated through the last argument.
+        frames = json_frame_stack_grow(frames, state->frame_stack_handle, &state->frames);
+    }
+
+    json_frame *slot = frames->ptr + frames->head;
+    frames->head++;
+    *slot = frame;
+    return slot;
+}
-#ifndef HAVE_STRNLEN
-static size_t strnlen(const char *s, size_t maxlen)
+static inline json_frame *json_frame_stack_peek(json_frame_stack *stack)
{
- char *p;
- return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
+ return &stack->ptr[stack->head - 1];
}
+
+// Discards the top frame by decrementing head. There is no emptiness check:
+// the caller must only pop a stack that holds at least one frame.
+static inline void json_frame_stack_pop(json_frame_stack *stack)
+{
+ stack->head--;
+}
+
+// Releases the frame buffer referenced by `stack->ptr` and clears the pointer.
+// The json_frame_stack struct itself is not freed here.
+static void json_frame_stack_free_buffer(json_frame_stack *stack)
+{
+ JSON_SIZED_FREE_N(stack->ptr, stack->capa);
+ stack->ptr = NULL;
+}
+
+// dfree callback of JSON_Parser_frame_stack_type. Accepts NULL. Always
+// releases the frame buffer; the struct itself is freed only when
+// HAVE_RUBY_TYPED_EMBEDDABLE is not defined.
+// NOTE(review): presumably the struct is embedded in the Ruby object in the
+// EMBEDDABLE case and therefore must not be freed here -- confirm.
+static void json_frame_stack_free(void *ptr)
+{
+ json_frame_stack *stack = (json_frame_stack *)ptr;
+ if (stack) {
+ json_frame_stack_free_buffer(stack);
+#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
+ JSON_SIZED_FREE(stack);
#endif
+ }
+}
+
+// dsize callback: reports the bytes held by the frame buffer, plus the struct
+// itself when HAVE_RUBY_TYPED_EMBEDDABLE is not defined.
+static size_t json_frame_stack_memsize(const void *ptr)
+{
+    const json_frame_stack *stack = (const json_frame_stack *)ptr;
+    const size_t buffer_bytes = sizeof(json_frame) * stack->capa;
+
+#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
+    return buffer_bytes;
+#else
+    return buffer_bytes + sizeof(json_frame_stack);
+#endif
+}
+
+// Typed-data descriptor for the object that wraps a heap-spilled frame stack.
+// There is no mark function because frames hold no VALUEs.
+static const rb_data_type_t JSON_Parser_frame_stack_type = {
+ .wrap_struct_name = "JSON::Ext::Parser/frame_stack",
+ .function = {
+ .dmark = NULL,
+ .dfree = json_frame_stack_free,
+ .dsize = json_frame_stack_memsize,
+ },
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
+};
+
+// Moves a stack-allocated frame stack to the heap with twice the capacity.
+//
+// old_stack: the stack to copy from; it is left untouched.
+// handle:    receives the Ruby object that owns the new stack.
+// stack_ref: receives the new stack once it is fully initialized.
+// Returns the new heap-backed stack.
+//
+// The new buffer is allocated *before* anything is copied into the wrapper.
+// ALLOC_N can raise; if the wrapper already carried old_stack's `ptr` at that
+// point, its free function would later release a pointer into the C stack.
+// NOTE(review): rvalue_stack_spill is documented as the mirror of this
+// function and presumably needs the same ordering.
+static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref)
+{
+    json_frame_stack *stack;
+    // TypedData_Make_Struct zero-fills the struct, so until it is populated
+    // below the wrapper owns no buffer (ptr == NULL, capa == 0).
+    *handle = TypedData_Make_Struct(0, json_frame_stack, &JSON_Parser_frame_stack_type, stack);
+
+    long capa = old_stack->capa << 1;
+    json_frame *frames = ALLOC_N(json_frame, capa);
+    MEMCPY(frames, old_stack->ptr, json_frame, old_stack->head);
+
+    // Nothing below can raise: publish the fully built stack.
+    MEMCPY(stack, old_stack, json_frame_stack, 1);
+    stack->capa = capa;
+    stack->ptr = frames;
+    stack->type = RVALUE_STACK_HEAP_ALLOCATED;
+
+    *stack_ref = stack;
+    return stack;
+}
+
+// Frees the frame stack owned by `handle` right away instead of waiting for
+// the GC. A zero handle is ignored. With HAVE_RUBY_TYPED_EMBEDDABLE only the
+// buffer is released; otherwise the whole stack is freed and the object's data
+// pointer is cleared.
+static void json_frame_stack_eagerly_release(VALUE handle)
+{
+    if (!handle) {
+        return;
+    }
+
+    json_frame_stack *stack;
+    TypedData_Get_Struct(handle, json_frame_stack, &JSON_Parser_frame_stack_type, stack);
+#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
+    json_frame_stack_free_buffer(stack);
+#else
+    json_frame_stack_free(stack);
+    RTYPEDDATA_DATA(handle) = NULL;
+#endif
+}
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
{
@@ -365,38 +545,31 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
return len;
}
-enum duplicate_key_action {
- JSON_DEPRECATED = 0,
- JSON_IGNORE,
- JSON_RAISE,
-};
+// Number of bytes between the cursor and the end of the input.
+// The pointer difference is converted to size_t, so the result is only
+// meaningful while state->cursor <= state->end.
+static inline size_t rest(JSON_ParserState *state) {
+ return state->end - state->cursor;
+}
-typedef struct JSON_ParserStruct {
- VALUE on_load_proc;
- VALUE decimal_class;
- ID decimal_method_id;
- enum duplicate_key_action on_duplicate_key;
- int max_nesting;
- bool allow_nan;
- bool allow_trailing_comma;
- bool parsing_name;
- bool symbolize_names;
- bool freeze;
-} JSON_ParserConfig;
+// True once the cursor has reached (or gone past) the end of the input.
+static inline bool eos(JSON_ParserState *state) {
+ return state->cursor >= state->end;
+}
-typedef struct JSON_ParserStateStruct {
- VALUE stack_handle;
- const char *start;
- const char *cursor;
- const char *end;
- rvalue_stack *stack;
- rvalue_cache name_cache;
- int in_array;
- int current_nesting;
-} JSON_ParserState;
+// Returns the byte under the cursor without consuming it, or 0 when the
+// cursor is at (or past) the end of the input.
+static inline char peek(JSON_ParserState *state)
+{
+    const bool exhausted = eos(state);
+    return RB_UNLIKELY(exhausted) ? 0 : *state->cursor;
+}
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
{
+ JSON_ASSERT(state->cursor <= state->end);
+
+ // Redundant but helpful for hardening
+ if (RB_UNLIKELY(state->cursor > state->end)) {
+ state->cursor = state->end;
+ }
+
const char *cursor = state->cursor;
long column = 0;
long line = 1;
@@ -428,14 +601,9 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
#define PARSE_ERROR_FRAGMENT_LEN 32
-#ifdef RBIMPL_ATTR_NORETURN
-RBIMPL_ATTR_NORETURN()
-#endif
-static void raise_parse_error(const char *format, JSON_ParserState *state)
+static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
{
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
- long line, column;
- cursor_position(state, &line, &column);
const char *ptr = "EOF";
if (state->cursor && state->cursor < state->end) {
@@ -467,20 +635,28 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
}
}
- VALUE msg = rb_sprintf(format, ptr);
- VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
- RB_GC_GUARD(msg);
+ VALUE message = rb_enc_sprintf(enc_utf8, format, ptr);
+ rb_str_catf(message, " at line %ld column %ld", line, column);
+ return message;
+}
+static VALUE parse_error_new(VALUE message, long line, long column)
+{
VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
- rb_exc_raise(exc);
+ return exc;
}
-#ifdef RBIMPL_ATTR_NORETURN
-RBIMPL_ATTR_NORETURN()
-#endif
-static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
+// Raises the exception built by parse_error_new for the current cursor position.
+// `format` is forwarded to build_parse_error_message. The line and column are
+// computed once and used both for the message and for the exception object.
+// Never returns.
+NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
+{
+ long line, column;
+ cursor_position(state, &line, &column);
+ VALUE message = build_parse_error_message(format, state, line, column);
+ rb_exc_raise(parse_error_new(message, line, column));
+}
+
+NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
{
state->cursor = at;
raise_parse_error(format, state);
@@ -505,23 +681,24 @@ static const signed char digit_values[256] = {
-1, -1, -1, -1, -1, -1, -1
};
-static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
-{
- signed char b;
- uint32_t result = 0;
- b = digit_values[p[0]];
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
- result = (result << 4) | (unsigned char)b;
- b = digit_values[p[1]];
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
- result = (result << 4) | (unsigned char)b;
- b = digit_values[p[2]];
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
- result = (result << 4) | (unsigned char)b;
- b = digit_values[p[3]];
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
- result = (result << 4) | (unsigned char)b;
- return result;
+static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
+{
+ if (RB_UNLIKELY(sp > spe - 4)) {
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
+ }
+
+ const unsigned char *p = (const unsigned char *)sp;
+
+ const signed char b0 = digit_values[p[0]];
+ const signed char b1 = digit_values[p[1]];
+ const signed char b2 = digit_values[p[2]];
+ const signed char b3 = digit_values[p[3]];
+
+ if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
+ }
+
+ return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
}
#define GET_PARSER_CONFIG \
@@ -530,61 +707,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
static const rb_data_type_t JSON_ParserConfig_type;
-static const bool whitespace[256] = {
- [' '] = 1,
- ['\t'] = 1,
- ['\n'] = 1,
- ['\r'] = 1,
- ['/'] = 1,
-};
-
-static void
+NOINLINE(static) void
json_eat_comments(JSON_ParserState *state)
{
- if (state->cursor + 1 < state->end) {
- switch (state->cursor[1]) {
- case '/': {
- state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
- if (!state->cursor) {
- state->cursor = state->end;
- } else {
- state->cursor++;
- }
- break;
+ const char *start = state->cursor;
+ state->cursor++;
+
+ switch (peek(state)) {
+ case '/': {
+ state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
+ if (!state->cursor) {
+ state->cursor = state->end;
+ } else {
+ state->cursor++;
}
- case '*': {
- state->cursor += 2;
- while (true) {
- state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
- if (!state->cursor) {
- raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
- } else {
- state->cursor++;
- if (state->cursor < state->end && *state->cursor == '/') {
- state->cursor++;
- break;
- }
- }
+ break;
+ }
+ case '*': {
+ state->cursor++;
+
+ while (true) {
+ const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
+ if (!next_match) {
+ raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
+ }
+
+ state->cursor = next_match + 1;
+ if (peek(state) == '/') {
+ state->cursor++;
+ break;
}
- break;
}
- default:
- raise_parse_error("unexpected token %s", state);
- break;
+ break;
}
- } else {
- raise_parse_error("unexpected token %s", state);
+ default:
+ raise_parse_error_at("unexpected token %s", state, start);
+ break;
}
}
-static inline void
+ALWAYS_INLINE(static) void
json_eat_whitespace(JSON_ParserState *state)
{
- while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) {
- if (RB_LIKELY(*state->cursor != '/')) {
- state->cursor++;
- } else {
- json_eat_comments(state);
+ while (true) {
+ switch (peek(state)) {
+ case ' ':
+ state->cursor++;
+ break;
+ case '\n':
+ state->cursor++;
+
+ // Heuristic: if we see a newline, there are likely consecutive spaces after it.
+#if JSON_CPU_LITTLE_ENDIAN_64BITS
+ while (rest(state) > 8) {
+ uint64_t chunk;
+ memcpy(&chunk, state->cursor, sizeof(uint64_t));
+ if (chunk == 0x2020202020202020) {
+ state->cursor += 8;
+ continue;
+ }
+
+ uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
+ state->cursor += consecutive_spaces;
+ break;
+ }
+#endif
+ break;
+ case '\t':
+ case '\r':
+ state->cursor++;
+ break;
+ case '/':
+ json_eat_comments(state);
+ break;
+
+ default:
+ return;
}
}
}
@@ -615,11 +813,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
return result;
}
-static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
+// Decides whether a string is worth looking up in / adding to the name cache.
+// We mostly want to cache strings that are likely to be repeated, using two
+// simple heuristics.
+static inline bool json_string_cacheable_p(const char *string, size_t length)
+{
+    // Common names aren't likely to be very long, so names above an arbitrary threshold are not cached.
+    if (length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH) {
+        return false;
+    }
+
+    // If the first character isn't a letter, we're much less likely to see this string again.
+    return rb_isalpha(string[0]) != 0;
+}
+
+static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
+{
+ bool intern = is_name || config->freeze;
+ bool symbolize = is_name && config->symbolize_names;
size_t bufferSize = stringEnd - string;
- if (is_name && state->in_array) {
+ if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
VALUE cached_key;
if (RB_UNLIKELY(symbolize)) {
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
@@ -635,104 +844,129 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
return build_string(string, stringEnd, intern, symbolize);
}
-static VALUE json_string_unescape(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
-{
- size_t bufferSize = stringEnd - string;
- const char *p = string, *pe = string, *unescape, *bufferStart;
- char *buffer;
- int unescape_len;
- char buf[4];
+// Maximum number of backslash positions json_parse_escaped_string records
+// before it falls back to merely counting further backslashes.
+#define JSON_MAX_UNESCAPE_POSITIONS 16
+// Backslash locations collected while scanning an escaped string; they are
+// consumed by json_next_backslash while the string is unescaped.
+typedef struct _json_unescape_positions {
+ long size; // number of recorded positions not yet consumed
+ const char **positions; // next recorded backslash position to hand out
+ unsigned long additional_backslashes; // backslashes seen once the recorded slots were full; found again with memchr
+} JSON_UnescapePositions;
- if (is_name && state->in_array) {
- VALUE cached_key;
- if (RB_UNLIKELY(symbolize)) {
- cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
- } else {
- cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
+// Return the next backslash to unescape at or after `pe`, or NULL when none is left.
+// Recorded positions are consumed first; once they run out, every backslash that
+// was only counted (additional_backslashes) is located with memchr instead.
+static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
+{
+ while (positions->size) {
+ positions->size--;
+ const char *next_position = positions->positions[0];
+ positions->positions++;
+ // Recorded positions located before `pe` are dropped -- presumably they were already consumed as part of an earlier escape sequence.
+ if (next_position >= pe) {
+ return next_position;
}
+ }
- if (RB_LIKELY(cached_key)) {
- return cached_key;
- }
+ // No recorded position left: search the remaining text, once per counted backslash.
+ if (positions->additional_backslashes) {
+ positions->additional_backslashes--;
+ return memchr(pe, '\\', stringEnd - pe);
}
+ return NULL;
+}
+
+NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
+{
+ bool intern = is_name || config->freeze;
+ bool symbolize = is_name && config->symbolize_names;
+ size_t bufferSize = stringEnd - string;
+ const char *p = string, *pe = string, *bufferStart;
+ char *buffer;
+
VALUE result = rb_str_buf_new(bufferSize);
rb_enc_associate_index(result, utf8_encindex);
buffer = RSTRING_PTR(result);
bufferStart = buffer;
- while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) {
- unescape = (char *) "?";
- unescape_len = 1;
+#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
+
+ while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
if (pe > p) {
MEMCPY(buffer, p, char, pe - p);
buffer += pe - p;
}
switch (*++pe) {
+ case '"':
+ case '/':
+ p = pe; // nothing to unescape just need to skip the backslash
+ break;
+ case '\\':
+ APPEND_CHAR('\\');
+ break;
case 'n':
- unescape = (char *) "\n";
+ APPEND_CHAR('\n');
break;
case 'r':
- unescape = (char *) "\r";
+ APPEND_CHAR('\r');
break;
case 't':
- unescape = (char *) "\t";
- break;
- case '"':
- unescape = (char *) "\"";
- break;
- case '\\':
- unescape = (char *) "\\";
+ APPEND_CHAR('\t');
break;
case 'b':
- unescape = (char *) "\b";
+ APPEND_CHAR('\b');
break;
case 'f':
- unescape = (char *) "\f";
+ APPEND_CHAR('\f');
break;
- case 'u':
- if (pe > stringEnd - 5) {
- raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
- } else {
- uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
- pe += 3;
- /* To handle values above U+FFFF, we take a sequence of
- * \uXXXX escapes in the U+D800..U+DBFF then
- * U+DC00..U+DFFF ranges, take the low 10 bits from each
- * to make a 20-bit number, then add 0x10000 to get the
- * final codepoint.
- *
- * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
- * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
- * Area".
- */
- if ((ch & 0xFC00) == 0xD800) {
- pe++;
- if (pe > stringEnd - 6) {
- raise_parse_error_at("incomplete surrogate pair at %s", state, p);
- }
- if (pe[0] == '\\' && pe[1] == 'u') {
- uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
- | (sur & 0x3FF));
- pe += 5;
- } else {
- unescape = (char *) "?";
- break;
+ case 'u': {
+ uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
+ pe += 3;
+ /* To handle values above U+FFFF, we take a sequence of
+ * \uXXXX escapes in the U+D800..U+DBFF then
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
+ * to make a 20-bit number, then add 0x10000 to get the
+ * final codepoint.
+ *
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
+ * Area".
+ */
+ if ((ch & 0xFC00) == 0xD800) {
+ pe++;
+ if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
+ uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
+
+ if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
+ raise_parse_error_at("invalid surrogate pair at %s", state, p);
}
+
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
+ pe += 5;
+ } else {
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
+ break;
}
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
- unescape = buf;
}
+
+ int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
+ buffer += unescape_len;
+ p = ++pe;
break;
+ }
default:
- p = pe;
- continue;
+ if ((unsigned char)*pe < 0x20) {
+ if (!config->allow_control_characters) {
+ if (*pe == '\n') {
+ raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
+ }
+ raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
+ }
+ }
+
+ if (config->allow_invalid_escape) {
+ APPEND_CHAR(*pe);
+ } else {
+ raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
+ }
+ break;
}
- MEMCPY(buffer, unescape, char, unescape_len);
- buffer += unescape_len;
- p = ++pe;
}
+#undef APPEND_CHAR
if (stringEnd > p) {
MEMCPY(buffer, p, char, stringEnd - p);
@@ -743,87 +977,99 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
if (symbolize) {
result = rb_str_intern(result);
} else if (intern) {
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
+ result = rb_str_to_interned_str(result);
}
return result;
}
#define MAX_FAST_INTEGER_SIZE 18
-static inline VALUE fast_decode_integer(const char *p, const char *pe)
-{
- bool negative = false;
- if (*p == '-') {
- negative = true;
- p++;
- }
+#define MAX_NUMBER_STACK_BUFFER 128
- long long memo = 0;
- while (p < pe) {
- memo *= 10;
- memo += *p - '0';
- p++;
- }
+typedef VALUE (*json_number_decode_func_t)(const char *ptr);
- if (negative) {
- memo = -memo;
+// Copy the `len` bytes starting at `start` into a NUL-terminated buffer and
+// return whatever `func` builds from that buffer.
+// Texts shorter than MAX_NUMBER_STACK_BUFFER use a stack buffer, longer ones a
+// temporary Ruby string.
+static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
+{
+ if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
+ char buffer[MAX_NUMBER_STACK_BUFFER];
+ MEMCPY(buffer, start, char, len);
+ buffer[len] = '\0';
+ return func(buffer);
+ } else {
+ VALUE buffer_v = rb_str_tmp_new(len);
+ char *buffer = RSTRING_PTR(buffer_v);
+ MEMCPY(buffer, start, char, len);
+ buffer[len] = '\0';
+ VALUE number = func(buffer);
+ // Keep buffer_v referenced until func is done reading from its storage.
+ RB_GC_GUARD(buffer_v);
+ return number;
}
- return LL2NUM(memo);
}
-static VALUE json_decode_large_integer(const char *start, long len)
+// json_number_decode_func_t callback: turn NUL-terminated decimal text into a
+// Ruby Integer.
+static VALUE json_decode_inum(const char *buffer)
+{
+    const int base = 10;
+    return rb_cstr2inum(buffer, base);
+}
+
+// Slow path for integers: decode the `len` bytes at `start` through
+// json_decode_large_number with the json_decode_inum callback.
+NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
{
- VALUE buffer_v;
- char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
- MEMCPY(buffer, start, char, len);
- buffer[len] = '\0';
- VALUE number = rb_cstr2inum(buffer, 10);
- RB_ALLOCV_END(buffer_v);
- return number;
+ return json_decode_large_number(start, len, json_decode_inum);
}
-static inline VALUE
-json_decode_integer(const char *start, const char *end)
+// Build a Ruby Integer from an already parsed number.
+// `mantissa` holds the digits as an integer, `mantissa_digits` how many digits
+// were read, `negative` the sign; `start`..`end` is the source text, which is
+// only used when there are too many digits for the fast path.
+static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
{
- long len = end - start;
- if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
- return fast_decode_integer(start, end);
+ // Fewer than MAX_FAST_INTEGER_SIZE (18) digits always fit in a signed 64-bit integer.
+ if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
+ if (negative) {
+ return INT64T2NUM(-((int64_t)mantissa));
}
- return json_decode_large_integer(start, len);
+ return UINT64T2NUM(mantissa);
+ }
+
+ // Too many digits: `mantissa` may have overflowed, so decode from the text.
+ return json_decode_large_integer(start, end - start);
}
-static VALUE json_decode_large_float(const char *start, long len)
+// json_number_decode_func_t callback: turn NUL-terminated decimal text into a
+// Ruby Float using rb_cstr_to_dbl.
+static VALUE json_decode_dnum(const char *buffer)
{
- VALUE buffer_v;
- char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
- MEMCPY(buffer, start, char, len);
- buffer[len] = '\0';
- VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
- RB_ALLOCV_END(buffer_v);
- return number;
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
}
-static VALUE json_decode_float(JSON_ParserConfig *config, const char *start, const char *end)
+// Slow path for floats: decode the `len` bytes at `start` through
+// json_decode_large_number with the json_decode_dnum callback.
+NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
{
- long len = end - start;
+ return json_decode_large_number(start, len, json_decode_dnum);
+}
+/* Ruby JSON optimized float decoder using vendored Ryu algorithm
+ * Accepts pre-extracted mantissa and exponent from first-pass validation
+ *
+ * config:          when decimal_class is set, the raw text is handed to it instead
+ * mantissa:        all digits of the number (integer and fractional part) as one integer
+ * mantissa_digits: how many digits were folded into mantissa
+ * exponent:        power of ten to apply to mantissa
+ * negative:        sign of the number
+ * start, end:      the source text of the number, used by the string based paths
+ *
+ * Returns a Float, or whatever decimal_class builds from the text.
+ */
+static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int64_t exponent, bool negative,
+ const char *start, const char *end)
+{
if (RB_UNLIKELY(config->decimal_class)) {
- VALUE text = rb_str_new(start, len);
+ VALUE text = rb_str_new(start, end - start);
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
- } else if (RB_LIKELY(len < 64)) {
- char buffer[64];
- MEMCPY(buffer, start, char, len);
- buffer[len] = '\0';
- return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
- } else {
- return json_decode_large_float(start, len);
}
+
+ if (RB_UNLIKELY(exponent > INT32_MAX)) {
+ // A zero mantissa stays zero whatever the exponent is (e.g. "0e99999999999"),
+ // so it must not be turned into Infinity. A zero here can also be the result
+ // of `mantissa` wrapping around on very long inputs, hence the text is
+ // decoded again rather than trusting the accumulated value.
+ if (RB_UNLIKELY(mantissa == 0)) {
+ return json_decode_large_float(start, end - start);
+ }
+ return negative ? CMinusInfinity : CInfinity;
+ }
+
+ // An exponent this small underflows to (signed) zero for any mantissa.
+ if (RB_UNLIKELY(exponent < INT32_MIN)) {
+ return rb_float_new(negative ? -0.0 : 0.0);
+ }
+
+ // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
+ // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
+ if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
+ return json_decode_large_float(start, end - start);
+ }
+
+ return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, (int32_t)exponent, negative));
}
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
{
- VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count));
- rvalue_stack_pop(state->stack, count);
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->value_stack, count));
+ rvalue_stack_pop(state->value_stack, count);
if (config->freeze) {
RB_OBJ_FREEZE(array);
@@ -849,7 +1095,7 @@ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
return Qfalse;
}
-static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
+NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
{
VALUE message = rb_sprintf(
"detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
@@ -860,41 +1106,52 @@ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_
RB_GC_GUARD(message);
}
-#ifdef RBIMPL_ATTR_NORETURN
-RBIMPL_ATTR_NORETURN()
-#endif
-static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
+// Raise a parse error reporting `duplicate_key` (shown through rb_inspect).
+// The line and column of the cursor are attached to the exception. Never returns.
+NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
{
VALUE message = rb_sprintf(
"duplicate key %"PRIsVALUE,
rb_inspect(duplicate_key)
);
- raise_parse_error(RSTRING_PTR(message), state);
- RB_GC_GUARD(message);
+ long line, column;
+ cursor_position(state, &line, &column);
+ // Append the text build_parse_error_message produces for an empty format to the message.
+ rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
+ rb_exc_raise(parse_error_new(message, line, column));
+}
+
+// Apply the configured duplicate key policy to an object that turned out to
+// contain a repeated key. `pairs` holds `count` interleaved keys and values.
+//   JSON_IGNORE:     nothing happens.
+//   JSON_DEPRECATED: a warning is emitted, for the first five occurrences only.
+//   JSON_RAISE:      a parse error is raised.
+NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_ParserConfig *config, size_t count, const VALUE *pairs)
+{
+    switch (config->on_duplicate_key) {
+      case JSON_RAISE: {
+        VALUE duplicate_key = json_find_duplicated_key(count, pairs);
+        raise_duplicate_key_error(state, duplicate_key);
+        return;
+      }
+
+      case JSON_DEPRECATED: {
+        // Stay quiet once a few deprecations were emitted, to avoid spamming.
+        if (state->emitted_deprecations >= 5) {
+            return;
+        }
+
+        VALUE duplicate_key = json_find_duplicated_key(count, pairs);
+        emit_duplicate_key_warning(state, duplicate_key);
+        state->emitted_deprecations++;
+        return;
+      }
+
+      case JSON_IGNORE:
+        return;
+    }
+    UNREACHABLE;
}
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
{
size_t entries_count = count / 2;
VALUE object = rb_hash_new_capa(entries_count);
- const VALUE *pairs = rvalue_stack_peek(state->stack, count);
+ const VALUE *pairs = rvalue_stack_peek(state->value_stack, count);
rb_hash_bulk_insert(count, pairs, object);
if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
- switch (config->on_duplicate_key) {
- case JSON_IGNORE:
- break;
- case JSON_DEPRECATED:
- emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
- break;
- case JSON_RAISE:
- raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
- break;
- }
+ json_on_duplicate_key(state, config, count, pairs);
}
- rvalue_stack_pop(state->stack, count);
+ rvalue_stack_pop(state->value_stack, count);
if (config->freeze) {
RB_OBJ_FREEZE(object);
@@ -903,26 +1160,12 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
return object;
}
-static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
-{
- VALUE string;
- bool intern = is_name || config->freeze;
- bool symbolize = is_name && config->symbolize_names;
- if (escaped) {
- string = json_string_unescape(state, start, end, is_name, intern, symbolize);
- } else {
- string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
- }
-
- return string;
-}
-
+// Push a freshly decoded value onto the value stack and return it.
+// When an on_load_proc is configured, the value is first passed to it and the
+// proc's result is what gets pushed and returned.
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
{
if (RB_UNLIKELY(config->on_load_proc)) {
value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
}
- rvalue_stack_push(state->stack, value, &state->stack_handle, &state->stack);
+ // state->value_stack is passed by address as well, so the push is able to replace it.
+ rvalue_stack_push(state->value_stack, value, state->value_stack_handle, &state->value_stack);
return value;
}
@@ -939,17 +1182,11 @@ static const bool string_scan_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
-#if (defined(__GNUC__ ) || defined(__clang__))
-#define FORCE_INLINE __attribute__((always_inline))
-#else
-#define FORCE_INLINE
-#endif
-
#ifdef HAVE_SIMD
static SIMD_Implementation simd_impl = SIMD_NONE;
#endif /* HAVE_SIMD */
-static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
+ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
{
#ifdef HAVE_SIMD
#if defined(HAVE_SIMD_NEON)
@@ -957,7 +1194,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
uint64_t mask = 0;
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
state->cursor += trailing_zeros64(mask) >> 2;
- return 1;
+ return true;
}
#elif defined(HAVE_SIMD_SSE2)
@@ -965,313 +1202,574 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
int mask = 0;
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
state->cursor += trailing_zeros(mask);
- return 1;
+ return true;
}
}
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
#endif /* HAVE_SIMD */
- while (state->cursor < state->end) {
+ while (!eos(state)) {
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
- return 1;
+ return true;
}
- *state->cursor++;
+ state->cursor++;
}
- return 0;
+
+ // If the string ended with an unterminated escape sequence, we might
+ // have gone past the end.
+ if (RB_UNLIKELY(state->cursor > state->end)) {
+ state->cursor = state->end;
+ }
+
+ return false;
}
-static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
+// Slow path of json_parse_string, entered with the cursor on the first special
+// character (anything string_scan stops on other than the closing quote).
+// `start` points at the first byte of the string content. The positions of the
+// backslashes met on the way to the closing quote are recorded so that
+// json_string_unescape does not have to search for them again.
+// Returns the decoded string and leaves the cursor after the closing quote;
+// raises a parse error if the input ends before the string is closed.
+static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
{
- state->cursor++;
- const char *start = state->cursor;
- bool escaped = false;
+ const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
+ JSON_UnescapePositions positions = {
+ .size = 0,
+ .positions = backslashes,
+ .additional_backslashes = 0,
+ };
- while (RB_UNLIKELY(string_scan(state))) {
+ do {
switch (*state->cursor) {
case '"': {
- VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
+ VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
state->cursor++;
- return json_push_value(state, config, string);
+ return string;
}
case '\\': {
- state->cursor++;
- escaped = true;
- if ((unsigned char)*state->cursor < 0x20) {
- raise_parse_error("invalid ASCII control character in string: %s", state);
+ // Record the position while there is room, otherwise only count the backslash.
+ if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
+ backslashes[positions.size] = state->cursor;
+ positions.size++;
+ } else {
+ positions.additional_backslashes++;
}
+ // Together with the increment after the switch, this also skips the escaped character.
+ state->cursor++;
break;
}
default:
- raise_parse_error("invalid ASCII control character in string: %s", state);
+ if (!config->allow_control_characters) {
+ raise_parse_error("invalid ASCII control character in string: %s", state);
+ }
break;
}
state->cursor++;
- }
+ } while (string_scan(state));
raise_parse_error("unexpected end of input, expected closing \"", state);
return Qfalse;
}
-static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
+// Parse the string whose opening quote is at the cursor and return it.
+// `is_name` tells whether the string is an object key. The cursor is left
+// after the closing quote. Raises a parse error when the input ends first.
+ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
{
- json_eat_whitespace(state);
- if (state->cursor >= state->end) {
- raise_parse_error("unexpected end of input", state);
+ // Skip the opening quote.
+ state->cursor++;
+ const char *start = state->cursor;
+
+ if (RB_UNLIKELY(!string_scan(state))) {
+ raise_parse_error("unexpected end of input, expected closing \"", state);
}
- switch (*state->cursor) {
- case 'n':
- if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) {
- state->cursor += 4;
- return json_push_value(state, config, Qnil);
- }
+ VALUE string;
+ // If the first character the scan stopped on is the closing quote, nothing needs unescaping.
+ if (RB_LIKELY(*state->cursor == '"')) {
+ string = json_string_fastpath(state, config, start, state->cursor, is_name);
+ state->cursor++;
+ }
+ else {
+ string = json_parse_escaped_string(state, config, is_name, start);
+ }
- raise_parse_error("unexpected token %s", state);
- break;
- case 't':
- if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
- state->cursor += 4;
- return json_push_value(state, config, Qtrue);
- }
+ return string;
+}
- raise_parse_error("unexpected token %s", state);
- break;
- case 'f':
- // Note: memcmp with a small power of two compile to an integer comparison
- if ((state->end - state->cursor >= 5) && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
- state->cursor += 5;
- return json_push_value(state, config, Qfalse);
- }
+#if JSON_CPU_LITTLE_ENDIAN_64BITS
+// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
+// Additional References:
+// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
+// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
+// Convert eight ASCII digits packed in `val` into the number they spell.
+// The first (most significant) digit is expected in the lowest byte, which is
+// what a memcpy of the text yields on a little-endian CPU.
+static inline uint64_t decode_8digits_unrolled(uint64_t val) {
+ const uint64_t mask = 0x000000FF000000FF;
+ const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
+ const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
+ // Turn every ASCII byte into its digit value.
+ val -= 0x3030303030303030;
+ // Every even byte now becomes a two-digit value: 10 * digit + following digit.
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
+ // Weight the four two-digit groups by 1000000, 10000, 100 and 1; the sum ends up in the upper 32 bits.
+ val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
+ return val;
+}
- raise_parse_error("unexpected token %s", state);
- break;
- case 'N':
- // Note: memcmp with a small power of two compile to an integer comparison
- if (config->allow_nan && (state->end - state->cursor >= 3) && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
- state->cursor += 3;
- return json_push_value(state, config, CNaN);
- }
+// Convert four ASCII digits packed in `val` into the number they spell.
+// The first (most significant) digit is expected in the lowest byte.
+static inline uint64_t decode_4digits_unrolled(uint32_t val) {
+ const uint32_t mask = 0x000000FF;
+ const uint32_t mul1 = 100;
+ // Turn every ASCII byte into its digit value.
+ val -= 0x30303030;
+ // Bytes 0 and 2 now become two-digit values: 10 * digit + following digit.
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
+ // First pair counts for hundreds, second pair for units.
+ val = ((val & mask) * mul1) + (((val >> 16) & mask));
+ return val;
+}
+#endif
- raise_parse_error("unexpected token %s", state);
- break;
- case 'I':
- if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
- state->cursor += 8;
- return json_push_value(state, config, CInfinity);
- }
+// Consume the run of ASCII digits at the cursor and fold it, base 10, into
+// *accumulator. Returns how many digits were consumed (possibly 0).
+// *accumulator is not reset, so a caller may keep accumulating across calls.
+// No overflow check is made here: the accumulator silently wraps on long runs,
+// callers have to look at the returned digit count.
+static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
+{
+ const char *start = state->cursor;
- raise_parse_error("unexpected token %s", state);
- break;
- case '-':
- // Note: memcmp with a small power of two compile to an integer comparison
- if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
- if (config->allow_nan) {
- state->cursor += 9;
- return json_push_value(state, config, CMinusInfinity);
- } else {
- raise_parse_error("unexpected token %s", state);
- }
- }
- // Fallthrough
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
- bool integer = true;
+#if JSON_CPU_LITTLE_ENDIAN_64BITS
+ // Fast path: inspect eight bytes at a time for as long as that many are left.
+ while (rest(state) >= sizeof(uint64_t)) {
+ uint64_t next_8bytes;
+ memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
+
+ // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
+ // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
+ // A byte of `match` equals 0x33 exactly when the corresponding input byte is '0'..'9'.
+ uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
+
+ if (match == 0x3333333333333333) { // 8 consecutive digits
+ *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
+ state->cursor += 8;
+ continue;
+ }
+
+ // Fewer than eight digits: count how many leading bytes are digits.
+ uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
- // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
- const char *start = state->cursor;
+ if (consecutive_digits >= 4) {
+ *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
+ state->cursor += 4;
+ consecutive_digits -= 4;
+ }
+
+ // At most three digits remain, handled one by one.
+ while (consecutive_digits) {
+ *accumulator = *accumulator * 10 + (*state->cursor - '0');
+ consecutive_digits--;
state->cursor++;
+ }
- while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
- state->cursor++;
- }
+ // A non-digit was found inside this chunk, so the run is over.
+ return (int)(state->cursor - start);
+ }
+#endif
+
+ // Byte by byte path: fewer than eight bytes left, or no 64-bit fast path.
+ char next_char;
+ while (rb_isdigit(next_char = peek(state))) {
+ *accumulator = *accumulator * 10 + (next_char - '0');
+ state->cursor++;
+ }
+ return (int)(state->cursor - start);
+}
- long integer_length = state->cursor - start;
+// Parse the number whose first digit is at the cursor and return it as an
+// Integer or a Float (or as whatever config->decimal_class builds for floats).
+// `negative` tells that a '-' was already consumed; `start` points at the very
+// beginning of the number's text (sign included) and is used for error
+// messages and for the string based fallbacks.
+// Raises a parse error on malformed numbers: superfluous leading zero, missing
+// digits after '-', '.' or the exponent marker.
+static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
+{
+ bool integer = true;
+ const char first_digit = *state->cursor;
- if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
- raise_parse_error_at("invalid number: %s", state, start);
- } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
- raise_parse_error_at("invalid number: %s", state, start);
- } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
- raise_parse_error_at("invalid number: %s", state, start);
- }
+ // Variables for Ryu optimization - extract digits during parsing
+ int64_t exponent = 0;
+ int decimal_point_pos = -1;
+ uint64_t mantissa = 0;
- if ((state->cursor < state->end) && (*state->cursor == '.')) {
- integer = false;
- state->cursor++;
+ // Parse integer part and extract mantissa digits
+ int mantissa_digits = json_parse_digits(state, &mantissa);
+
+ if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
+ raise_parse_error_at("invalid number: %s", state, start);
+ }
+
+ // Parse fractional part
+ if (peek(state) == '.') {
+ integer = false;
+ decimal_point_pos = mantissa_digits; // Remember position of decimal point
+ state->cursor++;
+
+ int fractional_digits = json_parse_digits(state, &mantissa);
+ mantissa_digits += fractional_digits;
+
+ if (RB_UNLIKELY(!fractional_digits)) {
+ raise_parse_error_at("invalid number: %s", state, start);
+ }
+ }
+
+ // Parse exponent
+ if (rb_tolower(peek(state)) == 'e') {
+ integer = false;
+ state->cursor++;
+
+ bool negative_exponent = false;
+ const char next_char = peek(state);
+ if (next_char == '-' || next_char == '+') {
+ negative_exponent = next_char == '-';
+ state->cursor++;
+ }
+
+ // Leading zeros are legal in an exponent. They are skipped on their own so
+ // that they do not count towards the overflow check below: otherwise a
+ // small exponent written with many zeros would be saturated.
+ const char *exponent_start = state->cursor;
+ while (peek(state) == '0') {
+ state->cursor++;
+ }
+ int exponent_digits = (int)(state->cursor - exponent_start);
+
+ uint64_t abs_exponent = 0;
+ int significant_digits = json_parse_digits(state, &abs_exponent);
+ exponent_digits += significant_digits;
+
+ if (RB_UNLIKELY(!exponent_digits)) {
+ raise_parse_error_at("invalid number: %s", state, start);
+ }
+
+ // 20 or more significant digits may have wrapped abs_exponent: saturate.
+ if (RB_UNLIKELY(significant_digits >= 20 || abs_exponent > (uint64_t)INT64_MAX)) {
+ exponent = negative_exponent ? INT64_MIN : INT64_MAX;
+ } else {
+ exponent = negative_exponent ? -(int64_t)abs_exponent : (int64_t)abs_exponent;
+ }
+ }
+
+ if (integer) {
+ return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
+ }
+
+ // Adjust exponent based on decimal point position
+ if (decimal_point_pos >= 0) {
+ const int64_t fraction_length = mantissa_digits - decimal_point_pos;
+ // The exponent may sit at (or near) INT64_MIN after saturation; subtracting
+ // from it would be a signed overflow, so clamp instead.
+ if (RB_UNLIKELY(exponent < INT64_MIN + fraction_length)) {
+ exponent = INT64_MIN;
+ } else {
+ exponent -= fraction_length;
+ }
+ }
+
+ return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
+}
+
+// Parse an unsigned number; its text starts at the cursor itself.
+static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
+{
+    const char *number_start = state->cursor;
+    return json_parse_number(state, config, false, number_start);
+}
+
+// Parse a number whose '-' sign was already consumed; its text therefore
+// starts one byte before the cursor.
+static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
+{
+    const char *number_start = state->cursor - 1;
+    return json_parse_number(state, config, true, number_start);
+}
- if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
- raise_parse_error("invalid number: %s", state);
+// Number of values pushed onto the rvalue stack since the container described
+// by `frame` was opened: array elements, or object keys and values interleaved.
+// It sizes the bulk decode when the container closes, and distinguishes the
+// first key/colon from the following ones.
+static inline long json_frame_entry_count(const json_frame *frame, const rvalue_stack *value_stack)
+{
+    const long pushed_since_open = value_stack->head - frame->value_stack_head;
+    return pushed_since_open;
+}
+
+// To be called once a complete value sits on top of the rvalue stack, with the
+// frame that was waiting for that value (after a container was closed, that is
+// the parent frame which just became the top one again). The frame moves on to
+// its "value received" phase: done for the root document, expecting ',' or the
+// closing bracket for an array or an object.
+static inline void json_value_completed(json_frame *frame)
+{
+    // Each frame type has the same numeric value as the phase that follows a
+    // completed value, which is what makes the plain cast below valid.
+    JSON_ASSERT((int)JSON_FRAME_ROOT == (int)JSON_PHASE_DONE);
+    JSON_ASSERT((int)JSON_FRAME_ARRAY == (int)JSON_PHASE_ARRAY_COMMA);
+    JSON_ASSERT((int)JSON_FRAME_OBJECT == (int)JSON_PHASE_OBJECT_COMMA);
+
+    const enum json_frame_phase next_phase = (enum json_frame_phase)frame->type;
+    frame->phase = next_phase;
+}
+
+// Try to consume `keyword` at the cursor. On a match the cursor is moved past
+// the keyword and true is returned; otherwise the cursor is left untouched and
+// false is returned. The first `offset` bytes of the keyword are not compared.
+ALWAYS_INLINE(static) bool json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset)
+{
+    // `keyword` is assumed to always be a literal, so that the compiler can
+    // turn this strlen (and the computations depending on it) into constants.
+    const size_t keyword_length = strlen(keyword);
+
+    if (rest(state) < keyword_length) {
+        return false;
+    }
+
+    // Note: a memcmp of a literal whose size is a small power of two compiles
+    // to an integer comparison. That is why the comparison sometimes starts at
+    // the first byte and sometimes at the second one.
+    if (memcmp(state->cursor + offset, keyword + offset, keyword_length - offset) != 0) {
+        return false;
+    }
+
+    state->cursor += keyword_length;
+    return true;
+}
+
+// Parse an arbitrary JSON value iteratively. This is a state machine driven
+// entirely by the top frame's phase so it can stop at any value boundary and
+// resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the
+// bottom of the stack, so the stack is never empty mid-parse and the document
+// itself is just another frame whose value, once parsed, leaves its phase DONE.
+static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
+{
+ json_frame *frame = json_frame_stack_peek(state->frames);
+
+ switch (frame->phase) {
+ case JSON_PHASE_DONE: goto JSON_PHASE_DONE;
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
+ case JSON_PHASE_OBJECT_KEY: goto JSON_PHASE_OBJECT_KEY;
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
+ }
+ UNREACHABLE_RETURN(Qundef);
+
+ JSON_PHASE_DONE: {
+ // The root document value is parsed; it is the lone survivor on
+ // the rvalue stack.
+ return *rvalue_stack_peek(state->value_stack, 1);
+ }
+
+ JSON_PHASE_VALUE: {
+ json_eat_whitespace(state);
+
+ VALUE value;
+ switch (peek(state)) {
+ case 'n':
+ if (json_match_keyword(state, "null", 0)) {
+ value = Qnil;
+ break;
}
+ raise_parse_error("unexpected token %s", state);
- while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
- state->cursor++;
+ case 't':
+ if (json_match_keyword(state, "true", 0)) {
+ value = Qtrue;
+ break;
}
- }
+ raise_parse_error("unexpected token %s", state);
- if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
- integer = false;
- state->cursor++;
- if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
- state->cursor++;
+ case 'f':
+ if (json_match_keyword(state, "false", 1)) {
+ value = Qfalse;
+ break;
}
+ raise_parse_error("unexpected token %s", state);
- if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
- raise_parse_error("invalid number: %s", state);
+ case 'N':
+ // Note: memcmp with a small power of two compile to an integer comparison
+ if (config->allow_nan && json_match_keyword(state, "NaN", 1)) {
+ value = CNaN;
+ break;
}
+ raise_parse_error("unexpected token %s", state);
- while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
- state->cursor++;
+ case 'I':
+ if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) {
+ value = CInfinity;
+ break;
}
- }
+ raise_parse_error("unexpected token %s", state);
- if (integer) {
- return json_push_value(state, config, json_decode_integer(start, state->cursor));
+ case '-': {
+ state->cursor++;
+ if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) {
+ value = CMinusInfinity;
+ } else {
+ value = json_parse_negative_number(state, config);
+ }
+ break;
}
- return json_push_value(state, config, json_decode_float(config, start, state->cursor));
- }
- case '"': {
- // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
- return json_parse_string(state, config, false);
- break;
- }
- case '[': {
- state->cursor++;
- json_eat_whitespace(state);
- long stack_head = state->stack->head;
- if ((state->cursor < state->end) && (*state->cursor == ']')) {
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+ value = json_parse_positive_number(state, config);
+ break;
+
+ case '"':
+ // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
+ value = json_parse_string(state, config, false);
+ break;
+
+ case '[': {
state->cursor++;
- return json_push_value(state, config, json_decode_array(state, config, 0));
- } else {
+ json_eat_whitespace(state);
+
+ if (peek(state) == ']') {
+ state->cursor++;
+ value = json_decode_array(state, config, 0);
+ break;
+ }
+
state->current_nesting++;
if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
}
state->in_array++;
- json_parse_any(state, config);
+
+ // Phase stays VALUE: the next iteration reads the first element.
+ frame = json_frame_stack_push(state, (json_frame){
+ .type = JSON_FRAME_ARRAY,
+ .phase = JSON_PHASE_VALUE,
+ .value_stack_head = state->value_stack->head,
+ });
+ goto JSON_PHASE_VALUE;
}
+ case '{': {
+ const char *object_start_cursor = state->cursor;
- while (true) {
+ state->cursor++;
json_eat_whitespace(state);
- if (state->cursor < state->end) {
- if (*state->cursor == ']') {
- state->cursor++;
- long count = state->stack->head - stack_head;
- state->current_nesting--;
- state->in_array--;
- return json_push_value(state, config, json_decode_array(state, config, count));
- }
+ if (peek(state) == '}') {
+ state->cursor++;
+ value = json_decode_object(state, config, 0);
+ break;
+ }
- if (*state->cursor == ',') {
- state->cursor++;
- if (config->allow_trailing_comma) {
- json_eat_whitespace(state);
- if ((state->cursor < state->end) && (*state->cursor == ']')) {
- continue;
- }
- }
- json_parse_any(state, config);
- continue;
- }
+ state->current_nesting++;
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
}
- raise_parse_error("expected ',' or ']' after array value", state);
+ // Phase KEY: the next iteration reads the first key.
+ frame = json_frame_stack_push(state, (json_frame){
+ .type = JSON_FRAME_OBJECT,
+ .phase = JSON_PHASE_OBJECT_KEY,
+ .value_stack_head = state->value_stack->head,
+ .start_cursor = object_start_cursor,
+ });
+ goto JSON_PHASE_OBJECT_KEY;
}
- break;
+
+ case 0:
+ raise_parse_error("unexpected end of input", state);
+
+ default:
+ raise_parse_error("unexpected character: %s", state);
}
- case '{': {
- const char *object_start_cursor = state->cursor;
- state->cursor++;
- json_eat_whitespace(state);
- long stack_head = state->stack->head;
+ json_push_value(state, config, value);
+ json_value_completed(frame);
- if ((state->cursor < state->end) && (*state->cursor == '}')) {
- state->cursor++;
- return json_push_value(state, config, json_decode_object(state, config, 0));
- } else {
- state->current_nesting++;
- if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
- rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
- }
+ switch (frame->phase) {
+ case JSON_PHASE_DONE: goto JSON_PHASE_DONE;
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
+ case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef);
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
+ }
+ UNREACHABLE_RETURN(Qundef);
+ }
- if (*state->cursor != '"') {
- raise_parse_error("expected object key, got %s", state);
- }
- json_parse_string(state, config, true);
+ JSON_PHASE_OBJECT_KEY: {
+ JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
- json_eat_whitespace(state);
- if ((state->cursor >= state->end) || (*state->cursor != ':')) {
- raise_parse_error("expected ':' after object key", state);
- }
- state->cursor++;
+ json_eat_whitespace(state);
- json_parse_any(state, config);
+ if (RB_LIKELY(peek(state) == '"')) {
+ json_push_value(state, config, json_parse_string(state, config, true));
+ frame->phase = JSON_PHASE_OBJECT_COLON;
+ goto JSON_PHASE_OBJECT_COLON;
+ } else {
+ // The message differs for the first key vs. a key after a
+ // ',': the first is the only one reached with nothing pushed
+ // for this object yet.
+ if (json_frame_entry_count(frame, state->value_stack) == 0) {
+ raise_parse_error("expected object key, got %s", state);
+ } else {
+ raise_parse_error("expected object key, got: %s", state);
}
+ }
+ UNREACHABLE_RETURN(Qundef);
+ }
- while (true) {
- json_eat_whitespace(state);
+ JSON_PHASE_OBJECT_COLON: {
+ JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
- if (state->cursor < state->end) {
- if (*state->cursor == '}') {
- state->cursor++;
- state->current_nesting--;
- size_t count = state->stack->head - stack_head;
+ json_eat_whitespace(state);
- // Temporary rewind cursor in case an error is raised
- const char *final_cursor = state->cursor;
- state->cursor = object_start_cursor;
- VALUE object = json_decode_object(state, config, count);
- state->cursor = final_cursor;
+ if (RB_LIKELY(peek(state) == ':')) {
+ state->cursor++;
+ frame->phase = JSON_PHASE_VALUE;
+ goto JSON_PHASE_VALUE;
+ } else {
+ // First colon (only the first pair's key is pushed, nothing
+ // else) vs. a later one.
+ if (json_frame_entry_count(frame, state->value_stack) == 1) {
+ raise_parse_error("expected ':' after object key", state);
+ } else {
+ raise_parse_error("expected ':' after object key, got: %s", state);
+ }
+ }
+ UNREACHABLE_RETURN(Qundef);
+ }
- return json_push_value(state, config, object);
- }
+ JSON_PHASE_ARRAY_COMMA: {
+ JSON_ASSERT(frame->type == JSON_FRAME_ARRAY);
- if (*state->cursor == ',') {
- state->cursor++;
- json_eat_whitespace(state);
+ json_eat_whitespace(state);
- if (config->allow_trailing_comma) {
- if ((state->cursor < state->end) && (*state->cursor == '}')) {
- continue;
- }
- }
+ const char next_char = peek(state);
- if (*state->cursor != '"') {
- raise_parse_error("expected object key, got: %s", state);
- }
- json_parse_string(state, config, true);
+ if (RB_LIKELY(next_char == ',')) {
+ state->cursor++;
+ if (config->allow_trailing_comma) {
+ json_eat_whitespace(state);
+ if (peek(state) == ']') {
+ // Trailing comma: stay in COMMA to close on the next iteration.
+ goto JSON_PHASE_ARRAY_COMMA;
+ }
+ }
+ frame->phase = JSON_PHASE_VALUE;
+ goto JSON_PHASE_VALUE;
+ } else if (next_char == ']') {
+ state->cursor++;
+ long count = json_frame_entry_count(frame, state->value_stack);
+ state->current_nesting--;
+ state->in_array--;
+ json_frame_stack_pop(state->frames);
+ json_push_value(state, config, json_decode_array(state, config, count));
+ frame = json_frame_stack_peek(state->frames);
+ json_value_completed(frame);
+
+ switch (frame->phase) {
+ case JSON_PHASE_DONE: goto JSON_PHASE_DONE;
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
+ case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef);
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
+ }
+ } else {
+ raise_parse_error("expected ',' or ']' after array value", state);
+ }
+ UNREACHABLE_RETURN(Qundef);
+ }
- json_eat_whitespace(state);
- if ((state->cursor >= state->end) || (*state->cursor != ':')) {
- raise_parse_error("expected ':' after object key, got: %s", state);
- }
- state->cursor++;
+ JSON_PHASE_OBJECT_COMMA: {
+ JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
- json_parse_any(state, config);
+ json_eat_whitespace(state);
+ const char next_char = peek(state);
- continue;
- }
+ if (RB_LIKELY(next_char == ',')) {
+ state->cursor++;
+
+ if (config->allow_trailing_comma) {
+ json_eat_whitespace(state);
+ if (peek(state) == '}') {
+ // Trailing comma: stay in COMMA to close on the next iteration.
+ goto JSON_PHASE_OBJECT_COMMA;
}
+ }
- raise_parse_error("expected ',' or '}' after object value, got: %s", state);
+ frame->phase = JSON_PHASE_OBJECT_KEY;
+ goto JSON_PHASE_OBJECT_KEY;
+ } else if (next_char == '}') {
+ state->cursor++;
+ state->current_nesting--;
+ size_t count = json_frame_entry_count(frame, state->value_stack);
+
+ // Temporarily rewind the cursor in case an error is raised
+ const char *final_cursor = state->cursor;
+ state->cursor = frame->start_cursor;
+ VALUE object = json_decode_object(state, config, count);
+ state->cursor = final_cursor;
+
+ json_push_value(state, config, object);
+ json_frame_stack_pop(state->frames);
+ frame = json_frame_stack_peek(state->frames);
+ json_value_completed(frame);
+
+ switch (frame->phase) {
+ case JSON_PHASE_DONE: goto JSON_PHASE_DONE;
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
+ case JSON_PHASE_OBJECT_KEY: UNREACHABLE_RETURN(Qundef);
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
}
- break;
+ } else {
+ raise_parse_error("expected ',' or '}' after object value, got: %s", state);
}
-
- default:
- raise_parse_error("unexpected character: %s", state);
- break;
+ UNREACHABLE_RETURN(Qundef);
}
- raise_parse_error("unreachable: %s", state);
+ UNREACHABLE_RETURN(Qundef);
}
static void json_ensure_eof(JSON_ParserState *state)
{
json_eat_whitespace(state);
- if (state->cursor != state->end) {
+ if (!eos(state)) {
raise_parse_error("unexpected token at end of stream %s", state);
}
}
@@ -1290,38 +1788,56 @@ static void json_ensure_eof(JSON_ParserState *state)
static VALUE convert_encoding(VALUE source)
{
- int encindex = RB_ENCODING_GET(source);
+ StringValue(source);
+ int encindex = RB_ENCODING_GET(source);
- if (RB_LIKELY(encindex == utf8_encindex)) {
+ if (RB_LIKELY(encindex == utf8_encindex)) {
+ return source;
+ }
+
+ if (encindex == binary_encindex) {
+ // For historical reasons, we silently reinterpret binary strings as UTF-8
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
+ }
+
+ source = rb_funcall(source, i_encode, 1, Encoding_UTF_8);
+ StringValue(source);
return source;
- }
+}
- if (encindex == binary_encindex) {
- // For historical reason, we silently reinterpret binary strings as UTF-8
- return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
- }
+struct parser_config_init_args {
+ JSON_ParserConfig *config;
+ VALUE self;
+};
- return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
+static void parser_config_wb_write(VALUE self, VALUE *dest, VALUE val)
+{
+ *dest = val;
+ if (self) RB_OBJ_WRITTEN(self, Qundef, val);
}
static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
{
- JSON_ParserConfig *config = (JSON_ParserConfig *)data;
-
- if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
- else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
- else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
- else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
- else if (key == sym_freeze) { config->freeze = RTEST(val); }
- else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
- else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
- else if (key == sym_decimal_class) {
+ struct parser_config_init_args *args = (struct parser_config_init_args *)data;
+ JSON_ParserConfig *config = args->config;
+ VALUE self = args->self;
+
+ if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
+ else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
+ else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
+ else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
+ else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
+ else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
+ else if (key == sym_freeze) { config->freeze = RTEST(val); }
+ else if (key == sym_on_load) { parser_config_wb_write(self, &config->on_load_proc, RTEST(val) ? val : Qfalse); }
+ else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
+ else if (key == sym_decimal_class) {
if (RTEST(val)) {
if (rb_respond_to(val, i_try_convert)) {
- config->decimal_class = val;
+ parser_config_wb_write(self, &config->decimal_class, val);
config->decimal_method_id = i_try_convert;
} else if (rb_respond_to(val, i_new)) {
- config->decimal_class = val;
+ parser_config_wb_write(self, &config->decimal_class, val);
config->decimal_method_id = i_new;
} else if (RB_TYPE_P(val, T_CLASS)) {
VALUE name = rb_class_name(val);
@@ -1330,7 +1846,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
if (last_colon) {
const char *mod_path_end = last_colon - 1;
VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
- config->decimal_class = rb_path_to_class(mod_path);
+ parser_config_wb_write(self, &config->decimal_class, rb_path_to_class(mod_path));
const char *method_name_beg = last_colon + 1;
long before_len = method_name_beg - name_cstr;
@@ -1338,7 +1854,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
VALUE method_name = rb_str_substr(name, before_len, len);
config->decimal_method_id = SYM2ID(rb_str_intern(method_name));
} else {
- config->decimal_class = rb_mKernel;
+ parser_config_wb_write(self, &config->decimal_class, rb_mKernel);
config->decimal_method_id = SYM2ID(rb_str_intern(name));
}
}
@@ -1348,16 +1864,21 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
return ST_CONTINUE;
}
-static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
+static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self)
{
config->max_nesting = 100;
+ struct parser_config_init_args args = {
+ .config = config,
+ .self = self,
+ };
+
if (!NIL_P(opts)) {
Check_Type(opts, T_HASH);
if (RHASH_SIZE(opts) > 0) {
// We assume in most cases few keys are set so it's faster to go over
// the provided keys than to check all possible keys.
- rb_hash_foreach(opts, parser_config_init_i, (VALUE)config);
+ rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args);
}
}
@@ -1388,36 +1909,62 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
*/
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
{
+ rb_check_frozen(self);
GET_PARSER_CONFIG;
- parser_config_init(config, opts);
-
- RB_OBJ_WRITTEN(self, Qundef, config->decimal_class);
+ parser_config_init(config, opts, self);
return self;
}
-static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
+static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src)
{
- Vsource = convert_encoding(StringValue(Vsource));
- StringValue(Vsource);
+ VALUE Vsource = convert_encoding(src);
+
+ // Ensure the string isn't mutated under us.
+ // The classic API to use is `rb_str_locktmp`, but then we'd
+ // need to use `rb_protect` to make sure we always unlock.
+ if (Vsource == src) {
+ Vsource = rb_str_new_frozen(Vsource);
+ }
VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
- rvalue_stack stack = {
+ rvalue_stack value_stack = {
.type = RVALUE_STACK_STACK_ALLOCATED,
.ptr = rvalue_stack_buffer,
.capa = RVALUE_STACK_INITIAL_CAPA,
};
+ // Seed the frame stack with the root frame, establishing the invariant that
+ // json_parse_any always has a top frame to dispatch on (so the stack is never
+ // empty mid-parse).
+ json_frame frame_stack_buffer[JSON_FRAME_STACK_INITIAL_CAPA];
+ frame_stack_buffer[0] = (json_frame){
+ .type = JSON_FRAME_ROOT,
+ .phase = JSON_PHASE_VALUE,
+ };
+ json_frame_stack frames = {
+ .type = RVALUE_STACK_STACK_ALLOCATED,
+ .ptr = frame_stack_buffer,
+ .capa = JSON_FRAME_STACK_INITIAL_CAPA,
+ .head = 1,
+ };
+
long len;
const char *start;
+
RSTRING_GETMEM(Vsource, start, len);
+ VALUE value_stack_handle = 0;
+ VALUE frame_stack_handle = 0;
JSON_ParserState _state = {
.start = start,
.cursor = start,
.end = start + len,
- .stack = &stack,
+ .value_stack = &value_stack,
+ .value_stack_handle = &value_stack_handle,
+ .frames = &frames,
+ .frame_stack_handle = &frame_stack_handle,
};
JSON_ParserState *state = &_state;
@@ -1425,8 +1972,11 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
// This may be skipped in case of exception, but
// it won't cause a leak.
- rvalue_stack_eagerly_release(state->stack_handle);
-
+ rvalue_stack_eagerly_release(value_stack_handle);
+ json_frame_stack_eagerly_release(frame_stack_handle);
+ RB_GC_GUARD(value_stack_handle);
+ RB_GC_GUARD(frame_stack_handle);
+ RB_GC_GUARD(Vsource);
json_ensure_eof(state);
return result;
@@ -1447,12 +1997,9 @@ static VALUE cParserConfig_parse(VALUE self, VALUE Vsource)
static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
{
- Vsource = convert_encoding(StringValue(Vsource));
- StringValue(Vsource);
-
JSON_ParserConfig _config = {0};
JSON_ParserConfig *config = &_config;
- parser_config_init(config, opts);
+ parser_config_init(config, opts, false);
return cParser_parse(config, Vsource);
}
@@ -1460,30 +2007,35 @@ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
static void JSON_ParserConfig_mark(void *ptr)
{
JSON_ParserConfig *config = ptr;
- rb_gc_mark(config->on_load_proc);
- rb_gc_mark(config->decimal_class);
+ rb_gc_mark_movable(config->on_load_proc);
+ rb_gc_mark_movable(config->decimal_class);
}
-static void JSON_ParserConfig_free(void *ptr)
+static size_t JSON_ParserConfig_memsize(const void *ptr)
{
- JSON_ParserConfig *config = ptr;
- ruby_xfree(config);
+#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
+ return 0;
+#else
+ return sizeof(JSON_ParserConfig);
+#endif
}
-static size_t JSON_ParserConfig_memsize(const void *ptr)
+static void JSON_ParserConfig_compact(void *ptr)
{
- return sizeof(JSON_ParserConfig);
+ JSON_ParserConfig *config = ptr;
+ config->on_load_proc = rb_gc_location(config->on_load_proc);
+ config->decimal_class = rb_gc_location(config->decimal_class);
}
static const rb_data_type_t JSON_ParserConfig_type = {
- "JSON::Ext::Parser/ParserConfig",
- {
- JSON_ParserConfig_mark,
- JSON_ParserConfig_free,
- JSON_ParserConfig_memsize,
+ .wrap_struct_name = "JSON::Ext::Parser/ParserConfig",
+ .function = {
+ .dmark = JSON_ParserConfig_mark,
+ .dfree = RUBY_DEFAULT_FREE,
+ .dsize = JSON_ParserConfig_memsize,
+ .dcompact = JSON_ParserConfig_compact,
},
- 0, 0,
- RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
};
static VALUE cJSON_parser_s_allocate(VALUE klass)
@@ -1527,16 +2079,14 @@ void Init_parser(void)
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
+ sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
+ sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
sym_freeze = ID2SYM(rb_intern("freeze"));
sym_on_load = ID2SYM(rb_intern("on_load"));
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
- i_chr = rb_intern("chr");
- i_aset = rb_intern("[]=");
- i_aref = rb_intern("[]");
- i_leftshift = rb_intern("<<");
i_new = rb_intern("new");
i_try_convert = rb_intern("try_convert");
i_uminus = rb_intern("-@");
diff --git a/ext/json/simd/simd.h b/ext/json/simd/simd.h
index 3abbdb0209..611b41b066 100644
--- a/ext/json/simd/simd.h
+++ b/ext/json/simd/simd.h
@@ -1,10 +1,14 @@
+#include "../json.h"
+
typedef enum {
SIMD_NONE,
SIMD_NEON,
SIMD_SSE2
} SIMD_Implementation;
-#ifdef JSON_ENABLE_SIMD
+#ifndef __has_builtin // Optional of course.
+ #define __has_builtin(x) 0 // Compatibility with non-clang compilers.
+#endif
#ifdef __clang__
# if __has_builtin(__builtin_ctzll)
@@ -20,6 +24,8 @@ typedef enum {
static inline uint32_t trailing_zeros64(uint64_t input)
{
+ JSON_ASSERT(input > 0); // __builtin_ctzll(0) is undefined behavior
+
#if HAVE_BUILTIN_CTZLL
return __builtin_ctzll(input);
#else
@@ -35,6 +41,8 @@ static inline uint32_t trailing_zeros64(uint64_t input)
static inline int trailing_zeros(int input)
{
+ JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior
+
#if HAVE_BUILTIN_CTZLL
return __builtin_ctz(input);
#else
@@ -48,14 +56,36 @@ static inline int trailing_zeros(int input)
#endif
}
-#if (defined(__GNUC__ ) || defined(__clang__))
-#define FORCE_INLINE __attribute__((always_inline))
-#else
-#define FORCE_INLINE
-#endif
+#ifdef JSON_ENABLE_SIMD
+#define SIMD_MINIMUM_THRESHOLD 4
-#define SIMD_MINIMUM_THRESHOLD 6
+ALWAYS_INLINE(static) void json_fast_memcpy16(char *dst, const char *src, size_t len)
+{
+ RBIMPL_ASSERT_OR_ASSUME(len < 16);
+ RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); // 4
+#if defined(__has_builtin) && __has_builtin(__builtin_memcpy)
+ // If __builtin_memcpy is available, use it to copy between SIMD_MINIMUM_THRESHOLD (4) and vec_len-1 (15) bytes.
+ // These copies overlap. The first copy will copy the first 8 (or 4) bytes. The second copy will copy
+ // the last 8 (or 4) bytes but overlap with the first copy. The overlapping bytes will be in the correct
+ // position in both copies.
+
+ // Please do not attempt to replace __builtin_memcpy with memcpy without profiling and/or looking at the
+ // generated assembly. On clang specifically (tested on Apple clang version 17.0.0 (clang-1700.0.13.3)),
+ // when using memcpy, the compiler will notice the only difference is a 4 or 8 and generate a conditional
+ // select instruction instead of direct loads and stores with a branch. This ends up slower than the branch
+ // plus two loads and stores generated when using __builtin_memcpy.
+ if (len >= 8) {
+ __builtin_memcpy(dst, src, 8);
+ __builtin_memcpy(dst + len - 8, src + len - 8, 8);
+ } else {
+ __builtin_memcpy(dst, src, 4);
+ __builtin_memcpy(dst + len - 4, src + len - 4, 4);
+ }
+#else
+ MEMCPY(dst, src, char, len);
+#endif
+}
#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
#include <arm_neon.h>
@@ -70,14 +100,14 @@ static inline SIMD_Implementation find_simd_implementation(void)
#define HAVE_SIMD_NEON 1
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
-static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
+ALWAYS_INLINE(static) uint64_t neon_match_mask(uint8x16_t matches)
{
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
return mask & 0x8888888888888888ull;
}
-static inline FORCE_INLINE uint64_t compute_chunk_mask_neon(const char *ptr)
+ALWAYS_INLINE(static) uint64_t compute_chunk_mask_neon(const char *ptr)
{
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
@@ -90,7 +120,7 @@ static inline FORCE_INLINE uint64_t compute_chunk_mask_neon(const char *ptr)
return neon_match_mask(needs_escape);
}
-static inline FORCE_INLINE int string_scan_simd_neon(const char **ptr, const char *end, uint64_t *mask)
+ALWAYS_INLINE(static) int string_scan_simd_neon(const char **ptr, const char *end, uint64_t *mask)
{
while (*ptr + sizeof(uint8x16_t) <= end) {
uint64_t chunk_mask = compute_chunk_mask_neon(*ptr);
@@ -103,16 +133,6 @@ static inline FORCE_INLINE int string_scan_simd_neon(const char **ptr, const cha
return 0;
}
-static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table)
-{
- uint8x16x4_t tab;
- tab.val[0] = vld1q_u8(table);
- tab.val[1] = vld1q_u8(table+16);
- tab.val[2] = vld1q_u8(table+32);
- tab.val[3] = vld1q_u8(table+48);
- return tab;
-}
-
#endif /* ARM Neon Support.*/
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
@@ -137,7 +157,7 @@ static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table)
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
-static inline TARGET_SSE2 FORCE_INLINE int compute_chunk_mask_sse2(const char *ptr)
+ALWAYS_INLINE(static) TARGET_SSE2 int compute_chunk_mask_sse2(const char *ptr)
{
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
@@ -148,7 +168,7 @@ static inline TARGET_SSE2 FORCE_INLINE int compute_chunk_mask_sse2(const char *p
return _mm_movemask_epi8(needs_escape);
}
-static inline TARGET_SSE2 FORCE_INLINE int string_scan_simd_sse2(const char **ptr, const char *end, int *mask)
+ALWAYS_INLINE(static) TARGET_SSE2 int string_scan_simd_sse2(const char **ptr, const char *end, int *mask)
{
while (*ptr + sizeof(__m128i) <= end) {
int chunk_mask = compute_chunk_mask_sse2(*ptr);
diff --git a/ext/json/vendor/fpconv.c b/ext/json/vendor/fpconv.c
index 75efd46f11..6c9bc2c103 100644
--- a/ext/json/vendor/fpconv.c
+++ b/ext/json/vendor/fpconv.c
@@ -29,6 +29,10 @@
#include <string.h>
#include <stdint.h>
+#if JSON_DEBUG
+#include <assert.h>
+#endif
+
#define npowers 87
#define steppowers 8
#define firstpower -348 /* 10 ^ -348 */
@@ -320,15 +324,7 @@ static int emit_digits(char* digits, int ndigits, char* dest, int K, bool neg)
{
int exp = absv(K + ndigits - 1);
- int max_trailing_zeros = 7;
-
- if(neg) {
- max_trailing_zeros -= 1;
- }
-
- /* write plain integer */
- if(K >= 0 && (exp < (ndigits + max_trailing_zeros))) {
-
+ if(K >= 0 && exp < 15) {
memcpy(dest, digits, ndigits);
memset(dest + ndigits, '0', K);
@@ -432,10 +428,12 @@ static int filter_special(double fp, char* dest)
*
* Input:
* fp -> the double to convert, dest -> destination buffer.
- * The generated string will never be longer than 28 characters.
- * Make sure to pass a pointer to at least 28 bytes of memory.
+ * The generated string will never be longer than 32 characters.
+ * Make sure to pass a pointer to at least 32 bytes of memory.
* The emitted string will not be null terminated.
*
+ *
+ *
* Output:
* The number of written characters.
*
@@ -451,7 +449,7 @@ static int filter_special(double fp, char* dest)
* }
*
*/
-static int fpconv_dtoa(double d, char dest[28])
+static int fpconv_dtoa(double d, char dest[32])
{
char digits[18];
@@ -474,6 +472,9 @@ static int fpconv_dtoa(double d, char dest[28])
int ndigits = grisu2(d, digits, &K);
str_len += emit_digits(digits, ndigits, dest + str_len, K, neg);
+#if JSON_DEBUG
+ assert(str_len <= 32);
+#endif
return str_len;
}
diff --git a/ext/json/vendor/ryu.h b/ext/json/vendor/ryu.h
new file mode 100644
index 0000000000..f06ec814b4
--- /dev/null
+++ b/ext/json/vendor/ryu.h
@@ -0,0 +1,819 @@
+// Copyright 2018 Ulf Adams
+//
+// The contents of this file may be used under the terms of the Apache License,
+// Version 2.0.
+//
+// Alternatively, the contents of this file may be used under the terms of
+// the Boost Software License, Version 1.0.
+//
+// Unless required by applicable law or agreed to in writing, this software
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
+//
+// ---
+//
+// Apache License
+// Version 2.0, January 2004
+// http://www.apache.org/licenses/
+//
+// TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+//
+// 1. Definitions.
+//
+// "License" shall mean the terms and conditions for use, reproduction,
+// and distribution as defined by Sections 1 through 9 of this document.
+//
+// "Licensor" shall mean the copyright owner or entity authorized by
+// the copyright owner that is granting the License.
+//
+// "Legal Entity" shall mean the union of the acting entity and all
+// other entities that control, are controlled by, or are under common
+// control with that entity. For the purposes of this definition,
+// "control" means (i) the power, direct or indirect, to cause the
+// direction or management of such entity, whether by contract or
+// otherwise, or (ii) ownership of fifty percent (50%) or more of the
+// outstanding shares, or (iii) beneficial ownership of such entity.
+//
+// "You" (or "Your") shall mean an individual or Legal Entity
+// exercising permissions granted by this License.
+//
+// "Source" form shall mean the preferred form for making modifications,
+// including but not limited to software source code, documentation
+// source, and configuration files.
+//
+// "Object" form shall mean any form resulting from mechanical
+// transformation or translation of a Source form, including but
+// not limited to compiled object code, generated documentation,
+// and conversions to other media types.
+//
+// "Work" shall mean the work of authorship, whether in Source or
+// Object form, made available under the License, as indicated by a
+// copyright notice that is included in or attached to the work
+// (an example is provided in the Appendix below).
+//
+// "Derivative Works" shall mean any work, whether in Source or Object
+// form, that is based on (or derived from) the Work and for which the
+// editorial revisions, annotations, elaborations, or other modifications
+// represent, as a whole, an original work of authorship. For the purposes
+// of this License, Derivative Works shall not include works that remain
+// separable from, or merely link (or bind by name) to the interfaces of,
+// the Work and Derivative Works thereof.
+//
+// "Contribution" shall mean any work of authorship, including
+// the original version of the Work and any modifications or additions
+// to that Work or Derivative Works thereof, that is intentionally
+// submitted to Licensor for inclusion in the Work by the copyright owner
+// or by an individual or Legal Entity authorized to submit on behalf of
+// the copyright owner. For the purposes of this definition, "submitted"
+// means any form of electronic, verbal, or written communication sent
+// to the Licensor or its representatives, including but not limited to
+// communication on electronic mailing lists, source code control systems,
+// and issue tracking systems that are managed by, or on behalf of, the
+// Licensor for the purpose of discussing and improving the Work, but
+// excluding communication that is conspicuously marked or otherwise
+// designated in writing by the copyright owner as "Not a Contribution."
+//
+// "Contributor" shall mean Licensor and any individual or Legal Entity
+// on behalf of whom a Contribution has been received by Licensor and
+// subsequently incorporated within the Work.
+//
+// 2. Grant of Copyright License. Subject to the terms and conditions of
+// this License, each Contributor hereby grants to You a perpetual,
+// worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+// copyright license to reproduce, prepare Derivative Works of,
+// publicly display, publicly perform, sublicense, and distribute the
+// Work and such Derivative Works in Source or Object form.
+//
+// 3. Grant of Patent License. Subject to the terms and conditions of
+// this License, each Contributor hereby grants to You a perpetual,
+// worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+// (except as stated in this section) patent license to make, have made,
+// use, offer to sell, sell, import, and otherwise transfer the Work,
+// where such license applies only to those patent claims licensable
+// by such Contributor that are necessarily infringed by their
+// Contribution(s) alone or by combination of their Contribution(s)
+// with the Work to which such Contribution(s) was submitted. If You
+// institute patent litigation against any entity (including a
+// cross-claim or counterclaim in a lawsuit) alleging that the Work
+// or a Contribution incorporated within the Work constitutes direct
+// or contributory patent infringement, then any patent licenses
+// granted to You under this License for that Work shall terminate
+// as of the date such litigation is filed.
+//
+// 4. Redistribution. You may reproduce and distribute copies of the
+// Work or Derivative Works thereof in any medium, with or without
+// modifications, and in Source or Object form, provided that You
+// meet the following conditions:
+//
+// (a) You must give any other recipients of the Work or
+// Derivative Works a copy of this License; and
+//
+// (b) You must cause any modified files to carry prominent notices
+// stating that You changed the files; and
+//
+// (c) You must retain, in the Source form of any Derivative Works
+// that You distribute, all copyright, patent, trademark, and
+// attribution notices from the Source form of the Work,
+// excluding those notices that do not pertain to any part of
+// the Derivative Works; and
+//
+// (d) If the Work includes a "NOTICE" text file as part of its
+// distribution, then any Derivative Works that You distribute must
+// include a readable copy of the attribution notices contained
+// within such NOTICE file, excluding those notices that do not
+// pertain to any part of the Derivative Works, in at least one
+// of the following places: within a NOTICE text file distributed
+// as part of the Derivative Works; within the Source form or
+// documentation, if provided along with the Derivative Works; or,
+// within a display generated by the Derivative Works, if and
+// wherever such third-party notices normally appear. The contents
+// of the NOTICE file are for informational purposes only and
+// do not modify the License. You may add Your own attribution
+// notices within Derivative Works that You distribute, alongside
+// or as an addendum to the NOTICE text from the Work, provided
+// that such additional attribution notices cannot be construed
+// as modifying the License.
+//
+// You may add Your own copyright statement to Your modifications and
+// may provide additional or different license terms and conditions
+// for use, reproduction, or distribution of Your modifications, or
+// for any such Derivative Works as a whole, provided Your use,
+// reproduction, and distribution of the Work otherwise complies with
+// the conditions stated in this License.
+//
+// 5. Submission of Contributions. Unless You explicitly state otherwise,
+// any Contribution intentionally submitted for inclusion in the Work
+// by You to the Licensor shall be under the terms and conditions of
+// this License, without any additional terms or conditions.
+// Notwithstanding the above, nothing herein shall supersede or modify
+// the terms of any separate license agreement you may have executed
+// with Licensor regarding such Contributions.
+//
+// 6. Trademarks. This License does not grant permission to use the trade
+// names, trademarks, service marks, or product names of the Licensor,
+// except as required for reasonable and customary use in describing the
+// origin of the Work and reproducing the content of the NOTICE file.
+//
+// 7. Disclaimer of Warranty. Unless required by applicable law or
+// agreed to in writing, Licensor provides the Work (and each
+// Contributor provides its Contributions) on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied, including, without limitation, any warranties or conditions
+// of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+// PARTICULAR PURPOSE. You are solely responsible for determining the
+// appropriateness of using or redistributing the Work and assume any
+// risks associated with Your exercise of permissions under this License.
+//
+// 8. Limitation of Liability. In no event and under no legal theory,
+// whether in tort (including negligence), contract, or otherwise,
+// unless required by applicable law (such as deliberate and grossly
+// negligent acts) or agreed to in writing, shall any Contributor be
+// liable to You for damages, including any direct, indirect, special,
+// incidental, or consequential damages of any character arising as a
+// result of this License or out of the use or inability to use the
+// Work (including but not limited to damages for loss of goodwill,
+// work stoppage, computer failure or malfunction, or any and all
+// other commercial damages or losses), even if such Contributor
+// has been advised of the possibility of such damages.
+//
+// 9. Accepting Warranty or Additional Liability. While redistributing
+// the Work or Derivative Works thereof, You may choose to offer,
+// and charge a fee for, acceptance of support, warranty, indemnity,
+// or other liability obligations and/or rights consistent with this
+// License. However, in accepting such obligations, You may act only
+// on Your own behalf and on Your sole responsibility, not on behalf
+// of any other Contributor, and only if You agree to indemnify,
+// defend, and hold each Contributor harmless for any liability
+// incurred by, or claims asserted against, such Contributor by reason
+// of your accepting any such warranty or additional liability.
+//
+// END OF TERMS AND CONDITIONS
+//
+// APPENDIX: How to apply the Apache License to your work.
+//
+// To apply the Apache License to your work, attach the following
+// boilerplate notice, with the fields enclosed by brackets "[]"
+// replaced with your own identifying information. (Don't include
+// the brackets!) The text should be enclosed in the appropriate
+// comment syntax for the file format. We also recommend that a
+// file or class name and description of purpose be included on the
+// same "printed page" as the copyright notice for easier
+// identification within third-party archives.
+//
+// Copyright [yyyy] [name of copyright owner]
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// ---
+//
+// Boost Software License - Version 1.0 - August 17th, 2003
+//
+// Permission is hereby granted, free of charge, to any person or organization
+// obtaining a copy of the software and accompanying documentation covered by
+// this license (the "Software") to use, reproduce, display, distribute,
+// execute, and transmit the Software, and to prepare derivative works of the
+// Software, and to permit third-parties to whom the Software is furnished to
+// do so, all subject to the following:
+//
+// The copyright notices in the Software and this entire statement, including
+// the above license grant, this restriction and the following disclaimer,
+// must be included in all copies of the Software, in whole or in part, and
+// all derivative works of the Software, unless such copies or derivative
+// works are solely in the form of machine-executable object code generated by
+// a source language processor.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+// ---
+// Minimal Ryu implementation adapted for the Ruby JSON gem by Josef Šimánek.
+// Optimized for a pre-extracted mantissa/exponent pair coming from JSON parsing.
+// This is a stripped-down version containing only what's needed for
+// converting a decimal mantissa+exponent to an IEEE 754 double-precision value.
+
+#ifndef RYU_H
+#define RYU_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+
+// Detect __builtin_clzll availability (for floor_log2); the result is RYU_HAVE_BUILTIN_CLZLL, always defined as 0 or 1 and tested with #if in ryu_leading_zeros64().
+// Note: MSVC doesn't have __builtin_clzll, so compilers matching neither test below get 0 and use the portable fallback
+#ifdef __clang__ // Clang: ask the compiler directly instead of guessing from a version number
+ #if __has_builtin(__builtin_clzll)
+ #define RYU_HAVE_BUILTIN_CLZLL 1
+ #else
+ #define RYU_HAVE_BUILTIN_CLZLL 0
+ #endif // __has_builtin(__builtin_clzll)
+#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) // GCC-compatible compiler reporting version 4.3 or newer
+ #define RYU_HAVE_BUILTIN_CLZLL 1
+#else // neither Clang nor a recent enough GCC: do not rely on the builtin
+ #define RYU_HAVE_BUILTIN_CLZLL 0
+#endif // compiler detection for __builtin_clzll
+
+// Count the leading zero bits of a 64-bit value (for floor_log2); the result is only defined on every build when input != 0, see the note on the builtin branch.
+static inline uint32_t ryu_leading_zeros64(uint64_t input)
+{
+#if RYU_HAVE_BUILTIN_CLZLL
+ return __builtin_clzll(input); // NOTE(review): input is forwarded unchecked; GCC documents __builtin_clzll(0) as undefined while the fallback below returns 64 - confirm callers never pass 0
+#else
+ // Fallback: binary search for the highest set bit, halving the examined width at each step (32, 16, 8, 4, 2, 1)
+ // This works on MSVC and other compilers without __builtin_clzll
+ if (input == 0) return 64; // no bit set at all: every one of the 64 bits is a leading zero
+ uint32_t n = 0; // leading zeros counted so far
+ if (input <= 0x00000000FFFFFFFFULL) { n += 32; input <<= 32; } // top 32 bits are clear: count them and shift the rest up
+ if (input <= 0x0000FFFFFFFFFFFFULL) { n += 16; input <<= 16; } // same test for the top 16 bits of what remains
+ if (input <= 0x00FFFFFFFFFFFFFFULL) { n += 8; input <<= 8; } // top 8 bits
+ if (input <= 0x0FFFFFFFFFFFFFFFULL) { n += 4; input <<= 4; } // top 4 bits
+ if (input <= 0x3FFFFFFFFFFFFFFFULL) { n += 2; input <<= 2; } // top 2 bits
+ if (input <= 0x7FFFFFFFFFFFFFFFULL) { n += 1; } // top bit; no shift needed because input is not read again
+ return n;
+#endif
+}
+
+// These tables are generated by PrintDoubleLookupTable; the macros below must stay in sync with the generated data and should not be edited by hand.
+#define DOUBLE_POW5_INV_BITCOUNT 125 // presumably the bit width used when generating DOUBLE_POW5_INV_SPLIT - TODO confirm against the code that consumes it
+#define DOUBLE_POW5_BITCOUNT 125 // presumably the bit width used when generating DOUBLE_POW5_SPLIT - TODO confirm against the code that consumes it
+
+#define DOUBLE_POW5_INV_TABLE_SIZE 342 // number of rows in DOUBLE_POW5_INV_SPLIT (each row is two uint64_t values)
+#define DOUBLE_POW5_TABLE_SIZE 326 // number of rows in DOUBLE_POW5_SPLIT (each row is two uint64_t values)
+
+static const uint64_t DOUBLE_POW5_INV_SPLIT[DOUBLE_POW5_INV_TABLE_SIZE][2] = {
+ { 1u, 2305843009213693952u }, { 11068046444225730970u, 1844674407370955161u },
+ { 5165088340638674453u, 1475739525896764129u }, { 7821419487252849886u, 1180591620717411303u },
+ { 8824922364862649494u, 1888946593147858085u }, { 7059937891890119595u, 1511157274518286468u },
+ { 13026647942995916322u, 1208925819614629174u }, { 9774590264567735146u, 1934281311383406679u },
+ { 11509021026396098440u, 1547425049106725343u }, { 16585914450600699399u, 1237940039285380274u },
+ { 15469416676735388068u, 1980704062856608439u }, { 16064882156130220778u, 1584563250285286751u },
+ { 9162556910162266299u, 1267650600228229401u }, { 7281393426775805432u, 2028240960365167042u },
+ { 16893161185646375315u, 1622592768292133633u }, { 2446482504291369283u, 1298074214633706907u },
+ { 7603720821608101175u, 2076918743413931051u }, { 2393627842544570617u, 1661534994731144841u },
+ { 16672297533003297786u, 1329227995784915872u }, { 11918280793837635165u, 2126764793255865396u },
+ { 5845275820328197809u, 1701411834604692317u }, { 15744267100488289217u, 1361129467683753853u },
+ { 3054734472329800808u, 2177807148294006166u }, { 17201182836831481939u, 1742245718635204932u },
+ { 6382248639981364905u, 1393796574908163946u }, { 2832900194486363201u, 2230074519853062314u },
+ { 5955668970331000884u, 1784059615882449851u }, { 1075186361522890384u, 1427247692705959881u },
+ { 12788344622662355584u, 2283596308329535809u }, { 13920024512871794791u, 1826877046663628647u },
+ { 3757321980813615186u, 1461501637330902918u }, { 10384555214134712795u, 1169201309864722334u },
+ { 5547241898389809503u, 1870722095783555735u }, { 4437793518711847602u, 1496577676626844588u },
+ { 10928932444453298728u, 1197262141301475670u }, { 17486291911125277965u, 1915619426082361072u },
+ { 6610335899416401726u, 1532495540865888858u }, { 12666966349016942027u, 1225996432692711086u },
+ { 12888448528943286597u, 1961594292308337738u }, { 17689456452638449924u, 1569275433846670190u },
+ { 14151565162110759939u, 1255420347077336152u }, { 7885109000409574610u, 2008672555323737844u },
+ { 9997436015069570011u, 1606938044258990275u }, { 7997948812055656009u, 1285550435407192220u },
+ { 12796718099289049614u, 2056880696651507552u }, { 2858676849947419045u, 1645504557321206042u },
+ { 13354987924183666206u, 1316403645856964833u }, { 17678631863951955605u, 2106245833371143733u },
+ { 3074859046935833515u, 1684996666696914987u }, { 13527933681774397782u, 1347997333357531989u },
+ { 10576647446613305481u, 2156795733372051183u }, { 15840015586774465031u, 1725436586697640946u },
+ { 8982663654677661702u, 1380349269358112757u }, { 18061610662226169046u, 2208558830972980411u },
+ { 10759939715039024913u, 1766847064778384329u }, { 12297300586773130254u, 1413477651822707463u },
+ { 15986332124095098083u, 2261564242916331941u }, { 9099716884534168143u, 1809251394333065553u },
+ { 14658471137111155161u, 1447401115466452442u }, { 4348079280205103483u, 1157920892373161954u },
+ { 14335624477811986218u, 1852673427797059126u }, { 7779150767507678651u, 1482138742237647301u },
+ { 2533971799264232598u, 1185710993790117841u }, { 15122401323048503126u, 1897137590064188545u },
+ { 12097921058438802501u, 1517710072051350836u }, { 5988988032009131678u, 1214168057641080669u },
+ { 16961078480698431330u, 1942668892225729070u }, { 13568862784558745064u, 1554135113780583256u },
+ { 7165741412905085728u, 1243308091024466605u }, { 11465186260648137165u, 1989292945639146568u },
+ { 16550846638002330379u, 1591434356511317254u }, { 16930026125143774626u, 1273147485209053803u },
+ { 4951948911778577463u, 2037035976334486086u }, { 272210314680951647u, 1629628781067588869u },
+ { 3907117066486671641u, 1303703024854071095u }, { 6251387306378674625u, 2085924839766513752u },
+ { 16069156289328670670u, 1668739871813211001u }, { 9165976216721026213u, 1334991897450568801u },
+ { 7286864317269821294u, 2135987035920910082u }, { 16897537898041588005u, 1708789628736728065u },
+ { 13518030318433270404u, 1367031702989382452u }, { 6871453250525591353u, 2187250724783011924u },
+ { 9186511415162383406u, 1749800579826409539u }, { 11038557946871817048u, 1399840463861127631u },
+ { 10282995085511086630u, 2239744742177804210u }, { 8226396068408869304u, 1791795793742243368u },
+ { 13959814484210916090u, 1433436634993794694u }, { 11267656730511734774u, 2293498615990071511u },
+ { 5324776569667477496u, 1834798892792057209u }, { 7949170070475892320u, 1467839114233645767u },
+ { 17427382500606444826u, 1174271291386916613u }, { 5747719112518849781u, 1878834066219066582u },
+ { 15666221734240810795u, 1503067252975253265u }, { 12532977387392648636u, 1202453802380202612u },
+ { 5295368560860596524u, 1923926083808324180u }, { 4236294848688477220u, 1539140867046659344u },
+ { 7078384693692692099u, 1231312693637327475u }, { 11325415509908307358u, 1970100309819723960u },
+ { 9060332407926645887u, 1576080247855779168u }, { 14626963555825137356u, 1260864198284623334u },
+ { 12335095245094488799u, 2017382717255397335u }, { 9868076196075591040u, 1613906173804317868u },
+ { 15273158586344293478u, 1291124939043454294u }, { 13369007293925138595u, 2065799902469526871u },
+ { 7005857020398200553u, 1652639921975621497u }, { 16672732060544291412u, 1322111937580497197u },
+ { 11918976037903224966u, 2115379100128795516u }, { 5845832015580669650u, 1692303280103036413u },
+ { 12055363241948356366u, 1353842624082429130u }, { 841837113407818570u, 2166148198531886609u },
+ { 4362818505468165179u, 1732918558825509287u }, { 14558301248600263113u, 1386334847060407429u },
+ { 12225235553534690011u, 2218135755296651887u }, { 2401490813343931363u, 1774508604237321510u },
+ { 1921192650675145090u, 1419606883389857208u }, { 17831303500047873437u, 2271371013423771532u },
+ { 6886345170554478103u, 1817096810739017226u }, { 1819727321701672159u, 1453677448591213781u },
+ { 16213177116328979020u, 1162941958872971024u }, { 14873036941900635463u, 1860707134196753639u },
+ { 15587778368262418694u, 1488565707357402911u }, { 8780873879868024632u, 1190852565885922329u },
+ { 2981351763563108441u, 1905364105417475727u }, { 13453127855076217722u, 1524291284333980581u },
+ { 7073153469319063855u, 1219433027467184465u }, { 11317045550910502167u, 1951092843947495144u },
+ { 12742985255470312057u, 1560874275157996115u }, { 10194388204376249646u, 1248699420126396892u },
+ { 1553625868034358140u, 1997919072202235028u }, { 8621598323911307159u, 1598335257761788022u },
+ { 17965325103354776697u, 1278668206209430417u }, { 13987124906400001422u, 2045869129935088668u },
+ { 121653480894270168u, 1636695303948070935u }, { 97322784715416134u, 1309356243158456748u },
+ { 14913111714512307107u, 2094969989053530796u }, { 8241140556867935363u, 1675975991242824637u },
+ { 17660958889720079260u, 1340780792994259709u }, { 17189487779326395846u, 2145249268790815535u },
+ { 13751590223461116677u, 1716199415032652428u }, { 18379969808252713988u, 1372959532026121942u },
+ { 14650556434236701088u, 2196735251241795108u }, { 652398703163629901u, 1757388200993436087u },
+ { 11589965406756634890u, 1405910560794748869u }, { 7475898206584884855u, 2249456897271598191u },
+ { 2291369750525997561u, 1799565517817278553u }, { 9211793429904618695u, 1439652414253822842u },
+ { 18428218302589300235u, 2303443862806116547u }, { 7363877012587619542u, 1842755090244893238u },
+ { 13269799239553916280u, 1474204072195914590u }, { 10615839391643133024u, 1179363257756731672u },
+ { 2227947767661371545u, 1886981212410770676u }, { 16539753473096738529u, 1509584969928616540u },
+ { 13231802778477390823u, 1207667975942893232u }, { 6413489186596184024u, 1932268761508629172u },
+ { 16198837793502678189u, 1545815009206903337u }, { 5580372605318321905u, 1236652007365522670u },
+ { 8928596168509315048u, 1978643211784836272u }, { 18210923379033183008u, 1582914569427869017u },
+ { 7190041073742725760u, 1266331655542295214u }, { 436019273762630246u, 2026130648867672343u },
+ { 7727513048493924843u, 1620904519094137874u }, { 9871359253537050198u, 1296723615275310299u },
+ { 4726128361433549347u, 2074757784440496479u }, { 7470251503888749801u, 1659806227552397183u },
+ { 13354898832594820487u, 1327844982041917746u }, { 13989140502667892133u, 2124551971267068394u },
+ { 14880661216876224029u, 1699641577013654715u }, { 11904528973500979224u, 1359713261610923772u },
+ { 4289851098633925465u, 2175541218577478036u }, { 18189276137874781665u, 1740432974861982428u },
+ { 3483374466074094362u, 1392346379889585943u }, { 1884050330976640656u, 2227754207823337509u },
+ { 5196589079523222848u, 1782203366258670007u }, { 15225317707844309248u, 1425762693006936005u },
+ { 5913764258841343181u, 2281220308811097609u }, { 8420360221814984868u, 1824976247048878087u },
+ { 17804334621677718864u, 1459980997639102469u }, { 17932816512084085415u, 1167984798111281975u },
+ { 10245762345624985047u, 1868775676978051161u }, { 4507261061758077715u, 1495020541582440929u },
+ { 7295157664148372495u, 1196016433265952743u }, { 7982903447895485668u, 1913626293225524389u },
+ { 10075671573058298858u, 1530901034580419511u }, { 4371188443704728763u, 1224720827664335609u },
+ { 14372599139411386667u, 1959553324262936974u }, { 15187428126271019657u, 1567642659410349579u },
+ { 15839291315758726049u, 1254114127528279663u }, { 3206773216762499739u, 2006582604045247462u },
+ { 13633465017635730761u, 1605266083236197969u }, { 14596120828850494932u, 1284212866588958375u },
+ { 4907049252451240275u, 2054740586542333401u }, { 236290587219081897u, 1643792469233866721u },
+ { 14946427728742906810u, 1315033975387093376u }, { 16535586736504830250u, 2104054360619349402u },
+ { 5849771759720043554u, 1683243488495479522u }, { 15747863852001765813u, 1346594790796383617u },
+ { 10439186904235184007u, 2154551665274213788u }, { 15730047152871967852u, 1723641332219371030u },
+ { 12584037722297574282u, 1378913065775496824u }, { 9066413911450387881u, 2206260905240794919u },
+ { 10942479943902220628u, 1765008724192635935u }, { 8753983955121776503u, 1412006979354108748u },
+ { 10317025513452932081u, 2259211166966573997u }, { 874922781278525018u, 1807368933573259198u },
+ { 8078635854506640661u, 1445895146858607358u }, { 13841606313089133175u, 1156716117486885886u },
+ { 14767872471458792434u, 1850745787979017418u }, { 746251532941302978u, 1480596630383213935u },
+ { 597001226353042382u, 1184477304306571148u }, { 15712597221132509104u, 1895163686890513836u },
+ { 8880728962164096960u, 1516130949512411069u }, { 10793931984473187891u, 1212904759609928855u },
+ { 17270291175157100626u, 1940647615375886168u }, { 2748186495899949531u, 1552518092300708935u },
+ { 2198549196719959625u, 1242014473840567148u }, { 18275073973719576693u, 1987223158144907436u },
+ { 10930710364233751031u, 1589778526515925949u }, { 12433917106128911148u, 1271822821212740759u },
+ { 8826220925580526867u, 2034916513940385215u }, { 7060976740464421494u, 1627933211152308172u },
+ { 16716827836597268165u, 1302346568921846537u }, { 11989529279587987770u, 2083754510274954460u },
+ { 9591623423670390216u, 1667003608219963568u }, { 15051996368420132820u, 1333602886575970854u },
+ { 13015147745246481542u, 2133764618521553367u }, { 3033420566713364587u, 1707011694817242694u },
+ { 6116085268112601993u, 1365609355853794155u }, { 9785736428980163188u, 2184974969366070648u },
+ { 15207286772667951197u, 1747979975492856518u }, { 1097782973908629988u, 1398383980394285215u },
+ { 1756452758253807981u, 2237414368630856344u }, { 5094511021344956708u, 1789931494904685075u },
+ { 4075608817075965366u, 1431945195923748060u }, { 6520974107321544586u, 2291112313477996896u },
+ { 1527430471115325346u, 1832889850782397517u }, { 12289990821117991246u, 1466311880625918013u },
+ { 17210690286378213644u, 1173049504500734410u }, { 9090360384495590213u, 1876879207201175057u },
+ { 18340334751822203140u, 1501503365760940045u }, { 14672267801457762512u, 1201202692608752036u },
+ { 16096930852848599373u, 1921924308174003258u }, { 1809498238053148529u, 1537539446539202607u },
+ { 12515645034668249793u, 1230031557231362085u }, { 1578287981759648052u, 1968050491570179337u },
+ { 12330676829633449412u, 1574440393256143469u }, { 13553890278448669853u, 1259552314604914775u },
+ { 3239480371808320148u, 2015283703367863641u }, { 17348979556414297411u, 1612226962694290912u },
+ { 6500486015647617283u, 1289781570155432730u }, { 10400777625036187652u, 2063650512248692368u },
+ { 15699319729512770768u, 1650920409798953894u }, { 16248804598352126938u, 1320736327839163115u },
+ { 7551343283653851484u, 2113178124542660985u }, { 6041074626923081187u, 1690542499634128788u },
+ { 12211557331022285596u, 1352433999707303030u }, { 1091747655926105338u, 2163894399531684849u },
+ { 4562746939482794594u, 1731115519625347879u }, { 7339546366328145998u, 1384892415700278303u },
+ { 8053925371383123274u, 2215827865120445285u }, { 6443140297106498619u, 1772662292096356228u },
+ { 12533209867169019542u, 1418129833677084982u }, { 5295740528502789974u, 2269007733883335972u },
+ { 15304638867027962949u, 1815206187106668777u }, { 4865013464138549713u, 1452164949685335022u },
+ { 14960057215536570740u, 1161731959748268017u }, { 9178696285890871890u, 1858771135597228828u },
+ { 14721654658196518159u, 1487016908477783062u }, { 4398626097073393881u, 1189613526782226450u },
+ { 7037801755317430209u, 1903381642851562320u }, { 5630241404253944167u, 1522705314281249856u },
+ { 814844308661245011u, 1218164251424999885u }, { 1303750893857992017u, 1949062802279999816u },
+ { 15800395974054034906u, 1559250241823999852u }, { 5261619149759407279u, 1247400193459199882u },
+ { 12107939454356961969u, 1995840309534719811u }, { 5997002748743659252u, 1596672247627775849u },
+ { 8486951013736837725u, 1277337798102220679u }, { 2511075177753209390u, 2043740476963553087u },
+ { 13076906586428298482u, 1634992381570842469u }, { 14150874083884549109u, 1307993905256673975u },
+ { 4194654460505726958u, 2092790248410678361u }, { 18113118827372222859u, 1674232198728542688u },
+ { 3422448617672047318u, 1339385758982834151u }, { 16543964232501006678u, 2143017214372534641u },
+ { 9545822571258895019u, 1714413771498027713u }, { 15015355686490936662u, 1371531017198422170u },
+ { 5577825024675947042u, 2194449627517475473u }, { 11840957649224578280u, 1755559702013980378u },
+ { 16851463748863483271u, 1404447761611184302u }, { 12204946739213931940u, 2247116418577894884u },
+ { 13453306206113055875u, 1797693134862315907u }, { 3383947335406624054u, 1438154507889852726u },
+ { 16482362180876329456u, 2301047212623764361u }, { 9496540929959153242u, 1840837770099011489u },
+ { 11286581558709232917u, 1472670216079209191u }, { 5339916432225476010u, 1178136172863367353u },
+ { 4854517476818851293u, 1885017876581387765u }, { 3883613981455081034u, 1508014301265110212u },
+ { 14174937629389795797u, 1206411441012088169u }, { 11611853762797942306u, 1930258305619341071u },
+ { 5600134195496443521u, 1544206644495472857u }, { 15548153800622885787u, 1235365315596378285u },
+ { 6430302007287065643u, 1976584504954205257u }, { 16212288050055383484u, 1581267603963364205u },
+ { 12969830440044306787u, 1265014083170691364u }, { 9683682259845159889u, 2024022533073106183u },
+ { 15125643437359948558u, 1619218026458484946u }, { 8411165935146048523u, 1295374421166787957u },
+ { 17147214310975587960u, 2072599073866860731u }, { 10028422634038560045u, 1658079259093488585u },
+ { 8022738107230848036u, 1326463407274790868u }, { 9147032156827446534u, 2122341451639665389u },
+ { 11006974540203867551u, 1697873161311732311u }, { 5116230817421183718u, 1358298529049385849u },
+ { 15564666937357714594u, 2173277646479017358u }, { 1383687105660440706u, 1738622117183213887u },
+ { 12174996128754083534u, 1390897693746571109u }, { 8411947361780802685u, 2225436309994513775u },
+ { 6729557889424642148u, 1780349047995611020u }, { 5383646311539713719u, 1424279238396488816u },
+ { 1235136468979721303u, 2278846781434382106u }, { 15745504434151418335u, 1823077425147505684u },
+ { 16285752362063044992u, 1458461940118004547u }, { 5649904260166615347u, 1166769552094403638u },
+ { 5350498001524674232u, 1866831283351045821u }, { 591049586477829062u, 1493465026680836657u },
+ { 11540886113407994219u, 1194772021344669325u }, { 18673707743239135u, 1911635234151470921u },
+ { 14772334225162232601u, 1529308187321176736u }, { 8128518565387875758u, 1223446549856941389u },
+ { 1937583260394870242u, 1957514479771106223u }, { 8928764237799716840u, 1566011583816884978u },
+ { 14521709019723594119u, 1252809267053507982u }, { 8477339172590109297u, 2004494827285612772u },
+ { 17849917782297818407u, 1603595861828490217u }, { 6901236596354434079u, 1282876689462792174u },
+ { 18420676183650915173u, 2052602703140467478u }, { 3668494502695001169u, 1642082162512373983u },
+ { 10313493231639821582u, 1313665730009899186u }, { 9122891541139893884u, 2101865168015838698u },
+ { 14677010862395735754u, 1681492134412670958u }, { 673562245690857633u, 1345193707530136767u }
+};
+
+// Powers of five consumed by ryu_s2d_from_parts when the decimal exponent is non-negative;
+// the row index is that exponent (row i describes 5^i). 326 rows are listed.
+// Each row is a 128-bit value stored as { low 64 bits, high 64 bits }: mulShift64 reads
+// element [0] as the low word and element [1] as the high word.
+// The value is 5^i scaled by a power of two so that its top set bit is bit 60 of the high
+// word (row 0 is 2^60, row 26 is exactly 5^26). From row 27 on, the low word carries the
+// bits that no longer fit in the high word.
+static const uint64_t DOUBLE_POW5_SPLIT[DOUBLE_POW5_TABLE_SIZE][2] = {
+ { 0u, 1152921504606846976u }, { 0u, 1441151880758558720u },
+ { 0u, 1801439850948198400u }, { 0u, 2251799813685248000u },
+ { 0u, 1407374883553280000u }, { 0u, 1759218604441600000u },
+ { 0u, 2199023255552000000u }, { 0u, 1374389534720000000u },
+ { 0u, 1717986918400000000u }, { 0u, 2147483648000000000u },
+ { 0u, 1342177280000000000u }, { 0u, 1677721600000000000u },
+ { 0u, 2097152000000000000u }, { 0u, 1310720000000000000u },
+ { 0u, 1638400000000000000u }, { 0u, 2048000000000000000u },
+ { 0u, 1280000000000000000u }, { 0u, 1600000000000000000u },
+ { 0u, 2000000000000000000u }, { 0u, 1250000000000000000u },
+ { 0u, 1562500000000000000u }, { 0u, 1953125000000000000u },
+ { 0u, 1220703125000000000u }, { 0u, 1525878906250000000u },
+ { 0u, 1907348632812500000u }, { 0u, 1192092895507812500u },
+ { 0u, 1490116119384765625u }, { 4611686018427387904u, 1862645149230957031u },
+ { 9799832789158199296u, 1164153218269348144u }, { 12249790986447749120u, 1455191522836685180u },
+ { 15312238733059686400u, 1818989403545856475u }, { 14528612397897220096u, 2273736754432320594u },
+ { 13692068767113150464u, 1421085471520200371u }, { 12503399940464050176u, 1776356839400250464u },
+ { 15629249925580062720u, 2220446049250313080u }, { 9768281203487539200u, 1387778780781445675u },
+ { 7598665485932036096u, 1734723475976807094u }, { 274959820560269312u, 2168404344971008868u },
+ { 9395221924704944128u, 1355252715606880542u }, { 2520655369026404352u, 1694065894508600678u },
+ { 12374191248137781248u, 2117582368135750847u }, { 14651398557727195136u, 1323488980084844279u },
+ { 13702562178731606016u, 1654361225106055349u }, { 3293144668132343808u, 2067951531382569187u },
+ { 18199116482078572544u, 1292469707114105741u }, { 8913837547316051968u, 1615587133892632177u },
+ { 15753982952572452864u, 2019483917365790221u }, { 12152082354571476992u, 1262177448353618888u },
+ { 15190102943214346240u, 1577721810442023610u }, { 9764256642163156992u, 1972152263052529513u },
+ { 17631875447420442880u, 1232595164407830945u }, { 8204786253993389888u, 1540743955509788682u },
+ { 1032610780636961552u, 1925929944387235853u }, { 2951224747111794922u, 1203706215242022408u },
+ { 3689030933889743652u, 1504632769052528010u }, { 13834660704216955373u, 1880790961315660012u },
+ { 17870034976990372916u, 1175494350822287507u }, { 17725857702810578241u, 1469367938527859384u },
+ { 3710578054803671186u, 1836709923159824231u }, { 26536550077201078u, 2295887403949780289u },
+ { 11545800389866720434u, 1434929627468612680u }, { 14432250487333400542u, 1793662034335765850u },
+ { 8816941072311974870u, 2242077542919707313u }, { 17039803216263454053u, 1401298464324817070u },
+ { 12076381983474541759u, 1751623080406021338u }, { 5872105442488401391u, 2189528850507526673u },
+ { 15199280947623720629u, 1368455531567204170u }, { 9775729147674874978u, 1710569414459005213u },
+ { 16831347453020981627u, 2138211768073756516u }, { 1296220121283337709u, 1336382355046097823u },
+ { 15455333206886335848u, 1670477943807622278u }, { 10095794471753144002u, 2088097429759527848u },
+ { 6309871544845715001u, 1305060893599704905u }, { 12499025449484531656u, 1631326116999631131u },
+ { 11012095793428276666u, 2039157646249538914u }, { 11494245889320060820u, 1274473528905961821u },
+ { 532749306367912313u, 1593091911132452277u }, { 5277622651387278295u, 1991364888915565346u },
+ { 7910200175544436838u, 1244603055572228341u }, { 14499436237857933952u, 1555753819465285426u },
+ { 8900923260467641632u, 1944692274331606783u }, { 12480606065433357876u, 1215432671457254239u },
+ { 10989071563364309441u, 1519290839321567799u }, { 9124653435777998898u, 1899113549151959749u },
+ { 8008751406574943263u, 1186945968219974843u }, { 5399253239791291175u, 1483682460274968554u },
+ { 15972438586593889776u, 1854603075343710692u }, { 759402079766405302u, 1159126922089819183u },
+ { 14784310654990170340u, 1448908652612273978u }, { 9257016281882937117u, 1811135815765342473u },
+ { 16182956370781059300u, 2263919769706678091u }, { 7808504722524468110u, 1414949856066673807u },
+ { 5148944884728197234u, 1768687320083342259u }, { 1824495087482858639u, 2210859150104177824u },
+ { 1140309429676786649u, 1381786968815111140u }, { 1425386787095983311u, 1727233711018888925u },
+ { 6393419502297367043u, 2159042138773611156u }, { 13219259225790630210u, 1349401336733506972u },
+ { 16524074032238287762u, 1686751670916883715u }, { 16043406521870471799u, 2108439588646104644u },
+ { 803757039314269066u, 1317774742903815403u }, { 14839754354425000045u, 1647218428629769253u },
+ { 4714634887749086344u, 2059023035787211567u }, { 9864175832484260821u, 1286889397367007229u },
+ { 16941905809032713930u, 1608611746708759036u }, { 2730638187581340797u, 2010764683385948796u },
+ { 10930020904093113806u, 1256727927116217997u }, { 18274212148543780162u, 1570909908895272496u },
+ { 4396021111970173586u, 1963637386119090621u }, { 5053356204195052443u, 1227273366324431638u },
+ { 15540067292098591362u, 1534091707905539547u }, { 14813398096695851299u, 1917614634881924434u },
+ { 13870059828862294966u, 1198509146801202771u }, { 12725888767650480803u, 1498136433501503464u },
+ { 15907360959563101004u, 1872670541876879330u }, { 14553786618154326031u, 1170419088673049581u },
+ { 4357175217410743827u, 1463023860841311977u }, { 10058155040190817688u, 1828779826051639971u },
+ { 7961007781811134206u, 2285974782564549964u }, { 14199001900486734687u, 1428734239102843727u },
+ { 13137066357181030455u, 1785917798878554659u }, { 11809646928048900164u, 2232397248598193324u },
+ { 16604401366885338411u, 1395248280373870827u }, { 16143815690179285109u, 1744060350467338534u },
+ { 10956397575869330579u, 2180075438084173168u }, { 6847748484918331612u, 1362547148802608230u },
+ { 17783057643002690323u, 1703183936003260287u }, { 17617136035325974999u, 2128979920004075359u },
+ { 17928239049719816230u, 1330612450002547099u }, { 17798612793722382384u, 1663265562503183874u },
+ { 13024893955298202172u, 2079081953128979843u }, { 5834715712847682405u, 1299426220705612402u },
+ { 16516766677914378815u, 1624282775882015502u }, { 11422586310538197711u, 2030353469852519378u },
+ { 11750802462513761473u, 1268970918657824611u }, { 10076817059714813937u, 1586213648322280764u },
+ { 12596021324643517422u, 1982767060402850955u }, { 5566670318688504437u, 1239229412751781847u },
+ { 2346651879933242642u, 1549036765939727309u }, { 7545000868343941206u, 1936295957424659136u },
+ { 4715625542714963254u, 1210184973390411960u }, { 5894531928393704067u, 1512731216738014950u },
+ { 16591536947346905892u, 1890914020922518687u }, { 17287239619732898039u, 1181821263076574179u },
+ { 16997363506238734644u, 1477276578845717724u }, { 2799960309088866689u, 1846595723557147156u },
+ { 10973347230035317489u, 1154122327223216972u }, { 13716684037544146861u, 1442652909029021215u },
+ { 12534169028502795672u, 1803316136286276519u }, { 11056025267201106687u, 2254145170357845649u },
+ { 18439230838069161439u, 1408840731473653530u }, { 13825666510731675991u, 1761050914342066913u },
+ { 3447025083132431277u, 2201313642927583642u }, { 6766076695385157452u, 1375821026829739776u },
+ { 8457595869231446815u, 1719776283537174720u }, { 10571994836539308519u, 2149720354421468400u },
+ { 6607496772837067824u, 1343575221513417750u }, { 17482743002901110588u, 1679469026891772187u },
+ { 17241742735199000331u, 2099336283614715234u }, { 15387775227926763111u, 1312085177259197021u },
+ { 5399660979626290177u, 1640106471573996277u }, { 11361262242960250625u, 2050133089467495346u },
+ { 11712474920277544544u, 1281333180917184591u }, { 10028907631919542777u, 1601666476146480739u },
+ { 7924448521472040567u, 2002083095183100924u }, { 14176152362774801162u, 1251301934489438077u },
+ { 3885132398186337741u, 1564127418111797597u }, { 9468101516160310080u, 1955159272639746996u },
+ { 15140935484454969608u, 1221974545399841872u }, { 479425281859160394u, 1527468181749802341u },
+ { 5210967620751338397u, 1909335227187252926u }, { 17091912818251750210u, 1193334516992033078u },
+ { 12141518985959911954u, 1491668146240041348u }, { 15176898732449889943u, 1864585182800051685u },
+ { 11791404716994875166u, 1165365739250032303u }, { 10127569877816206054u, 1456707174062540379u },
+ { 8047776328842869663u, 1820883967578175474u }, { 836348374198811271u, 2276104959472719343u },
+ { 7440246761515338900u, 1422565599670449589u }, { 13911994470321561530u, 1778206999588061986u },
+ { 8166621051047176104u, 2222758749485077483u }, { 2798295147690791113u, 1389224218428173427u },
+ { 17332926989895652603u, 1736530273035216783u }, { 17054472718942177850u, 2170662841294020979u },
+ { 8353202440125167204u, 1356664275808763112u }, { 10441503050156459005u, 1695830344760953890u },
+ { 3828506775840797949u, 2119787930951192363u }, { 86973725686804766u, 1324867456844495227u },
+ { 13943775212390669669u, 1656084321055619033u }, { 3594660960206173375u, 2070105401319523792u },
+ { 2246663100128858359u, 1293815875824702370u }, { 12031700912015848757u, 1617269844780877962u },
+ { 5816254103165035138u, 2021587305976097453u }, { 5941001823691840913u, 1263492066235060908u },
+ { 7426252279614801142u, 1579365082793826135u }, { 4671129331091113523u, 1974206353492282669u },
+ { 5225298841145639904u, 1233878970932676668u }, { 6531623551432049880u, 1542348713665845835u },
+ { 3552843420862674446u, 1927935892082307294u }, { 16055585193321335241u, 1204959932551442058u },
+ { 10846109454796893243u, 1506199915689302573u }, { 18169322836923504458u, 1882749894611628216u },
+ { 11355826773077190286u, 1176718684132267635u }, { 9583097447919099954u, 1470898355165334544u },
+ { 11978871809898874942u, 1838622943956668180u }, { 14973589762373593678u, 2298278679945835225u },
+ { 2440964573842414192u, 1436424174966147016u }, { 3051205717303017741u, 1795530218707683770u },
+ { 13037379183483547984u, 2244412773384604712u }, { 8148361989677217490u, 1402757983365377945u },
+ { 14797138505523909766u, 1753447479206722431u }, { 13884737113477499304u, 2191809349008403039u },
+ { 15595489723564518921u, 1369880843130251899u }, { 14882676136028260747u, 1712351053912814874u },
+ { 9379973133180550126u, 2140438817391018593u }, { 17391698254306313589u, 1337774260869386620u },
+ { 3292878744173340370u, 1672217826086733276u }, { 4116098430216675462u, 2090272282608416595u },
+ { 266718509671728212u, 1306420176630260372u }, { 333398137089660265u, 1633025220787825465u },
+ { 5028433689789463235u, 2041281525984781831u }, { 10060300083759496378u, 1275800953740488644u },
+ { 12575375104699370472u, 1594751192175610805u }, { 1884160825592049379u, 1993438990219513507u },
+ { 17318501580490888525u, 1245899368887195941u }, { 7813068920331446945u, 1557374211108994927u },
+ { 5154650131986920777u, 1946717763886243659u }, { 915813323278131534u, 1216698602428902287u },
+ { 14979824709379828129u, 1520873253036127858u }, { 9501408849870009354u, 1901091566295159823u },
+ { 12855909558809837702u, 1188182228934474889u }, { 2234828893230133415u, 1485227786168093612u },
+ { 2793536116537666769u, 1856534732710117015u }, { 8663489100477123587u, 1160334207943823134u },
+ { 1605989338741628675u, 1450417759929778918u }, { 11230858710281811652u, 1813022199912223647u },
+ { 9426887369424876662u, 2266277749890279559u }, { 12809333633531629769u, 1416423593681424724u },
+ { 16011667041914537212u, 1770529492101780905u }, { 6179525747111007803u, 2213161865127226132u },
+ { 13085575628799155685u, 1383226165704516332u }, { 16356969535998944606u, 1729032707130645415u },
+ { 15834525901571292854u, 2161290883913306769u }, { 2979049660840976177u, 1350806802445816731u },
+ { 17558870131333383934u, 1688508503057270913u }, { 8113529608884566205u, 2110635628821588642u },
+ { 9682642023980241782u, 1319147268013492901u }, { 16714988548402690132u, 1648934085016866126u },
+ { 11670363648648586857u, 2061167606271082658u }, { 11905663298832754689u, 1288229753919426661u },
+ { 1047021068258779650u, 1610287192399283327u }, { 15143834390605638274u, 2012858990499104158u },
+ { 4853210475701136017u, 1258036869061940099u }, { 1454827076199032118u, 1572546086327425124u },
+ { 1818533845248790147u, 1965682607909281405u }, { 3442426662494187794u, 1228551629943300878u },
+ { 13526405364972510550u, 1535689537429126097u }, { 3072948650933474476u, 1919611921786407622u },
+ { 15755650962115585259u, 1199757451116504763u }, { 15082877684217093670u, 1499696813895630954u },
+ { 9630225068416591280u, 1874621017369538693u }, { 8324733676974063502u, 1171638135855961683u },
+ { 5794231077790191473u, 1464547669819952104u }, { 7242788847237739342u, 1830684587274940130u },
+ { 18276858095901949986u, 2288355734093675162u }, { 16034722328366106645u, 1430222333808546976u },
+ { 1596658836748081690u, 1787777917260683721u }, { 6607509564362490017u, 2234722396575854651u },
+ { 1823850468512862308u, 1396701497859909157u }, { 6891499104068465790u, 1745876872324886446u },
+ { 17837745916940358045u, 2182346090406108057u }, { 4231062170446641922u, 1363966306503817536u },
+ { 5288827713058302403u, 1704957883129771920u }, { 6611034641322878003u, 2131197353912214900u },
+ { 13355268687681574560u, 1331998346195134312u }, { 16694085859601968200u, 1664997932743917890u },
+ { 11644235287647684442u, 2081247415929897363u }, { 4971804045566108824u, 1300779634956185852u },
+ { 6214755056957636030u, 1625974543695232315u }, { 3156757802769657134u, 2032468179619040394u },
+ { 6584659645158423613u, 1270292612261900246u }, { 17454196593302805324u, 1587865765327375307u },
+ { 17206059723201118751u, 1984832206659219134u }, { 6142101308573311315u, 1240520129162011959u },
+ { 3065940617289251240u, 1550650161452514949u }, { 8444111790038951954u, 1938312701815643686u },
+ { 665883850346957067u, 1211445438634777304u }, { 832354812933696334u, 1514306798293471630u },
+ { 10263815553021896226u, 1892883497866839537u }, { 17944099766707154901u, 1183052186166774710u },
+ { 13206752671529167818u, 1478815232708468388u }, { 16508440839411459773u, 1848519040885585485u },
+ { 12623618533845856310u, 1155324400553490928u }, { 15779523167307320387u, 1444155500691863660u },
+ { 1277659885424598868u, 1805194375864829576u }, { 1597074856780748586u, 2256492969831036970u },
+ { 5609857803915355770u, 1410308106144398106u }, { 16235694291748970521u, 1762885132680497632u },
+ { 1847873790976661535u, 2203606415850622041u }, { 12684136165428883219u, 1377254009906638775u },
+ { 11243484188358716120u, 1721567512383298469u }, { 219297180166231438u, 2151959390479123087u },
+ { 7054589765244976505u, 1344974619049451929u }, { 13429923224983608535u, 1681218273811814911u },
+ { 12175718012802122765u, 2101522842264768639u }, { 14527352785642408584u, 1313451776415480399u },
+ { 13547504963625622826u, 1641814720519350499u }, { 12322695186104640628u, 2052268400649188124u },
+ { 16925056528170176201u, 1282667750405742577u }, { 7321262604930556539u, 1603334688007178222u },
+ { 18374950293017971482u, 2004168360008972777u }, { 4566814905495150320u, 1252605225005607986u },
+ { 14931890668723713708u, 1565756531257009982u }, { 9441491299049866327u, 1957195664071262478u },
+ { 1289246043478778550u, 1223247290044539049u }, { 6223243572775861092u, 1529059112555673811u },
+ { 3167368447542438461u, 1911323890694592264u }, { 1979605279714024038u, 1194577431684120165u },
+ { 7086192618069917952u, 1493221789605150206u }, { 18081112809442173248u, 1866527237006437757u },
+ { 13606538515115052232u, 1166579523129023598u }, { 7784801107039039482u, 1458224403911279498u },
+ { 507629346944023544u, 1822780504889099373u }, { 5246222702107417334u, 2278475631111374216u },
+ { 3278889188817135834u, 1424047269444608885u }, { 8710297504448807696u, 1780059086805761106u }
+};
+
+// IEEE 754 double precision constants
+// ryu_s2d_from_parts packs its result as sign bit, then DOUBLE_EXPONENT_BITS exponent
+// bits, then DOUBLE_MANTISSA_BITS mantissa bits, using these three values.
+// Width in bits of the stored mantissa (fraction) field.
+#define DOUBLE_MANTISSA_BITS 52
+// Width in bits of the biased exponent field.
+#define DOUBLE_EXPONENT_BITS 11
+// Bias added to the binary exponent before it is stored in the exponent field.
+#define DOUBLE_EXPONENT_BIAS 1023
+
+// Returns floor(log2(value)), i.e. the zero-based index of the most significant set bit,
+// derived from the leading-zero count reported by ryu_leading_zeros64.
+// value is expected to be non-zero; the result for 0 depends on what
+// ryu_leading_zeros64 returns for 0, which is defined elsewhere in this header.
+static inline uint32_t floor_log2(const uint64_t value) {
+    const uint32_t leading_zeros = ryu_leading_zeros64(value);
+    return 63 - leading_zeros;
+}
+
+// Fixed-point estimate of log2(5^e): multiplies e by 1217359 / 2^19 (about 2.32193,
+// i.e. log2(5)) and drops the fractional part.
+// e must be non-negative and at most 3528; for larger values the 32-bit product wraps.
+static inline int32_t log2pow5(const int32_t e) {
+    const uint32_t scaled = ((uint32_t) e) * 1217359;
+    return (int32_t) (scaled >> 19);
+}
+
+// Returns log2pow5(e) + 1, which serves as ceil(log2(5^e)).
+// For e >= 1 this is the true ceiling because 5^e is never a power of two;
+// for e == 0 the result is 1.
+static inline int32_t ceil_log2pow5(const int32_t e) {
+    const int32_t floor_bits = log2pow5(e);
+    return floor_bits + 1;
+}
+
+// Returns the larger of two signed 32-bit integers (a when they are equal).
+static inline int32_t max32(int32_t a, int32_t b) {
+    if (a < b) {
+        return b;
+    }
+    return a;
+}
+
+// Reinterprets a 64-bit pattern as a double without changing any bit.
+// The bytes are copied with memcpy rather than read through a pointer cast.
+static inline double int64Bits2Double(uint64_t bits) {
+    double result;
+    memcpy(&result, &bits, sizeof result);
+    return result;
+}
+
+// Returns true when the low p bits of value are all zero, i.e. value is a multiple of 2^p.
+// p must be below 64 so that the mask shift stays within the 64-bit width.
+static inline bool multipleOfPowerOf2(const uint64_t value, const uint32_t p) {
+    const uint64_t low_bits_mask = (1ull << p) - 1;
+    return (value & low_bits_mask) == 0;
+}
+
+// Returns the exponent of the largest power of 5 that divides value.
+// Uses the modular inverse of 5 instead of a division: multiplying by m_inv_5 modulo
+// 2^64 yields exactly value / 5 (at most 2^64 / 5) when value is a multiple of 5, and a
+// result above that bound otherwise, which ends the loop.
+// 0 is divisible by every power of 5, so it is reported as UINT32_MAX; without this
+// guard the product would stay 0 and the loop would never terminate.
+static inline uint32_t pow5Factor(uint64_t value) {
+    const uint64_t m_inv_5 = 14757395258967641293u; // 5 * m_inv_5 = 1 (mod 2^64)
+    const uint64_t n_div_5 = 3689348814741910323u;  // floor(2^64 / 5)
+    uint32_t count = 0;
+
+    if (value == 0) {
+        return UINT32_MAX;
+    }
+    for (;;) {
+        value *= m_inv_5;
+        if (value > n_div_5) {
+            break;
+        }
+        ++count;
+    }
+    return count;
+}
+
+// Returns true when value is divisible by 5^p, decided by comparing p with the number
+// of factors of 5 that pow5Factor finds in value (no division involved here).
+static inline bool multipleOfPowerOf5(const uint64_t value, const uint32_t p) {
+    const uint32_t factors_of_five = pow5Factor(value);
+    return p <= factors_of_five;
+}
+
+// 128-bit multiplication with shift
+// This is the core operation for converting decimal to binary
+#if defined(__SIZEOF_INT128__)
+// Native variant, used when the compiler provides unsigned __int128 (GCC/Clang).
+// Treats mul as a 128-bit factor { mul[0] = low word, mul[1] = high word } and returns
+// the low 64 bits of (m * factor) >> j.
+// j - 64 must be a valid shift count for a 128-bit value, so j has to be at least 64.
+static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) {
+    const unsigned __int128 wide_m = (unsigned __int128) m;
+    const unsigned __int128 low_product = wide_m * mul[0];
+    const unsigned __int128 high_product = wide_m * mul[1];
+    // Dropping the low 64 bits of low_product aligns it with high_product.
+    const unsigned __int128 upper_bits = (low_product >> 64) + high_product;
+    return (uint64_t) (upper_bits >> (j - 64));
+}
+#else
+// Fallback for compilers without a native 128-bit integer type.
+// Computes the full 128-bit product a * b from 32-bit halves: the low 64 bits are
+// returned and the high 64 bits are stored through productHi.
+static inline uint64_t umul128(const uint64_t a, const uint64_t b, uint64_t* const productHi) {
+    const uint32_t a_low = (uint32_t) a;
+    const uint32_t a_high = (uint32_t) (a >> 32);
+    const uint32_t b_low = (uint32_t) b;
+    const uint32_t b_high = (uint32_t) (b >> 32);
+
+    // Four 32x32 -> 64 partial products.
+    const uint64_t low_low = (uint64_t) a_low * b_low;
+    const uint64_t low_high = (uint64_t) a_low * b_high;
+    const uint64_t high_low = (uint64_t) a_high * b_low;
+    const uint64_t high_high = (uint64_t) a_high * b_high;
+
+    // Fold the cross terms in one at a time so that no 64-bit sum can overflow.
+    const uint64_t first_mid = high_low + (low_low >> 32);
+    const uint64_t second_mid = low_high + (uint32_t) first_mid;
+
+    *productHi = high_high + (first_mid >> 32) + (second_mid >> 32);
+    return (second_mid << 32) | (uint32_t) low_low;
+}
+
+// Returns the low 64 bits of the 128-bit value (hi:lo) shifted right by dist.
+// dist must lie strictly between 0 and 64: both shift counts below have to stay
+// under the 64-bit width.
+static inline uint64_t shiftright128(const uint64_t lo, const uint64_t hi, const uint32_t dist) {
+    const uint64_t from_high = hi << (64 - dist);
+    const uint64_t from_low = lo >> dist;
+    return from_high | from_low;
+}
+
+// Portable variant of mulShift64 built on umul128 and shiftright128.
+// Treats mul as a 128-bit factor { mul[0] = low word, mul[1] = high word } and returns
+// the low 64 bits of (m * factor) >> j. The lowest 64 bits of the product are never
+// needed, so only the high half of m * mul[0] is kept.
+static inline uint64_t mulShift64(const uint64_t m, const uint64_t* const mul, const int32_t j) {
+    uint64_t upper_word;
+    uint64_t low_term_high;
+    const uint64_t high_term_low = umul128(m, mul[1], &upper_word);
+    (void) umul128(m, mul[0], &low_term_high);
+
+    const uint64_t middle_word = low_term_high + high_term_low;
+    // An unsigned wrap in the middle word is a carry into the upper word.
+    if (middle_word < low_term_high) {
+        upper_word += 1;
+    }
+    return shiftright128(middle_word, upper_word, j - 64);
+}
+#endif
+
+// Main conversion function: decimal mantissa+exponent to IEEE 754 double.
+// Converts (-1)^signedM * m10 * 10^e10 into a double, rounding the discarded bits
+// half-to-even.
+//
+//   m10       decimal significand as an integer
+//   m10digits number of decimal digits in m10; only used by the early range checks
+//   e10       decimal exponent applied to m10
+//   signedM   true for a negative value; becomes the sign bit of the result
+//
+// Returns a signed zero when m10 is 0 or m10digits + e10 <= -324, and a signed
+// infinity when m10digits + e10 >= 310 or the biased exponent would exceed 0x7fe.
+//
+// NOTE(review): the table lookups index DOUBLE_POW5_SPLIT / DOUBLE_POW5_INV_SPLIT with
+// |e10|. The range checks only bound that index when m10digits is a small positive
+// digit count; presumably callers pass at most 17 digits -- confirm against the parser.
+static inline double ryu_s2d_from_parts(uint64_t m10, int m10digits, int32_t e10, bool signedM) {
+    // Fast path: handle zero explicitly (e.g., "0.0", "0e0")
+    if (m10 == 0) {
+        return int64Bits2Double(((uint64_t) signedM) << 63);
+    }
+
+    // Decimal magnitude used by the range checks. The sum is widened to 64 bits so that
+    // an extreme e10 cannot overflow the addition (signed overflow is undefined and
+    // could turn an overflow case into an underflow result).
+    const int64_t magnitude = (int64_t) m10digits + (int64_t) e10;
+
+    // Fast path: handle overflow/underflow early
+    if (magnitude <= -324) {
+        // Underflow to zero
+        return int64Bits2Double(((uint64_t) signedM) << 63);
+    }
+
+    if (magnitude >= 310) {
+        // Overflow to infinity
+        return int64Bits2Double((((uint64_t) signedM) << 63) | 0x7ff0000000000000ULL);
+    }
+
+    // Convert decimal to binary: m10 * 10^e10 = m2 * 2^e2
+    int32_t e2;
+    uint64_t m2;
+    // True while every bit discarded so far is known to be zero (the value is exact);
+    // only consulted to break rounding ties.
+    bool trailingZeros;
+
+    if (e10 >= 0) {
+        // Positive exponent: multiply by 5^e10 and adjust binary exponent
+        e2 = floor_log2(m10) + e10 + log2pow5(e10) - (DOUBLE_MANTISSA_BITS + 1);
+        int j = e2 - e10 - ceil_log2pow5(e10) + DOUBLE_POW5_BITCOUNT;
+        m2 = mulShift64(m10, DOUBLE_POW5_SPLIT[e10], j);
+        trailingZeros = e2 < e10 || (e2 - e10 < 64 && multipleOfPowerOf2(m10, e2 - e10));
+    } else {
+        // Negative exponent: divide by 5^(-e10) via the inverse-power table
+        e2 = floor_log2(m10) + e10 - ceil_log2pow5(-e10) - (DOUBLE_MANTISSA_BITS + 1);
+        int j = e2 - e10 + ceil_log2pow5(-e10) - 1 + DOUBLE_POW5_INV_BITCOUNT;
+        m2 = mulShift64(m10, DOUBLE_POW5_INV_SPLIT[-e10], j);
+        trailingZeros = multipleOfPowerOf5(m10, -e10);
+    }
+
+    // Compute IEEE 754 exponent; clamped at 0, which selects the subnormal encoding
+    uint32_t ieee_e2 = (uint32_t) max32(0, e2 + DOUBLE_EXPONENT_BIAS + floor_log2(m2));
+
+    if (ieee_e2 > 0x7fe) {
+        // Overflow to infinity
+        return int64Bits2Double((((uint64_t) signedM) << 63) | 0x7ff0000000000000ULL);
+    }
+
+    // Compute shift amount for rounding: number of low bits of m2 that do not fit
+    // into the mantissa field (subnormals use exponent 1 for this computation)
+    int32_t shift = (ieee_e2 == 0 ? 1 : ieee_e2) - e2 - DOUBLE_EXPONENT_BIAS - DOUBLE_MANTISSA_BITS;
+
+    // IEEE 754 round-to-even (banker's rounding): round up when the last removed bit
+    // is set and either something non-zero lies below it or the kept value is odd
+    trailingZeros &= (m2 & ((1ull << (shift - 1)) - 1)) == 0;
+    uint64_t lastRemovedBit = (m2 >> (shift - 1)) & 1;
+    bool roundUp = (lastRemovedBit != 0) && (!trailingZeros || (((m2 >> shift) & 1) != 0));
+
+    uint64_t ieee_m2 = (m2 >> shift) + roundUp;
+    ieee_m2 &= (1ull << DOUBLE_MANTISSA_BITS) - 1;
+
+    // Rounding up carried out of the mantissa field: the carry moves into the exponent
+    if (ieee_m2 == 0 && roundUp) {
+        ieee_e2++;
+    }
+
+    // Pack sign, exponent, and mantissa into IEEE 754 format:
+    // group sign+exponent, then shift and add mantissa
+    uint64_t ieee = (((((uint64_t) signedM) << DOUBLE_EXPONENT_BITS) | (uint64_t)ieee_e2) << DOUBLE_MANTISSA_BITS) | ieee_m2;
+    return int64Bits2Double(ieee);
+}
+
+#endif // RYU_H