summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean Boussier <jean.boussier@gmail.com>2025-08-23 19:57:14 +0200
committerHiroshi SHIBATA <hsbt@ruby-lang.org>2025-08-27 10:01:52 +0900
commitd9e9a667a8c8fb6f57611c68b45eaf1f2c39fca1 (patch)
tree3ac8241bb5b0bc69574d2496324e0ec14a730731
parent0e0f0dfd070fc156ec74c58f44d86a884a0580e0 (diff)
JSON.generate: warn or raise on duplicated key
Because both strings and symbols keys are serialized the same, it always has been possible to generate documents with duplicated keys: ```ruby >> puts JSON.generate({ foo: 1, "foo" => 2 }) {"foo":1,"foo":2} ``` This is pretty much always a mistake and can cause various issues because it's not guaranteed how various JSON parsers will handle this. Until now I didn't think it was possible to catch such case without tanking performance, hence why I only made the parser more strict. But I finally found a way to check for duplicated keys cheaply enough.
-rw-r--r--ext/json/fbuffer/fbuffer.h8
-rw-r--r--ext/json/generator/generator.c73
-rw-r--r--ext/json/lib/json/common.rb19
-rw-r--r--ext/json/lib/json/ext/generator/state.rb5
-rw-r--r--ext/json/parser/parser.c2
-rwxr-xr-xtest/json/json_generator_test.rb38
6 files changed, 138 insertions, 7 deletions
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h
index 247a0de470..d5fd8ac6d7 100644
--- a/ext/json/fbuffer/fbuffer.h
+++ b/ext/json/fbuffer/fbuffer.h
@@ -24,6 +24,14 @@ typedef unsigned char _Bool;
#endif
#endif
+#ifndef NOINLINE
+#if defined(__has_attribute) && __has_attribute(noinline)
+#define NOINLINE() __attribute__((noinline))
+#else
+#define NOINLINE()
+#endif
+#endif
+
#ifndef RB_UNLIKELY
#define RB_UNLIKELY(expr) expr
#endif
diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c
index 52dcd24f0e..d810927c58 100644
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@@ -9,6 +9,12 @@
/* ruby api and some helpers */
+enum duplicate_key_action {
+ JSON_DEPRECATED = 0,
+ JSON_IGNORE,
+ JSON_RAISE,
+};
+
typedef struct JSON_Generator_StateStruct {
VALUE indent;
VALUE space;
@@ -21,6 +27,8 @@ typedef struct JSON_Generator_StateStruct {
long depth;
long buffer_initial_length;
+ enum duplicate_key_action on_duplicate_key;
+
bool allow_nan;
bool ascii_only;
bool script_safe;
@@ -34,7 +42,7 @@ typedef struct JSON_Generator_StateStruct {
static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
-static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
+static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
@@ -987,8 +995,11 @@ static inline VALUE vstate_get(struct generate_json_data *data)
}
struct hash_foreach_arg {
+ VALUE hash;
struct generate_json_data *data;
- int iter;
+ int first_key_type;
+ bool first;
+ bool mixed_keys_encountered;
};
static VALUE
@@ -1006,6 +1017,22 @@ convert_string_subclass(VALUE key)
return key_to_s;
}
+NOINLINE()
+static void
+json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
+{
+ if (arg->mixed_keys_encountered) {
+ return;
+ }
+ arg->mixed_keys_encountered = true;
+
+ JSON_Generator_State *state = arg->data->state;
+ if (state->on_duplicate_key != JSON_IGNORE) {
+ VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
+ rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
+ }
+}
+
static int
json_object_i(VALUE key, VALUE val, VALUE _arg)
{
@@ -1016,8 +1043,16 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
JSON_Generator_State *state = data->state;
long depth = state->depth;
+ int key_type = rb_type(key);
+
+ if (arg->first) {
+ arg->first = false;
+ arg->first_key_type = key_type;
+ }
+ else {
+ fbuffer_append_char(buffer, ',');
+ }
- if (arg->iter > 0) fbuffer_append_char(buffer, ',');
if (RB_UNLIKELY(data->state->object_nl)) {
fbuffer_append_str(buffer, data->state->object_nl);
}
@@ -1029,8 +1064,12 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
bool as_json_called = false;
start:
- switch (rb_type(key)) {
+ switch (key_type) {
case T_STRING:
+ if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
+ json_inspect_hash_with_mixed_keys(arg);
+ }
+
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
key_to_s = key;
} else {
@@ -1038,12 +1077,17 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
}
break;
case T_SYMBOL:
+ if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
+ json_inspect_hash_with_mixed_keys(arg);
+ }
+
key_to_s = rb_sym2str(key);
break;
default:
if (data->state->strict) {
if (RTEST(data->state->as_json) && !as_json_called) {
key = rb_proc_call_with_block(data->state->as_json, 1, &key, Qnil);
+ key_type = rb_type(key);
as_json_called = true;
goto start;
} else {
@@ -1064,7 +1108,6 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
generate_json(buffer, data, val);
- arg->iter++;
return ST_CONTINUE;
}
@@ -1091,8 +1134,9 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
fbuffer_append_char(buffer, '{');
struct hash_foreach_arg arg = {
+ .hash = obj,
.data = data,
- .iter = 0,
+ .first = true,
};
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
@@ -1794,6 +1838,19 @@ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
return Qnil;
}
+static VALUE cState_allow_duplicate_key_p(VALUE self)
+{
+ GET_STATE(self);
+ switch (state->on_duplicate_key) {
+ case JSON_IGNORE:
+ return Qtrue;
+ case JSON_DEPRECATED:
+ return Qnil;
+ case JSON_RAISE:
+ return Qfalse;
+ }
+}
+
/*
* call-seq: depth
*
@@ -1883,6 +1940,7 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
else if (key == sym_strict) { state->strict = RTEST(val); }
+ else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
else if (key == sym_as_json) {
VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
state_write_value(data, &state->as_json, proc);
@@ -2008,6 +2066,8 @@ void Init_generator(void)
rb_define_method(cState, "generate", cState_generate, -1);
rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
+ rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
+
rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
@@ -2072,6 +2132,7 @@ void Init_generator(void)
sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
sym_strict = ID2SYM(rb_intern("strict"));
sym_as_json = ID2SYM(rb_intern("as_json"));
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
usascii_encindex = rb_usascii_encindex();
utf8_encindex = rb_utf8_encindex();
diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb
index 2859a8c63f..45200a83bc 100644
--- a/ext/json/lib/json/common.rb
+++ b/ext/json/lib/json/common.rb
@@ -186,6 +186,25 @@ module JSON
private
+ # Called from the extension when a hash has both string and symbol keys
+ def on_mixed_keys_hash(hash, do_raise)
+ set = {}
+ hash.each_key do |key|
+ key_str = key.to_s
+
+ if set[key_str]
+ message = "detected duplicate key #{key_str.inspect} in #{hash.inspect}"
+ if do_raise
+ raise GeneratorError, message
+ else
+ deprecation_warning("#{message}.\nThis will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`")
+ end
+ else
+ set[key_str] = true
+ end
+ end
+ end
+
def deprecated_singleton_attr_accessor(*attrs)
args = RUBY_VERSION >= "3.0" ? ", category: :deprecated" : ""
attrs.each do |attr|
diff --git a/ext/json/lib/json/ext/generator/state.rb b/ext/json/lib/json/ext/generator/state.rb
index 2ec2daa202..1f56e6c682 100644
--- a/ext/json/lib/json/ext/generator/state.rb
+++ b/ext/json/lib/json/ext/generator/state.rb
@@ -56,6 +56,11 @@ module JSON
buffer_initial_length: buffer_initial_length,
}
+ allow_duplicate_key = allow_duplicate_key?
+ unless allow_duplicate_key.nil?
+ result[:allow_duplicate_key] = allow_duplicate_key
+ end
+
instance_variables.each do |iv|
iv = iv.to_s[1..-1]
result[iv.to_sym] = self[iv]
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 496a769206..e34f1999d5 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -1314,7 +1314,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
else if (key == sym_freeze) { config->freeze = RTEST(val); }
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
- else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
+ else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
else if (key == sym_decimal_class) {
if (RTEST(val)) {
if (rb_respond_to(val, i_try_convert)) {
diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb
index 963350ea49..4315d109d8 100755
--- a/test/json/json_generator_test.rb
+++ b/test/json/json_generator_test.rb
@@ -234,6 +234,24 @@ class JSONGeneratorTest < Test::Unit::TestCase
:space => "",
:space_before => "",
}.sort_by { |n,| n.to_s }, state.to_h.sort_by { |n,| n.to_s })
+
+ state = JSON::State.new(allow_duplicate_key: true)
+ assert_equal({
+ :allow_duplicate_key => true,
+ :allow_nan => false,
+ :array_nl => "",
+ :as_json => false,
+ :ascii_only => false,
+ :buffer_initial_length => 1024,
+ :depth => 0,
+ :script_safe => false,
+ :strict => false,
+ :indent => "",
+ :max_nesting => 100,
+ :object_nl => "",
+ :space => "",
+ :space_before => "",
+ }.sort_by { |n,| n.to_s }, state.to_h.sort_by { |n,| n.to_s })
end
def test_allow_nan
@@ -828,4 +846,24 @@ class JSONGeneratorTest < Test::Unit::TestCase
assert_equal "[#{number}]", JSON.generate([number])
end
end
+
+ def test_generate_duplicate_keys_allowed
+ hash = { foo: 1, "foo" => 2 }
+ assert_equal %({"foo":1,"foo":2}), JSON.generate(hash, allow_duplicate_key: true)
+ end
+
+ def test_generate_duplicate_keys_deprecated
+ hash = { foo: 1, "foo" => 2 }
+ assert_deprecated_warning(/allow_duplicate_key/) do
+ assert_equal %({"foo":1,"foo":2}), JSON.generate(hash)
+ end
+ end
+
+ def test_generate_duplicate_keys_disallowed
+ hash = { foo: 1, "foo" => 2 }
+ error = assert_raise JSON::GeneratorError do
+ JSON.generate(hash, allow_duplicate_key: false)
+ end
+ assert_equal %(detected duplicate key "foo" in #{hash.inspect}), error.message
+ end
end