diff options
| author | Jean Boussier <jean.boussier@gmail.com> | 2025-08-23 19:57:14 +0200 |
|---|---|---|
| committer | Hiroshi SHIBATA <hsbt@ruby-lang.org> | 2025-08-27 10:01:52 +0900 |
| commit | d9e9a667a8c8fb6f57611c68b45eaf1f2c39fca1 (patch) | |
| tree | 3ac8241bb5b0bc69574d2496324e0ec14a730731 | |
| parent | 0e0f0dfd070fc156ec74c58f44d86a884a0580e0 (diff) | |
JSON.generate: warn or raise on duplicated key
Because both strings and symbols keys are serialized the same,
it always has been possible to generate documents with duplicated
keys:
```ruby
>> puts JSON.generate({ foo: 1, "foo" => 2 })
{"foo":1,"foo":2}
```
This is pretty much always a mistake and can cause various
issues because it's not guaranteed how various JSON parsers
will handle this.
Until now I didn't think it was possible to catch such case without
tanking performance, hence why I only made the parser more strict.
But I finally found a way to check for duplicated keys cheaply enough.
| -rw-r--r-- | ext/json/fbuffer/fbuffer.h | 8 | ||||
| -rw-r--r-- | ext/json/generator/generator.c | 73 | ||||
| -rw-r--r-- | ext/json/lib/json/common.rb | 19 | ||||
| -rw-r--r-- | ext/json/lib/json/ext/generator/state.rb | 5 | ||||
| -rw-r--r-- | ext/json/parser/parser.c | 2 | ||||
| -rwxr-xr-x | test/json/json_generator_test.rb | 38 |
6 files changed, 138 insertions, 7 deletions
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h index 247a0de470..d5fd8ac6d7 100644 --- a/ext/json/fbuffer/fbuffer.h +++ b/ext/json/fbuffer/fbuffer.h @@ -24,6 +24,14 @@ typedef unsigned char _Bool; #endif #endif +#ifndef NOINLINE +#if defined(__has_attribute) && __has_attribute(noinline) +#define NOINLINE() __attribute__((noinline)) +#else +#define NOINLINE() +#endif +#endif + #ifndef RB_UNLIKELY #define RB_UNLIKELY(expr) expr #endif diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c index 52dcd24f0e..d810927c58 100644 --- a/ext/json/generator/generator.c +++ b/ext/json/generator/generator.c @@ -9,6 +9,12 @@ /* ruby api and some helpers */ +enum duplicate_key_action { + JSON_DEPRECATED = 0, + JSON_IGNORE, + JSON_RAISE, +}; + typedef struct JSON_Generator_StateStruct { VALUE indent; VALUE space; @@ -21,6 +27,8 @@ typedef struct JSON_Generator_StateStruct { long depth; long buffer_initial_length; + enum duplicate_key_action on_duplicate_key; + bool allow_nan; bool ascii_only; bool script_safe; @@ -34,7 +42,7 @@ typedef struct JSON_Generator_StateStruct { static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8; static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode; -static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, +static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key, sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json; @@ -987,8 +995,11 @@ static inline VALUE vstate_get(struct generate_json_data *data) } struct hash_foreach_arg { + VALUE hash; struct generate_json_data *data; - int iter; + int first_key_type; + bool first; + bool mixed_keys_encountered; }; static VALUE @@ -1006,6 +1017,22 @@ convert_string_subclass(VALUE key) return key_to_s; } +NOINLINE() +static void +json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg) +{ + if (arg->mixed_keys_encountered) { + return; + } + arg->mixed_keys_encountered = true; + + JSON_Generator_State *state = arg->data->state; + if (state->on_duplicate_key != JSON_IGNORE) { + VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse; + rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise); + } +} + static int json_object_i(VALUE key, VALUE val, VALUE _arg) { @@ -1016,8 +1043,16 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) JSON_Generator_State *state = data->state; long depth = state->depth; + int key_type = rb_type(key); + + if (arg->first) { + arg->first = false; + arg->first_key_type = key_type; + } + else { + fbuffer_append_char(buffer, ','); + } - if (arg->iter > 0) fbuffer_append_char(buffer, ','); if (RB_UNLIKELY(data->state->object_nl)) { fbuffer_append_str(buffer, data->state->object_nl); } @@ -1029,8 +1064,12 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) bool as_json_called = false; start: - switch (rb_type(key)) { + switch (key_type) { case T_STRING: + if (RB_UNLIKELY(arg->first_key_type != T_STRING)) { + json_inspect_hash_with_mixed_keys(arg); + } + if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) { key_to_s = key; } else { @@ -1038,12 +1077,17 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) } break; case T_SYMBOL: + if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) { + json_inspect_hash_with_mixed_keys(arg); + } + key_to_s = rb_sym2str(key); break; default: if (data->state->strict) { if (RTEST(data->state->as_json) && !as_json_called) { key = rb_proc_call_with_block(data->state->as_json, 1, &key, Qnil); + key_type = rb_type(key); as_json_called = true; goto start; } else { @@ -1064,7 +1108,6 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space); generate_json(buffer, data, val); - arg->iter++; return ST_CONTINUE; } @@ -1091,8 +1134,9 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat fbuffer_append_char(buffer, '{'); struct hash_foreach_arg arg = { + .hash = obj, .data = data, - .iter = 0, + .first = true, }; rb_hash_foreach(obj, json_object_i, (VALUE)&arg); @@ -1794,6 +1838,19 @@ static VALUE cState_ascii_only_set(VALUE self, VALUE enable) return Qnil; } +static VALUE cState_allow_duplicate_key_p(VALUE self) +{ + GET_STATE(self); + switch (state->on_duplicate_key) { + case JSON_IGNORE: + return Qtrue; + case JSON_DEPRECATED: + return Qnil; + case JSON_RAISE: + return Qfalse; + } +} + /* * call-seq: depth * @@ -1883,6 +1940,7 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg) else if (key == sym_script_safe) { state->script_safe = RTEST(val); } else if (key == sym_escape_slash) { state->script_safe = RTEST(val); } else if (key == sym_strict) { state->strict = RTEST(val); } + else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } else if (key == sym_as_json) { VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; state_write_value(data, &state->as_json, proc); @@ -2008,6 +2066,8 @@ void Init_generator(void) rb_define_method(cState, "generate", cState_generate, -1); rb_define_alias(cState, "generate_new", "generate"); // :nodoc: + rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0); + rb_define_singleton_method(cState, "generate", cState_m_generate, 3); VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); @@ -2072,6 +2132,7 @@ void Init_generator(void) sym_escape_slash = ID2SYM(rb_intern("escape_slash")); sym_strict = ID2SYM(rb_intern("strict")); sym_as_json = ID2SYM(rb_intern("as_json")); + sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key")); usascii_encindex = rb_usascii_encindex(); utf8_encindex = rb_utf8_encindex(); diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb index 2859a8c63f..45200a83bc 100644 --- a/ext/json/lib/json/common.rb +++ b/ext/json/lib/json/common.rb @@ -186,6 +186,25 @@ module JSON private + # Called from the extension when a hash has both string and symbol keys + def on_mixed_keys_hash(hash, do_raise) + set = {} + hash.each_key do |key| + key_str = key.to_s + + if set[key_str] + message = "detected duplicate key #{key_str.inspect} in #{hash.inspect}" + if do_raise + raise GeneratorError, message + else + deprecation_warning("#{message}.\nThis will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`") + end + else + set[key_str] = true + end + end + end + def deprecated_singleton_attr_accessor(*attrs) args = RUBY_VERSION >= "3.0" ? ", category: :deprecated" : "" attrs.each do |attr| diff --git a/ext/json/lib/json/ext/generator/state.rb b/ext/json/lib/json/ext/generator/state.rb index 2ec2daa202..1f56e6c682 100644 --- a/ext/json/lib/json/ext/generator/state.rb +++ b/ext/json/lib/json/ext/generator/state.rb @@ -56,6 +56,11 @@ module JSON buffer_initial_length: buffer_initial_length, } + allow_duplicate_key = allow_duplicate_key? + unless allow_duplicate_key.nil? + result[:allow_duplicate_key] = allow_duplicate_key + end + instance_variables.each do |iv| iv = iv.to_s[1..-1] result[iv.to_sym] = self[iv] diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 496a769206..e34f1999d5 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1314,7 +1314,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data) else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); } else if (key == sym_freeze) { config->freeze = RTEST(val); } else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; } - else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } + else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } else if (key == sym_decimal_class) { if (RTEST(val)) { if (rb_respond_to(val, i_try_convert)) { diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 963350ea49..4315d109d8 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -234,6 +234,24 @@ class JSONGeneratorTest < Test::Unit::TestCase :space => "", :space_before => "", }.sort_by { |n,| n.to_s }, state.to_h.sort_by { |n,| n.to_s }) + + state = JSON::State.new(allow_duplicate_key: true) + assert_equal({ + :allow_duplicate_key => true, + :allow_nan => false, + :array_nl => "", + :as_json => false, + :ascii_only => false, + :buffer_initial_length => 1024, + :depth => 0, + :script_safe => false, + :strict => false, + :indent => "", + :max_nesting => 100, + :object_nl => "", + :space => "", + :space_before => "", + }.sort_by { |n,| n.to_s }, state.to_h.sort_by { |n,| n.to_s }) end def test_allow_nan @@ -828,4 +846,24 @@ class JSONGeneratorTest < Test::Unit::TestCase assert_equal "[#{number}]", JSON.generate([number]) end end + + def test_generate_duplicate_keys_allowed + hash = { foo: 1, "foo" => 2 } + assert_equal %({"foo":1,"foo":2}), JSON.generate(hash, allow_duplicate_key: true) + end + + def test_generate_duplicate_keys_deprecated + hash = { foo: 1, "foo" => 2 } + assert_deprecated_warning(/allow_duplicate_key/) do + assert_equal %({"foo":1,"foo":2}), JSON.generate(hash) + end + end + + def test_generate_duplicate_keys_disallowed + hash = { foo: 1, "foo" => 2 } + error = assert_raise JSON::GeneratorError do + JSON.generate(hash, allow_duplicate_key: false) + end + assert_equal %(detected duplicate key "foo" in #{hash.inspect}), error.message + end end |
