diff options
| author | Jean Boussier <jean.boussier@gmail.com> | 2025-12-04 09:06:10 +0100 |
|---|---|---|
| committer | git <svn-admin@ruby-lang.org> | 2025-12-04 08:10:41 +0000 |
| commit | d58a45d32ffe8afed1685e54017fb81cea898867 (patch) | |
| tree | 0e0905a1fd74b77ffb7dfaae7f182222acd5fca1 | |
| parent | 932762f29457ad1def6fbab7eca7bcbeeb58ea5c (diff) | |
[ruby/json] Fix a regression in parsing of Unicode surrogate pairs
Fix: https://github.com/ruby/json/issues/912
In the case of surrogate pairs we consume two backslashes, so
`json_next_backslash` needs to ensure it's not sending us back in the
stream.
https://github.com/ruby/json/commit/0fce370c41
| -rw-r--r-- | ext/json/parser/parser.c | 4 | ||||
| -rw-r--r-- | test/json/json_parser_test.rb | 7 |
2 files changed, 10 insertions, 1 deletion
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 5b7cd835cd..c84c7ed660 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -651,7 +651,9 @@ static inline const char *json_next_backslash(const char *pe, const char *string positions->size--; const char *next_position = positions->positions[0]; positions->positions++; - return next_position; + if (next_position >= pe) { + return next_position; + } } if (positions->has_more) { diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 3e662bda32..257e4f1736 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -325,6 +325,13 @@ class JSONParserTest < Test::Unit::TestCase assert_raise(JSON::ParserError) { parse('"\u111___"') } end + def test_unicode_followed_by_newline + # Ref: https://github.com/ruby/json/issues/912 + assert_equal "🌌\n".bytes, JSON.parse('"\ud83c\udf0c\n"').bytes + assert_equal "🌌\n", JSON.parse('"\ud83c\udf0c\n"') + assert_predicate JSON.parse('"\ud83c\udf0c\n"'), :valid_encoding? + end + def test_invalid_surogates assert_raise(JSON::ParserError) { parse('"\\uD800"') } assert_raise(JSON::ParserError) { parse('"\\uD800_________________"') } |
