From d58a45d32ffe8afed1685e54017fb81cea898867 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 4 Dec 2025 09:06:10 +0100 Subject: [ruby/json] Fix a regression in parsing of Unicode surrogate pairs Fix: https://github.com/ruby/json/issues/912 In the case of surrogate pairs we consume two backslashes, so `json_next_backslash` needs to ensure it's not sending us back in the stream. https://github.com/ruby/json/commit/0fce370c41 --- ext/json/parser/parser.c | 4 +++- test/json/json_parser_test.rb | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 5b7cd835cd..c84c7ed660 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -651,7 +651,9 @@ static inline const char *json_next_backslash(const char *pe, const char *string positions->size--; const char *next_position = positions->positions[0]; positions->positions++; - return next_position; + if (next_position >= pe) { + return next_position; + } } if (positions->has_more) { diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 3e662bda32..257e4f1736 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -325,6 +325,13 @@ class JSONParserTest < Test::Unit::TestCase assert_raise(JSON::ParserError) { parse('"\u111___"') } end + def test_unicode_followed_by_newline + # Ref: https://github.com/ruby/json/issues/912 + assert_equal "🌌\n".bytes, JSON.parse('"\ud83c\udf0c\n"').bytes + assert_equal "🌌\n", JSON.parse('"\ud83c\udf0c\n"') + assert_predicate JSON.parse('"\ud83c\udf0c\n"'), :valid_encoding? + end + def test_invalid_surogates assert_raise(JSON::ParserError) { parse('"\\uD800"') } assert_raise(JSON::ParserError) { parse('"\\uD800_________________"') } -- cgit v1.2.3