Diffstat (limited to 'test/json/json_encoding_test.rb')
 -rw-r--r--   test/json/json_encoding_test.rb | 256
 1 file changed, 212 insertions(+), 44 deletions(-)
diff --git a/test/json/json_encoding_test.rb b/test/json/json_encoding_test.rb
index cc7b71553a..7ac06b2a7b 100644
--- a/test/json/json_encoding_test.rb
+++ b/test/json/json_encoding_test.rb
@@ -1,29 +1,20 @@
-# encoding: utf-8
-#frozen_string_literal: false
-require 'test_helper'
+# frozen_string_literal: true
+
+require_relative 'test_helper'
 
 class JSONEncodingTest < Test::Unit::TestCase
   include JSON
 
   def setup
     @utf_8 = '"© ≠ €!"'
-    @ascii_8bit = @utf_8.dup.force_encoding('ascii-8bit')
+    @ascii_8bit = @utf_8.b
     @parsed = "© ≠ €!"
     @generated = '"\u00a9 \u2260 \u20ac!"'
-    if String.method_defined?(:encode)
-      @utf_16_data = @parsed.encode('utf-16be', 'utf-8')
-      @utf_16be = @utf_8.encode('utf-16be', 'utf-8')
-      @utf_16le = @utf_8.encode('utf-16le', 'utf-8')
-      @utf_32be = @utf_8.encode('utf-32be', 'utf-8')
-      @utf_32le = @utf_8.encode('utf-32le', 'utf-8')
-    else
-      require 'iconv'
-      @utf_16_data, = Iconv.iconv('utf-16be', 'utf-8', @parsed)
-      @utf_16be, = Iconv.iconv('utf-16be', 'utf-8', @utf_8)
-      @utf_16le, = Iconv.iconv('utf-16le', 'utf-8', @utf_8)
-      @utf_32be, = Iconv.iconv('utf-32be', 'utf-8', @utf_8)
-      @utf_32le, = Iconv.iconv('utf-32le', 'utf-8', @utf_8)
-    end
+    @utf_16_data = @parsed.encode(Encoding::UTF_16BE, Encoding::UTF_8)
+    @utf_16be = @utf_8.encode(Encoding::UTF_16BE, Encoding::UTF_8)
+    @utf_16le = @utf_8.encode(Encoding::UTF_16LE, Encoding::UTF_8)
+    @utf_32be = @utf_8.encode(Encoding::UTF_32BE, Encoding::UTF_8)
+    @utf_32le = @utf_8.encode(Encoding::UTF_32LE, Encoding::UTF_8)
   end
 
   def test_parse
@@ -36,8 +27,20 @@ class JSONEncodingTest < Test::Unit::TestCase
   end
 
   def test_generate
-    assert_equal @generated, JSON.generate(@parsed, :ascii_only => true)
-    assert_equal @generated, JSON.generate(@utf_16_data, :ascii_only => true)
+    assert_equal @generated, JSON.generate(@parsed, ascii_only: true)
+    assert_equal @generated, JSON.generate(@utf_16_data, ascii_only: true)
+  end
+
+  def test_generate_shared_string
+    # Ref: https://github.com/ruby/json/issues/859
+    s = "01234567890"
+    assert_equal '"234567890"', JSON.dump(s[2..-1])
+    s = '01234567890123456789"a"b"c"d"e"f"g"h'
+    assert_equal '"\"a\"b\"c\"d\"e\"f\"g\""', JSON.dump(s[20, 15])
+    s = "0123456789001234567890012345678900123456789001234567890"
+    assert_equal '"23456789001234567890012345678900123456789001234567890"', JSON.dump(s[2..-1])
+    s = "0123456789001234567890012345678900123456789001234567890"
+    assert_equal '"567890012345678900123456789001234567890012345678"', JSON.dump(s[5..-3])
   end
 
   def test_unicode
@@ -47,37 +50,37 @@
     assert_equal '"\u001f"', 0x1f.chr.to_json
     assert_equal '" "', ' '.to_json
     assert_equal "\"#{0x7f.chr}\"", 0x7f.chr.to_json
-    utf8 = [ "© ≠ €! \01" ]
+    utf8 = ["© ≠ €! \01"]
     json = '["© ≠ €! \u0001"]'
-    assert_equal json, utf8.to_json(:ascii_only => false)
+    assert_equal json, utf8.to_json(ascii_only: false)
     assert_equal utf8, parse(json)
     json = '["\u00a9 \u2260 \u20ac! \u0001"]'
-    assert_equal json, utf8.to_json(:ascii_only => true)
+    assert_equal json, utf8.to_json(ascii_only: true)
     assert_equal utf8, parse(json)
     utf8 = ["\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"]
     json = "[\"\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212\"]"
     assert_equal utf8, parse(json)
-    assert_equal json, utf8.to_json(:ascii_only => false)
+    assert_equal json, utf8.to_json(ascii_only: false)
     utf8 = ["\343\201\202\343\201\204\343\201\206\343\201\210\343\201\212"]
     assert_equal utf8, parse(json)
     json = "[\"\\u3042\\u3044\\u3046\\u3048\\u304a\"]"
-    assert_equal json, utf8.to_json(:ascii_only => true)
+    assert_equal json, utf8.to_json(ascii_only: true)
     assert_equal utf8, parse(json)
     utf8 = ['საქართველო']
     json = '["საქართველო"]'
-    assert_equal json, utf8.to_json(:ascii_only => false)
+    assert_equal json, utf8.to_json(ascii_only: false)
     json = "[\"\\u10e1\\u10d0\\u10e5\\u10d0\\u10e0\\u10d7\\u10d5\\u10d4\\u10da\\u10dd\"]"
-    assert_equal json, utf8.to_json(:ascii_only => true)
+    assert_equal json, utf8.to_json(ascii_only: true)
     assert_equal utf8, parse(json)
-    assert_equal '["Ã"]', generate(["Ã"], :ascii_only => false)
-    assert_equal '["\\u00c3"]', generate(["Ã"], :ascii_only => true)
+    assert_equal '["Ã"]', generate(["Ã"], ascii_only: false)
+    assert_equal '["\\u00c3"]', generate(["Ã"], ascii_only: true)
     assert_equal ["€"], parse('["\u20ac"]')
     utf8 = ["\xf0\xa0\x80\x81"]
     json = "[\"\xf0\xa0\x80\x81\"]"
-    assert_equal json, generate(utf8, :ascii_only => false)
+    assert_equal json, generate(utf8, ascii_only: false)
     assert_equal utf8, parse(json)
     json = '["\ud840\udc01"]'
-    assert_equal json, generate(utf8, :ascii_only => true)
+    assert_equal json, generate(utf8, ascii_only: true)
     assert_equal utf8, parse(json)
     assert_raise(JSON::ParserError) { parse('"\u"') }
     assert_raise(JSON::ParserError) { parse('"\ud800"') }
@@ -85,23 +88,188 @@ class JSONEncodingTest < Test::Unit::TestCase
 
   def test_chars
     (0..0x7f).each do |i|
-      json = '["\u%04x"]' % i
-      if RUBY_VERSION >= "1.9."
-        i = i.chr
-      end
-      assert_equal i, parse(json).first[0]
-      if i == ?\b
-        generated = generate(["" << i])
-        assert '["\b"]' == generated || '["\10"]' == generated
-      elsif [?\n, ?\r, ?\t, ?\f].include?(i)
-        assert_equal '[' << ('' << i).dump << ']', generate(["" << i])
+      json = '"\u%04x"' % i
+      i = i.chr
+      assert_equal i, parse(json)[0]
+      if i == "\b"
+        generated = generate(i)
+        assert ['"\b"', '"\10"'].include?(generated)
+      elsif ["\n", "\r", "\t", "\f"].include?(i)
+        assert_equal i.dump, generate(i)
       elsif i.chr < 0x20.chr
-        assert_equal json, generate(["" << i])
+        assert_equal json, generate(i)
       end
     end
     assert_raise(JSON::GeneratorError) do
-      generate(["\x80"], :ascii_only => true)
+      generate(["\x80"], ascii_only: true)
+    end
+    assert_equal "\302\200", parse('"\u0080"')
+  end
+
+  def test_deeply_nested_structures
+    # Test for deeply nested arrays
+    nesting_level = 100
+    deeply_nested = []
+    current = deeply_nested
+
+    (nesting_level - 1).times do
+      current << []
+      current = current[0]
     end
-    assert_equal "\302\200", parse('["\u0080"]').first
+
+    json = generate(deeply_nested)
+    assert_equal deeply_nested, parse(json)
+
+    # Test for deeply nested objects/hashes
+    deeply_nested_hash = {}
+    current_hash = deeply_nested_hash
+
+    (nesting_level - 1).times do |i|
+      current_hash["key#{i}"] = {}
+      current_hash = current_hash["key#{i}"]
+    end
+
+    json = generate(deeply_nested_hash)
+    assert_equal deeply_nested_hash, parse(json)
+  end
+
+  def test_very_large_json_strings
+    # Create a large array with repeated elements
+    large_array = Array.new(10_000) { |i| "item#{i}" }
+
+    json = generate(large_array)
+    parsed = parse(json)
+
+    assert_equal large_array.size, parsed.size
+    assert_equal large_array.first, parsed.first
+    assert_equal large_array.last, parsed.last
+
+    # Create a large hash
+    large_hash = {}
+    10_000.times { |i| large_hash["key#{i}"] = "value#{i}" }
+
+    json = generate(large_hash)
+    parsed = parse(json)
+
+    assert_equal large_hash.size, parsed.size
+    assert_equal large_hash["key0"], parsed["key0"]
+    assert_equal large_hash["key9999"], parsed["key9999"]
+  end
+
+  def test_invalid_utf8_sequences
+    invalid_utf8 = "\xFF\xFF"
+    error = assert_raise(JSON::GeneratorError) do
+      generate(invalid_utf8)
+    end
+    assert_match(%r{source sequence is illegal/malformed utf-8}, error.message)
+  end
+
+  def test_surrogate_pair_handling
+    # Test valid surrogate pairs
+    assert_equal "\u{10000}", parse('"\ud800\udc00"')
+    assert_equal "\u{10FFFF}", parse('"\udbff\udfff"')
+
+    # The existing test already checks for orphaned high surrogate
+    assert_raise(JSON::ParserError) { parse('"\ud800"') }
+
+    # Test generating surrogate pairs
+    utf8_string = "\u{10437}"
+    generated = generate(utf8_string, ascii_only: true)
+    assert_match(/\\ud801\\udc37/, generated)
+  end
+
+  def test_json_escaping_edge_cases
+    # Test escaping forward slashes
+    assert_equal "/", parse('"\/"')
+
+    # Test escaping backslashes
+    assert_equal "\\", parse('"\\\\"')
+
+    # Test escaping quotes
+    assert_equal '"', parse('"\\""')
+
+    # Multiple escapes in sequence - different JSON parsers might handle escaped forward slashes differently
+    # Some parsers preserve the escaping, others don't
+    escaped_result = parse('"\\\\\\"\\/"')
+    assert_match(/\\"/, escaped_result)
+    assert_match(%r{/}, escaped_result)
+
+    # Generate string with all special characters
+    special_chars = "\b\f\n\r\t\"\\"
+    escaped_json = generate(special_chars)
+    assert_equal special_chars, parse(escaped_json)
+  end
+
+  def test_empty_objects_and_arrays
+    # Test empty objects with different encodings
+    assert_equal({}, parse('{}'))
+    assert_equal({}, parse('{}'.encode(Encoding::UTF_16BE)))
+    assert_equal({}, parse('{}'.encode(Encoding::UTF_16LE)))
+    assert_equal({}, parse('{}'.encode(Encoding::UTF_32BE)))
+    assert_equal({}, parse('{}'.encode(Encoding::UTF_32LE)))
+
+    # Test empty arrays with different encodings
+    assert_equal([], parse('[]'))
+    assert_equal([], parse('[]'.encode(Encoding::UTF_16BE)))
+    assert_equal([], parse('[]'.encode(Encoding::UTF_16LE)))
+    assert_equal([], parse('[]'.encode(Encoding::UTF_32BE)))
+    assert_equal([], parse('[]'.encode(Encoding::UTF_32LE)))
+
+    # Test generating empty objects and arrays
+    assert_equal '{}', generate({})
+    assert_equal '[]', generate([])
+  end
+
+  def test_null_character_handling
+    # Test parsing null character
+    assert_equal "\u0000", parse('"\u0000"')
+
+    # Test generating null character
+    string_with_null = "\u0000"
+    generated = generate(string_with_null)
+    assert_equal '"\u0000"', generated
+
+    # Test null characters in middle of string
+    mixed_string = "before\u0000after"
+    generated = generate(mixed_string)
+    assert_equal mixed_string, parse(generated)
+  end
+
+  def test_whitespace_handling
+    # Test parsing with various whitespace patterns
+    assert_equal({}, parse(' { } '))
+    assert_equal({}, parse("{\r\n}"))
+    assert_equal([], parse(" [ \n ] "))
+    assert_equal(["a", "b"], parse(" [ \n\"a\",\r\n \"b\"\n ] "))
+    assert_equal({ "a" => "b" }, parse(" { \n\"a\" \r\n: \t\"b\"\n } "))
+
+    # Test with excessive whitespace
+    excessive_whitespace = " \n\r\t" * 10 + "{}" + " \n\r\t" * 10
+    assert_equal({}, parse(excessive_whitespace))
+
+    # Mixed whitespace in keys and values
+    mixed_json = '{"a \n b":"c \r\n d"}'
+    assert_equal({ "a \n b" => "c \r\n d" }, parse(mixed_json))
+  end
+
+  def test_control_character_handling
+    # Test all control characters (U+0000 to U+001F)
+    (0..0x1F).each do |i|
+      # Skip already tested ones
+      next if [0x08, 0x0A, 0x0D, 0x0C, 0x09].include?(i)
+
+      control_char = i.chr('UTF-8')
+      escaped_json = '"' + "\\u%04x" % i + '"'
+      assert_equal control_char, parse(escaped_json)
+
+      # Check that the character is properly escaped when generating
+      assert_match(/\\u00[0-1][0-9a-f]/, generate(control_char))
+    end
+
+    # Test string with multiple control characters
+    control_str = "\u0001\u0002\u0003\u0004"
+    generated = generate(control_str)
+    assert_equal control_str, parse(generated)
+    assert_match(/\\u0001\\u0002\\u0003\\u0004/, generated)
   end
 end
