diff options
Diffstat (limited to 'test/ruby/test_m17n.rb')
| -rw-r--r-- | test/ruby/test_m17n.rb | 274 |
1 files changed, 144 insertions, 130 deletions
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 6b0bc4de5e..9f7a3c7f4b 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -69,15 +69,6 @@ class TestM17N < Test::Unit::TestCase assert_regexp_fixed_encoding(r) end - def assert_regexp_usascii_literal(r, enc, ex = nil) - code = "# -*- encoding: US-ASCII -*-\n#{r}.encoding" - if ex - assert_raise(ex) { eval(code) } - else - assert_equal(enc, eval(code)) - end - end - def encdump(str) d = str.dump if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d @@ -195,69 +186,49 @@ class TestM17N < Test::Unit::TestCase end def test_string_inspect_encoding - EnvUtil.suppress_warning do - begin - orig_int = Encoding.default_internal - orig_ext = Encoding.default_external - Encoding.default_internal = nil - [Encoding::UTF_8, Encoding::EUC_JP, Encoding::Windows_31J, Encoding::GB18030]. - each do |e| - Encoding.default_external = e - str = "\x81\x30\x81\x30".force_encoding('GB18030') - assert_equal(Encoding::GB18030 == e ? %{"#{str}"} : '"\x{81308130}"', str.inspect) - str = e("\xa1\x8f\xa1\xa1") - expected = "\"\\xA1\x8F\xA1\xA1\"".force_encoding("EUC-JP") - assert_equal(Encoding::EUC_JP == e ? expected : "\"\\xA1\\x{8FA1A1}\"", str.inspect) - str = s("\x81@") - assert_equal(Encoding::Windows_31J == e ? %{"#{str}"} : '"\x{8140}"', str.inspect) - str = "\u3042\u{10FFFD}" - assert_equal(Encoding::UTF_8 == e ? %{"#{str}"} : '"\u3042\u{10FFFD}"', str.inspect) - end - Encoding.default_external = Encoding::UTF_8 - [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE, - Encoding::UTF8_SOFTBANK].each do |e| - str = "abc".encode(e) - assert_equal('"abc"', str.inspect) - end - ensure - Encoding.default_internal = orig_int - Encoding.default_external = orig_ext + [ + Encoding::UTF_8, + Encoding::EUC_JP, + Encoding::Windows_31J, + Encoding::GB18030, + ].each do |e| + EnvUtil.with_default_external(e) do + str = "\x81\x30\x81\x30".force_encoding('GB18030') + assert_equal(Encoding::GB18030 == e ? %{"#{str}"} : '"\x{81308130}"', str.inspect) + str = e("\xa1\x8f\xa1\xa1") + expected = "\"\\xA1\x8F\xA1\xA1\"".force_encoding("EUC-JP") + assert_equal(Encoding::EUC_JP == e ? expected : "\"\\xA1\\x{8FA1A1}\"", str.inspect) + str = s("\x81@") + assert_equal(Encoding::Windows_31J == e ? %{"#{str}"} : '"\x{8140}"', str.inspect) + str = "\u3042\u{10FFFD}" + assert_equal(Encoding::UTF_8 == e ? %{"#{str}"} : '"\u3042\u{10FFFD}"', str.inspect) end end - end - STR_WITHOUT_BOM = "\u3042".freeze - STR_WITH_BOM = "\uFEFF\u3042".freeze - bug8940 = '[ruby-core:59757] [Bug #8940]' - bug9415 = '[ruby-dev:47895] [Bug #9415]' - %w/UTF-16 UTF-32/.each do |enc| - %w/BE LE/.each do |endian| - bom = "\uFEFF".encode("#{enc}#{endian}").force_encoding(enc) - - define_method("test_utf_16_32_inspect(#{enc}#{endian})") do - s = STR_WITHOUT_BOM.encode(enc + endian) - # When a UTF-16/32 string doesn't have a BOM, - # inspect as a dummy encoding string. - assert_equal(s.dup.force_encoding("ISO-2022-JP").inspect, - s.dup.force_encoding(enc).inspect) - assert_normal_exit("#{bom.b.dump}.force_encoding('#{enc}').inspect", bug8940) - end - - define_method("test_utf_16_32_codepoints(#{enc}#{endian})") do - assert_equal([0xFEFF], bom.codepoints, bug9415) + EnvUtil.with_default_external(Encoding::UTF_8) do + [ + Encoding::UTF_16BE, + Encoding::UTF_16LE, + Encoding::UTF_32BE, + Encoding::UTF_32LE, + Encoding::UTF8_SOFTBANK + ].each do |e| + str = "abc".encode(e) + assert_equal('"abc"', str.inspect) end + end + end - define_method("test_utf_16_32_ord(#{enc}#{endian})") do - assert_equal(0xFEFF, bom.ord, bug9415) - end + def test_utf_dummy_are_like_regular_dummy_encodings + [Encoding::UTF_16, Encoding::UTF_32].each do |enc| + s = "\u3042".encode("UTF-32BE") + assert_equal(s.dup.force_encoding("ISO-2022-JP").inspect, s.dup.force_encoding(enc).inspect) + s = "\x00\x00\xFE\xFF" + assert_equal(s.dup.force_encoding("ISO-2022-JP").inspect, s.dup.force_encoding(enc).inspect) - define_method("test_utf_16_32_inspect(#{enc}#{endian}-BOM)") do - s = STR_WITH_BOM.encode(enc + endian) - # When a UTF-16/32 string has a BOM, - # inspect as a particular encoding string. - assert_equal(s.inspect, - s.dup.force_encoding(enc).inspect) - end + assert_equal [0, 0, 254, 255], "\x00\x00\xFE\xFF".force_encoding(enc).codepoints + assert_equal 0, "\x00\x00\xFE\xFF".force_encoding(enc).ord + assert_equal 255, "\xFF\xFE\x00\x00".force_encoding(enc).ord end end @@ -277,56 +248,43 @@ class TestM17N < Test::Unit::TestCase end def test_object_utf16_32_inspect - EnvUtil.suppress_warning do - begin - orig_int = Encoding.default_internal - orig_ext = Encoding.default_external - Encoding.default_internal = nil - Encoding.default_external = Encoding::UTF_8 - o = Object.new - [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].each do |e| - o.instance_eval "undef inspect;def inspect;'abc'.encode('#{e}');end" - assert_equal '[abc]', [o].inspect - end - ensure - Encoding.default_internal = orig_int - Encoding.default_external = orig_ext + EnvUtil.with_default_external(Encoding::UTF_8) do + o = Object.new + [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].each do |e| + o.instance_eval "undef inspect;def inspect;'abc'.encode('#{e}');end" + assert_equal '[abc]', [o].inspect end end end def test_object_inspect_external - orig_v, $VERBOSE = $VERBOSE, false - orig_int, Encoding.default_internal = Encoding.default_internal, nil - orig_ext = Encoding.default_external + omit "https://bugs.ruby-lang.org/issues/18338" + o = Object.new - Encoding.default_external = Encoding::UTF_16BE - def o.inspect - "abc" - end - assert_nothing_raised(Encoding::CompatibilityError) { [o].inspect } + EnvUtil.with_default_external(Encoding::UTF_16BE) do + def o.inspect + "abc" + end + assert_nothing_raised(Encoding::CompatibilityError) { [o].inspect } - def o.inspect - "abc".encode(Encoding.default_external) + def o.inspect + "abc".encode(Encoding.default_external) + end + assert_equal '[abc]', [o].inspect end - assert_equal '[abc]', [o].inspect - - Encoding.default_external = Encoding::US_ASCII - def o.inspect - "\u3042" - end - assert_equal '[\u3042]', [o].inspect + EnvUtil.with_default_external(Encoding::US_ASCII) do + def o.inspect + "\u3042" + end + assert_equal '[\u3042]', [o].inspect - def o.inspect - "\x82\xa0".force_encoding(Encoding::Windows_31J) + def o.inspect + "\x82\xa0".force_encoding(Encoding::Windows_31J) + end + assert_equal '[\x{82A0}]', [o].inspect end - assert_equal '[\x{82A0}]', [o].inspect - ensure - Encoding.default_internal = orig_int - Encoding.default_external = orig_ext - $VERBOSE = orig_v end def test_str_dump @@ -889,10 +847,22 @@ class TestM17N < Test::Unit::TestCase assert_raise(Encoding::CompatibilityError) { "%s%s" % [s("\xc2\xa1"), e("\xc2\xa1")] } + + assert_equal("\u3042".encode('Windows-31J'), "%c" % "\u3042\u3044".encode('Windows-31J')) end def test_sprintf_p Encoding.list.each do |e| + unless e.ascii_compatible? + format = e.dummy? ? "%p".force_encoding(e) : "%p".encode(e) + assert_raise(Encoding::CompatibilityError) do + sprintf(format, nil) + end + assert_raise(Encoding::CompatibilityError) do + format % nil + end + next + end format = "%p".force_encoding(e) ['', 'a', "\xC2\xA1", "\x00"].each do |s| s.force_encoding(e) @@ -1097,7 +1067,23 @@ class TestM17N < Test::Unit::TestCase assert_nil(e("\xa1\xa2\xa3\xa4").index(e("\xa3"))) assert_nil(e("\xa1\xa2\xa3\xa4").rindex(e("\xa3"))) s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4") - assert_raise(Encoding::CompatibilityError){s.rindex(a("\xb1\xa3"))} + + a_with_e = /EUC-JP and BINARY \(ASCII-8BIT\)/ + assert_raise_with_message(Encoding::CompatibilityError, a_with_e) do + s.index(a("\xb1\xa3")) + end + assert_raise_with_message(Encoding::CompatibilityError, a_with_e) do + s.rindex(a("\xb1\xa3")) + end + + a_with_e = /BINARY \(ASCII-8BIT\) regexp with EUC-JP string/ + assert_raise_with_message(Encoding::CompatibilityError, a_with_e) do + s.index(Regexp.new(a("\xb1\xa3"))) + end + assert_raise_with_message(Encoding::CompatibilityError, a_with_e) do + s.rindex(Regexp.new(a("\xb1\xa3"))) + end + bug11488 = '[ruby-core:70592] [Bug #11488]' each_encoding("abcdef", "def") do |str, substr| assert_equal(3, str.index(substr), bug11488) @@ -1427,31 +1413,42 @@ class TestM17N < Test::Unit::TestCase end def test_regexp_usascii - assert_regexp_usascii_literal('//', Encoding::US_ASCII) - assert_regexp_usascii_literal('/#{ }/', Encoding::US_ASCII) - assert_regexp_usascii_literal('/#{"a"}/', Encoding::US_ASCII) - assert_regexp_usascii_literal('/#{%q"\x80"}/', Encoding::ASCII_8BIT) - assert_regexp_usascii_literal('/#{"\x80"}/', nil, SyntaxError) - - assert_regexp_usascii_literal('/a/', Encoding::US_ASCII) - assert_regexp_usascii_literal('/a#{ }/', Encoding::US_ASCII) - assert_regexp_usascii_literal('/a#{"a"}/', Encoding::US_ASCII) - assert_regexp_usascii_literal('/a#{%q"\x80"}/', Encoding::ASCII_8BIT) - assert_regexp_usascii_literal('/a#{"\x80"}/', nil, SyntaxError) - - assert_regexp_usascii_literal('/\x80/', Encoding::ASCII_8BIT) - assert_regexp_usascii_literal('/\x80#{ }/', Encoding::ASCII_8BIT) - assert_regexp_usascii_literal('/\x80#{"a"}/', Encoding::ASCII_8BIT) - assert_regexp_usascii_literal('/\x80#{%q"\x80"}/', Encoding::ASCII_8BIT) - assert_regexp_usascii_literal('/\x80#{"\x80"}/', nil, SyntaxError) - - assert_regexp_usascii_literal('/\u1234/', Encoding::UTF_8) - assert_regexp_usascii_literal('/\u1234#{ }/', Encoding::UTF_8) - assert_regexp_usascii_literal('/\u1234#{"a"}/', Encoding::UTF_8) - assert_regexp_usascii_literal('/\u1234#{%q"\x80"}/', nil, SyntaxError) - assert_regexp_usascii_literal('/\u1234#{"\x80"}/', nil, SyntaxError) - assert_regexp_usascii_literal('/\u1234\x80/', nil, SyntaxError) - assert_regexp_usascii_literal('/\u1234#{ }\x80/', nil, RegexpError) + tests = [ + [__LINE__, '//', Encoding::US_ASCII], + [__LINE__, '/#{ }/', Encoding::US_ASCII], + [__LINE__, '/#{"a"}/', Encoding::US_ASCII], + [__LINE__, '/#{%q"\x80"}/', Encoding::US_ASCII], + [__LINE__, '/#{"\x80"}/', Encoding::ASCII_8BIT], + + [__LINE__, '/a/', Encoding::US_ASCII], + [__LINE__, '/a#{ }/', Encoding::US_ASCII], + [__LINE__, '/a#{"a"}/', Encoding::US_ASCII], + [__LINE__, '/a#{%q"\x80"}/', Encoding::ASCII_8BIT], + [__LINE__, '/a#{"\x80"}/', Encoding::ASCII_8BIT], + + [__LINE__, '/\x80/', Encoding::ASCII_8BIT], + [__LINE__, '/\x80#{ }/', Encoding::ASCII_8BIT], + [__LINE__, '/\x80#{"a"}/', Encoding::ASCII_8BIT], + [__LINE__, '/\x80#{%q"\x80"}/', Encoding::ASCII_8BIT], + [__LINE__, '/\x80#{"\x80"}/', Encoding::ASCII_8BIT], + + [__LINE__, '/\u1234/', Encoding::UTF_8], + [__LINE__, '/\u1234#{ }/', Encoding::UTF_8], + [__LINE__, '/\u1234#{"a"}/', Encoding::UTF_8], + + [__LINE__, '/\u1234#{%q"\x80"}/', nil, SyntaxError], + [__LINE__, '/\u1234#{"\x80"}/', nil, SyntaxError], + [__LINE__, '/\u1234\x80/', nil, SyntaxError], + [__LINE__, '/\u1234#{ }\x80/', nil, RegexpError], + ] + all_assertions_foreach(nil, *tests) do |line, r, enc, ex| + code = "# -*- encoding: US-ASCII -*-\n#{r}.encoding" + if ex + assert_raise(ex) {eval(code, nil, __FILE__, line-1)} + else + assert_equal(enc, eval(code, nil, __FILE__, line-1)) + end + end end def test_gbk @@ -1712,6 +1709,23 @@ class TestM17N < Test::Unit::TestCase assert_equal(e("[\"\xB4\xC1\xBB\xFA\"]"), s, bug11787) end + def test_encoding_names_of_default_internal + # [Bug #20595] [Bug #20598] + [ + "default_internal.names", + "name_list", + "aliases.keys" + ].each do |method| + assert_separately(%w(-W0), <<~RUBY) + exp_name = "int" + "ernal" + Encoding.default_internal = Encoding::ASCII_8BIT + name = Encoding.#{method}.find { |x| x == exp_name } + Encoding.default_internal = nil + assert_equal exp_name, name, "Encoding.#{method} [Bug #20595] [Bug #20598]" + RUBY + end + end + def test_greek_capital_gap bug12204 = '[ruby-core:74478] [Bug #12204] GREEK CAPITAL RHO and SIGMA' assert_equal("\u03A3", "\u03A1".succ, bug12204) |
