diff options
Diffstat (limited to 'test/ruby/test_transcode.rb')
-rw-r--r-- | test/ruby/test_transcode.rb | 535 |
1 files changed, 289 insertions, 246 deletions
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index c8b0034e06..63d37f4ba4 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -10,9 +10,9 @@ class TestTranscode < Test::Unit::TestCase assert_raise(Encoding::ConverterNotFoundError) { 'abc'.encode!('foo', 'bar') } assert_raise(Encoding::ConverterNotFoundError) { 'abc'.force_encoding('utf-8').encode('foo') } assert_raise(Encoding::ConverterNotFoundError) { 'abc'.force_encoding('utf-8').encode!('foo') } - assert_raise(Encoding::UndefinedConversionError) { "\x80".encode('utf-8','ASCII-8BIT') } - assert_raise(Encoding::InvalidByteSequenceError) { "\x80".encode('utf-8','US-ASCII') } - assert_raise(Encoding::UndefinedConversionError) { "\xA5".encode('utf-8','iso-8859-3') } + assert_undefined_in("\x80", 'ASCII-8BIT') + assert_invalid_in("\x80", 'US-ASCII') + assert_undefined_in("\xA5", 'iso-8859-3') assert_raise(FrozenError) { 'hello'.freeze.encode!('iso-8859-1') } assert_raise(FrozenError) { '\u3053\u3093\u306b\u3061\u306f'.freeze.encode!('iso-8859-1') } # こんにちは end @@ -52,16 +52,6 @@ class TestTranscode < Test::Unit::TestCase assert_equal("\u20AC"*200000, ("\xA4"*200000).encode!('utf-8', 'iso-8859-15')) end - def check_both_ways(utf8, raw, encoding) - assert_equal(utf8.force_encoding('utf-8'), raw.encode('utf-8', encoding),utf8.dump+raw.dump) - assert_equal(raw.force_encoding(encoding), utf8.encode(encoding, 'utf-8')) - end - - def check_both_ways2(str1, enc1, str2, enc2) - assert_equal(str1.force_encoding(enc1), str2.encode(enc1, enc2)) - assert_equal(str2.force_encoding(enc2), str1.encode(enc2, enc1)) - end - def test_encoding_of_ascii_originating_from_binary binary_string = [0x82, 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x76, 0x65, 0x72, 0x79, 0x20, 0x6c, 0x6f, @@ -188,16 +178,16 @@ class TestTranscode < Test::Unit::TestCase def test_windows_874 check_both_ways("\u20AC", "\x80", 'windows-874') # € - assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-874') } - assert_raise(Encoding::UndefinedConversionError) { "\x84".encode("utf-8", 'windows-874') } + assert_undefined_in("\x81", 'windows-874') + assert_undefined_in("\x84", 'windows-874') check_both_ways("\u2026", "\x85", 'windows-874') # … - assert_raise(Encoding::UndefinedConversionError) { "\x86".encode("utf-8", 'windows-874') } - assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'windows-874') } - assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-874') } + assert_undefined_in("\x86", 'windows-874') + assert_undefined_in("\x8F", 'windows-874') + assert_undefined_in("\x90", 'windows-874') check_both_ways("\u2018", "\x91", 'windows-874') # ‘ check_both_ways("\u2014", "\x97", 'windows-874') # — - assert_raise(Encoding::UndefinedConversionError) { "\x98".encode("utf-8", 'windows-874') } - assert_raise(Encoding::UndefinedConversionError) { "\x9F".encode("utf-8", 'windows-874') } + assert_undefined_in("\x98", 'windows-874') + assert_undefined_in("\x9F", 'windows-874') check_both_ways("\u00A0", "\xA0", 'windows-874') # non-breaking space check_both_ways("\u0E0F", "\xAF", 'windows-874') # ฏ check_both_ways("\u0E10", "\xB0", 'windows-874') # ฐ @@ -206,31 +196,31 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u0E2F", "\xCF", 'windows-874') # ฯ check_both_ways("\u0E30", "\xD0", 'windows-874') # ะ check_both_ways("\u0E3A", "\xDA", 'windows-874') # ฺ - assert_raise(Encoding::UndefinedConversionError) { "\xDB".encode("utf-8", 'windows-874') } - assert_raise(Encoding::UndefinedConversionError) { "\xDE".encode("utf-8", 'windows-874') } + assert_undefined_in("\xDB", 'windows-874') + assert_undefined_in("\xDE", 'windows-874') check_both_ways("\u0E3F", "\xDF", 'windows-874') # ฿ check_both_ways("\u0E40", "\xE0", 'windows-874') # เ check_both_ways("\u0E4F", "\xEF", 'windows-874') # ๏ check_both_ways("\u0E50", "\xF0", 'windows-874') # ๐ check_both_ways("\u0E5B", "\xFB", 'windows-874') # ๛ - assert_raise(Encoding::UndefinedConversionError) { "\xFC".encode("utf-8", 'windows-874') } - assert_raise(Encoding::UndefinedConversionError) { "\xFF".encode("utf-8", 'windows-874') } + assert_undefined_in("\xFC", 'windows-874') + assert_undefined_in("\xFF", 'windows-874') end def test_windows_1250 check_both_ways("\u20AC", "\x80", 'windows-1250') # € - assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1250') } + assert_undefined_in("\x81", 'windows-1250') check_both_ways("\u201A", "\x82", 'windows-1250') # ‚ - assert_raise(Encoding::UndefinedConversionError) { "\x83".encode("utf-8", 'windows-1250') } + assert_undefined_in("\x83", 'windows-1250') check_both_ways("\u201E", "\x84", 'windows-1250') # „ check_both_ways("\u2021", "\x87", 'windows-1250') # ‡ - assert_raise(Encoding::UndefinedConversionError) { "\x88".encode("utf-8", 'windows-1250') } + assert_undefined_in("\x88", 'windows-1250') check_both_ways("\u2030", "\x89", 'windows-1250') # ‰ check_both_ways("\u0179", "\x8F", 'windows-1250') # Ź - assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-1250') } + assert_undefined_in("\x90", 'windows-1250') check_both_ways("\u2018", "\x91", 'windows-1250') # ‘ check_both_ways("\u2014", "\x97", 'windows-1250') # — - assert_raise(Encoding::UndefinedConversionError) { "\x98".encode("utf-8", 'windows-1250') } + assert_undefined_in("\x98", 'windows-1250') check_both_ways("\u2122", "\x99", 'windows-1250') # ™ check_both_ways("\u00A0", "\xA0", 'windows-1250') # non-breaking space check_both_ways("\u017B", "\xAF", 'windows-1250') # Ż @@ -251,7 +241,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u20AC", "\x88", 'windows-1251') # € check_both_ways("\u040F", "\x8F", 'windows-1251') # Џ check_both_ways("\u0452", "\x90", 'windows-1251') # ђ - assert_raise(Encoding::UndefinedConversionError) { "\x98".encode("utf-8", 'windows-1251') } + assert_undefined_in("\x98", 'windows-1251') check_both_ways("\u045F", "\x9F", 'windows-1251') # џ check_both_ways("\u00A0", "\xA0", 'windows-1251') # non-breaking space check_both_ways("\u0407", "\xAF", 'windows-1251') # Ї @@ -269,16 +259,16 @@ class TestTranscode < Test::Unit::TestCase def test_windows_1252 check_both_ways("\u20AC", "\x80", 'windows-1252') # € - assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1252') } + assert_undefined_in("\x81", 'windows-1252') check_both_ways("\u201A", "\x82", 'windows-1252') # ‚ check_both_ways("\u0152", "\x8C", 'windows-1252') # >Œ - assert_raise(Encoding::UndefinedConversionError) { "\x8D".encode("utf-8", 'windows-1252') } + assert_undefined_in("\x8D", 'windows-1252') check_both_ways("\u017D", "\x8E", 'windows-1252') # Ž - assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'windows-1252') } - assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-1252') } + assert_undefined_in("\x8F", 'windows-1252') + assert_undefined_in("\x90", 'windows-1252') check_both_ways("\u2018", "\x91", 'windows-1252') #‘ check_both_ways("\u0153", "\x9C", 'windows-1252') # œ - assert_raise(Encoding::UndefinedConversionError) { "\x9D".encode("utf-8", 'windows-1252') } + assert_undefined_in("\x9D", 'windows-1252') check_both_ways("\u017E", "\x9E", 'windows-1252') # ž check_both_ways("\u00A0", "\xA0", 'windows-1252') # non-breaking space check_both_ways("\u00AF", "\xAF", 'windows-1252') # ¯ @@ -296,24 +286,24 @@ class TestTranscode < Test::Unit::TestCase def test_windows_1253 check_both_ways("\u20AC", "\x80", 'windows-1253') # € - assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1253') } + assert_undefined_in("\x81", 'windows-1253') check_both_ways("\u201A", "\x82", 'windows-1253') # ‚ check_both_ways("\u2021", "\x87", 'windows-1253') # ‡ - assert_raise(Encoding::UndefinedConversionError) { "\x88".encode("utf-8", 'windows-1253') } + assert_undefined_in("\x88", 'windows-1253') check_both_ways("\u2030", "\x89", 'windows-1253') # ‰ - assert_raise(Encoding::UndefinedConversionError) { "\x8A".encode("utf-8", 'windows-1253') } + assert_undefined_in("\x8A", 'windows-1253') check_both_ways("\u2039", "\x8B", 'windows-1253') # ‹ - assert_raise(Encoding::UndefinedConversionError) { "\x8C".encode("utf-8", 'windows-1253') } - assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'windows-1253') } - assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-1253') } + assert_undefined_in("\x8C", 'windows-1253') + assert_undefined_in("\x8F", 'windows-1253') + assert_undefined_in("\x90", 'windows-1253') check_both_ways("\u2018", "\x91", 'windows-1253') # ‘ check_both_ways("\u2014", "\x97", 'windows-1253') # — - assert_raise(Encoding::UndefinedConversionError) { "\x98".encode("utf-8", 'windows-1253') } + assert_undefined_in("\x98", 'windows-1253') check_both_ways("\u2122", "\x99", 'windows-1253') # ™ - assert_raise(Encoding::UndefinedConversionError) { "\x9A".encode("utf-8", 'windows-1253') } + assert_undefined_in("\x9A", 'windows-1253') check_both_ways("\u203A", "\x9B", 'windows-1253') # › - assert_raise(Encoding::UndefinedConversionError) { "\x9C".encode("utf-8", 'windows-1253') } - assert_raise(Encoding::UndefinedConversionError) { "\x9F".encode("utf-8", 'windows-1253') } + assert_undefined_in("\x9C", 'windows-1253') + assert_undefined_in("\x9F", 'windows-1253') check_both_ways("\u00A0", "\xA0", 'windows-1253') # non-breaking space check_both_ways("\u2015", "\xAF", 'windows-1253') # ― check_both_ways("\u00B0", "\xB0", 'windows-1253') # ° @@ -322,28 +312,28 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u039F", "\xCF", 'windows-1253') # Ο check_both_ways("\u03A0", "\xD0", 'windows-1253') # Π check_both_ways("\u03A1", "\xD1", 'windows-1253') # Ρ - assert_raise(Encoding::UndefinedConversionError) { "\xD2".encode("utf-8", 'windows-1253') } + assert_undefined_in("\xD2", 'windows-1253') check_both_ways("\u03A3", "\xD3", 'windows-1253') # Σ check_both_ways("\u03AF", "\xDF", 'windows-1253') # ί check_both_ways("\u03B0", "\xE0", 'windows-1253') # ΰ check_both_ways("\u03BF", "\xEF", 'windows-1253') # ο check_both_ways("\u03C0", "\xF0", 'windows-1253') # π check_both_ways("\u03CE", "\xFE", 'windows-1253') # ώ - assert_raise(Encoding::UndefinedConversionError) { "\xFF".encode("utf-8", 'windows-1253') } + assert_undefined_in("\xFF", 'windows-1253') end def test_windows_1254 check_both_ways("\u20AC", "\x80", 'windows-1254') # € - assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1254') } + assert_undefined_in("\x81", 'windows-1254') check_both_ways("\u201A", "\x82", 'windows-1254') # ‚ check_both_ways("\u0152", "\x8C", 'windows-1254') # Œ - assert_raise(Encoding::UndefinedConversionError) { "\x8D".encode("utf-8", 'windows-1254') } - assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'windows-1254') } - assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-1254') } + assert_undefined_in("\x8D", 'windows-1254') + assert_undefined_in("\x8F", 'windows-1254') + assert_undefined_in("\x90", 'windows-1254') check_both_ways("\u2018", "\x91", 'windows-1254') # ‘ check_both_ways("\u0153", "\x9C", 'windows-1254') # œ - assert_raise(Encoding::UndefinedConversionError) { "\x9D".encode("utf-8", 'windows-1254') } - assert_raise(Encoding::UndefinedConversionError) { "\x9E".encode("utf-8", 'windows-1254') } + assert_undefined_in("\x9D", 'windows-1254') + assert_undefined_in("\x9E", 'windows-1254') check_both_ways("\u0178", "\x9F", 'windows-1254') # Ÿ check_both_ways("\u00A0", "\xA0", 'windows-1254') # non-breaking space check_both_ways("\u00AF", "\xAF", 'windows-1254') # ¯ @@ -361,20 +351,20 @@ class TestTranscode < Test::Unit::TestCase def test_windows_1255 check_both_ways("\u20AC", "\x80", 'windows-1255') # € - assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1255') } + assert_undefined_in("\x81", 'windows-1255') check_both_ways("\u201A", "\x82", 'windows-1255') # ‚ check_both_ways("\u2030", "\x89", 'windows-1255') # ‰ - assert_raise(Encoding::UndefinedConversionError) { "\x8A".encode("utf-8", 'windows-1255') } + assert_undefined_in("\x8A", 'windows-1255') check_both_ways("\u2039", "\x8B", 'windows-1255') # ‹ - assert_raise(Encoding::UndefinedConversionError) { "\x8C".encode("utf-8", 'windows-1255') } - assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'windows-1255') } - assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-1255') } + assert_undefined_in("\x8C", 'windows-1255') + assert_undefined_in("\x8F", 'windows-1255') + assert_undefined_in("\x90", 'windows-1255') check_both_ways("\u2018", "\x91", 'windows-1255') # ‘ check_both_ways("\u2122", "\x99", 'windows-1255') # ™ - assert_raise(Encoding::UndefinedConversionError) { "\x9A".encode("utf-8", 'windows-1255') } + assert_undefined_in("\x9A", 'windows-1255') check_both_ways("\u203A", "\x9B", 'windows-1255') # › - assert_raise(Encoding::UndefinedConversionError) { "\x9C".encode("utf-8", 'windows-1255') } - assert_raise(Encoding::UndefinedConversionError) { "\x9F".encode("utf-8", 'windows-1255') } + assert_undefined_in("\x9C", 'windows-1255') + assert_undefined_in("\x9F", 'windows-1255') check_both_ways("\u00A0", "\xA0", 'windows-1255') # non-breaking space check_both_ways("\u00A1", "\xA1", 'windows-1255') # ¡ check_both_ways("\u00D7", "\xAA", 'windows-1255') # × @@ -391,17 +381,17 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u05C0", "\xD0", 'windows-1255') # ׀ check_both_ways("\u05F3", "\xD7", 'windows-1255') # ׳ check_both_ways("\u05F4", "\xD8", 'windows-1255') # ״ - assert_raise(Encoding::UndefinedConversionError) { "\xD9".encode("utf-8", 'windows-1255') } - assert_raise(Encoding::UndefinedConversionError) { "\xDF".encode("utf-8", 'windows-1255') } + assert_undefined_in("\xD9", 'windows-1255') + assert_undefined_in("\xDF", 'windows-1255') check_both_ways("\u05D0", "\xE0", 'windows-1255') # א check_both_ways("\u05DF", "\xEF", 'windows-1255') # ן check_both_ways("\u05E0", "\xF0", 'windows-1255') # נ check_both_ways("\u05EA", "\xFA", 'windows-1255') # ת - assert_raise(Encoding::UndefinedConversionError) { "\xFB".encode("utf-8", 'windows-1255') } - assert_raise(Encoding::UndefinedConversionError) { "\xFC".encode("utf-8", 'windows-1255') } + assert_undefined_in("\xFB", 'windows-1255') + assert_undefined_in("\xFC", 'windows-1255') check_both_ways("\u200E", "\xFD", 'windows-1255') # left-to-right mark check_both_ways("\u200F", "\xFE", 'windows-1255') # right-to-left mark - assert_raise(Encoding::UndefinedConversionError) { "\xFF".encode("utf-8", 'windows-1255') } + assert_undefined_in("\xFF", 'windows-1255') end def test_windows_1256 @@ -429,35 +419,35 @@ class TestTranscode < Test::Unit::TestCase def test_windows_1257 check_both_ways("\u20AC", "\x80", 'windows-1257') # € - assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1257') } + assert_undefined_in("\x81", 'windows-1257') check_both_ways("\u201A", "\x82", 'windows-1257') # ‚ - assert_raise(Encoding::UndefinedConversionError) { "\x83".encode("utf-8", 'windows-1257') } + assert_undefined_in("\x83", 'windows-1257') check_both_ways("\u201E", "\x84", 'windows-1257') # „ check_both_ways("\u2021", "\x87", 'windows-1257') # ‡ - assert_raise(Encoding::UndefinedConversionError) { "\x88".encode("utf-8", 'windows-1257') } + assert_undefined_in("\x88", 'windows-1257') check_both_ways("\u2030", "\x89", 'windows-1257') # ‰ - assert_raise(Encoding::UndefinedConversionError) { "\x8A".encode("utf-8", 'windows-1257') } + assert_undefined_in("\x8A", 'windows-1257') check_both_ways("\u2039", "\x8B", 'windows-1257') # ‹ - assert_raise(Encoding::UndefinedConversionError) { "\x8C".encode("utf-8", 'windows-1257') } + assert_undefined_in("\x8C", 'windows-1257') check_both_ways("\u00A8", "\x8D", 'windows-1257') # ¨ check_both_ways("\u02C7", "\x8E", 'windows-1257') # ˇ check_both_ways("\u00B8", "\x8F", 'windows-1257') # ¸ - assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-1257') } + assert_undefined_in("\x90", 'windows-1257') check_both_ways("\u2018", "\x91", 'windows-1257') # ‘ check_both_ways("\u2014", "\x97", 'windows-1257') # — - assert_raise(Encoding::UndefinedConversionError) { "\x98".encode("utf-8", 'windows-1257') } + assert_undefined_in("\x98", 'windows-1257') check_both_ways("\u2122", "\x99", 'windows-1257') # ™ - assert_raise(Encoding::UndefinedConversionError) { "\x9A".encode("utf-8", 'windows-1257') } + assert_undefined_in("\x9A", 'windows-1257') check_both_ways("\u203A", "\x9B", 'windows-1257') # › - assert_raise(Encoding::UndefinedConversionError) { "\x9C".encode("utf-8", 'windows-1257') } + assert_undefined_in("\x9C", 'windows-1257') check_both_ways("\u00AF", "\x9D", 'windows-1257') # ¯ check_both_ways("\u02DB", "\x9E", 'windows-1257') # ˛ - assert_raise(Encoding::UndefinedConversionError) { "\x9F".encode("utf-8", 'windows-1257') } + assert_undefined_in("\x9F", 'windows-1257') check_both_ways("\u00A0", "\xA0", 'windows-1257') # non-breaking space - assert_raise(Encoding::UndefinedConversionError) { "\xA1".encode("utf-8", 'windows-1257') } + assert_undefined_in("\xA1", 'windows-1257') check_both_ways("\u00A2", "\xA2", 'windows-1257') # ¢ check_both_ways("\u00A4", "\xA4", 'windows-1257') # ¤ - assert_raise(Encoding::UndefinedConversionError) { "\xA5".encode("utf-8", 'windows-1257') } + assert_undefined_in("\xA5", 'windows-1257') check_both_ways("\u00A6", "\xA6", 'windows-1257') # ¦ check_both_ways("\u00C6", "\xAF", 'windows-1257') # Æ check_both_ways("\u00B0", "\xB0", 'windows-1257') # ° @@ -492,9 +482,9 @@ class TestTranscode < Test::Unit::TestCase end def test_IBM720 - assert_raise(Encoding::UndefinedConversionError) { "\x80".encode("utf-8", 'IBM720') } - assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'IBM720') } - assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'IBM720') } + assert_undefined_in("\x80", 'IBM720') + assert_undefined_in("\x8F", 'IBM720') + assert_undefined_in("\x90", 'IBM720') check_both_ways("\u0627", "\x9F", 'IBM720') # ا check_both_ways("\u0628", "\xA0", 'IBM720') # ب check_both_ways("\u00BB", "\xAF", 'IBM720') # » @@ -580,17 +570,17 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u00A4", "\xCF", 'IBM857') # ¤ check_both_ways("\u00BA", "\xD0", 'IBM857') # º check_both_ways("\u00C8", "\xD4", 'IBM857') # È - assert_raise(Encoding::UndefinedConversionError) { "\xD5".encode("utf-8", 'IBM857') } + assert_undefined_in("\xD5", 'IBM857') check_both_ways("\u00CD", "\xD6", 'IBM857') # Í check_both_ways("\u2580", "\xDF", 'IBM857') # ▀ check_both_ways("\u00D3", "\xE0", 'IBM857') # Ó check_both_ways("\u00B5", "\xE6", 'IBM857') # µ - assert_raise(Encoding::UndefinedConversionError) { "\xE7".encode("utf-8", 'IBM857') } + assert_undefined_in("\xE7", 'IBM857') check_both_ways("\u00D7", "\xE8", 'IBM857') # × check_both_ways("\u00B4", "\xEF", 'IBM857') # ´ check_both_ways("\u00AD", "\xF0", 'IBM857') # soft hyphen check_both_ways("\u00B1", "\xF1", 'IBM857') # ± - assert_raise(Encoding::UndefinedConversionError) { "\xF2".encode("utf-8", 'IBM857') } + assert_undefined_in("\xF2", 'IBM857') check_both_ways("\u00BE", "\xF3", 'IBM857') # ¾ check_both_ways("\u00A0", "\xFF", 'IBM857') # non-breaking space end @@ -671,6 +661,25 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u00A0", "\xFF", 'IBM863') # non-breaking space end + def test_IBM864 + check_both_ways("\u00B0", "\x80", 'IBM864') # ° + check_both_ways("\u2518", "\x8F", 'IBM864') # ┘ + check_both_ways("\u03B2", "\x90", 'IBM864') # β + check_both_ways("\uFE73", "\x9F", 'IBM864') # ﹳ + check_both_ways("\u00A0", "\xA0", 'IBM864') # non-breaking space + check_both_ways("\uFEA5", "\xAF", 'IBM864') # ﺥ + check_both_ways("\u0660", "\xB0", 'IBM864') # ٠ + check_both_ways("\u061F", "\xBF", 'IBM864') # ؟ + check_both_ways("\u00A2", "\xC0", 'IBM864') # ¢ + check_both_ways("\uFEA9", "\xCF", 'IBM864') # ﺩ + check_both_ways("\uFEAB", "\xD0", 'IBM864') # ﺫ + check_both_ways("\uFEC9", "\xDF", 'IBM864') # ﻉ + check_both_ways("\u0640", "\xE0", 'IBM864') # ـ + check_both_ways("\uFEE1", "\xEF", 'IBM864') # ﻡ + check_both_ways("\uFE7D", "\xF0", 'IBM864') # ﹽ + check_both_ways("\u25A0", "\xFE", 'IBM864') # ■ + end + def test_IBM865 check_both_ways("\u00C7", "\x80", 'IBM865') # Ç check_both_ways("\u00C5", "\x8F", 'IBM865') # Å @@ -710,16 +719,16 @@ class TestTranscode < Test::Unit::TestCase end def test_IBM869 - assert_raise(Encoding::UndefinedConversionError) { "\x80".encode("utf-8", 'IBM869') } - assert_raise(Encoding::UndefinedConversionError) { "\x85".encode("utf-8", 'IBM869') } + assert_undefined_in("\x80", 'IBM869') + assert_undefined_in("\x85", 'IBM869') check_both_ways("\u0386", "\x86", 'IBM869') # Ά - assert_raise(Encoding::UndefinedConversionError) { "\x87".encode("utf-8", 'IBM869') } + assert_undefined_in("\x87", 'IBM869') check_both_ways("\u00B7", "\x88", 'IBM869') # · check_both_ways("\u0389", "\x8F", 'IBM869') # Ή check_both_ways("\u038A", "\x90", 'IBM869') # Ί check_both_ways("\u038C", "\x92", 'IBM869') # Ό - assert_raise(Encoding::UndefinedConversionError) { "\x93".encode("utf-8", 'IBM869') } - assert_raise(Encoding::UndefinedConversionError) { "\x94".encode("utf-8", 'IBM869') } + assert_undefined_in("\x93", 'IBM869') + assert_undefined_in("\x94", 'IBM869') check_both_ways("\u038E", "\x95", 'IBM869') # Ύ check_both_ways("\u03AF", "\x9F", 'IBM869') # ί check_both_ways("\u03CA", "\xA0", 'IBM869') # ϊ @@ -808,7 +817,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u03BF", "\xEF", 'macGreek') # ο check_both_ways("\u03C0", "\xF0", 'macGreek') # π check_both_ways("\u03B0", "\xFE", 'macGreek') # ΰ - assert_raise(Encoding::UndefinedConversionError) { "\xFF".encode("utf-8", 'macGreek') } + assert_undefined_in("\xFF", 'macGreek') end def test_macIceland @@ -887,7 +896,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u00D4", "\xEF", 'macTurkish') # Ô #check_both_ways("\uF8FF", "\xF0", 'macTurkish') # Apple logo check_both_ways("\u00D9", "\xF4", 'macTurkish') # Ù - assert_raise(Encoding::UndefinedConversionError) { "\xF5".encode("utf-8", 'macTurkish') } + assert_undefined_in("\xF5", 'macTurkish') check_both_ways("\u02C6", "\xF6", 'macTurkish') # ˆ check_both_ways("\u02C7", "\xFF", 'macTurkish') # ˇ end @@ -958,11 +967,11 @@ class TestTranscode < Test::Unit::TestCase end def test_TIS_620 - assert_raise(Encoding::UndefinedConversionError) { "\x80".encode("utf-8", 'TIS-620') } - assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'TIS-620') } - assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'TIS-620') } - assert_raise(Encoding::UndefinedConversionError) { "\x9F".encode("utf-8", 'TIS-620') } - assert_raise(Encoding::UndefinedConversionError) { "\xA0".encode("utf-8", 'TIS-620') } + assert_undefined_in("\x80", 'TIS-620') + assert_undefined_in("\x8F", 'TIS-620') + assert_undefined_in("\x90", 'TIS-620') + assert_undefined_in("\x9F", 'TIS-620') + assert_undefined_in("\xA0", 'TIS-620') check_both_ways("\u0E01", "\xA1", 'TIS-620') # ก check_both_ways("\u0E0F", "\xAF", 'TIS-620') # ฏ check_both_ways("\u0E10", "\xB0", 'TIS-620') # ฐ @@ -971,15 +980,15 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u0E2F", "\xCF", 'TIS-620') # ฯ check_both_ways("\u0E30", "\xD0", 'TIS-620') # ะ check_both_ways("\u0E3A", "\xDA", 'TIS-620') # ฺ - assert_raise(Encoding::UndefinedConversionError) { "\xDB".encode("utf-8", 'TIS-620') } - assert_raise(Encoding::UndefinedConversionError) { "\xDE".encode("utf-8", 'TIS-620') } + assert_undefined_in("\xDB", 'TIS-620') + assert_undefined_in("\xDE", 'TIS-620') check_both_ways("\u0E3F", "\xDF", 'TIS-620') # ฿ check_both_ways("\u0E40", "\xE0", 'TIS-620') # เ check_both_ways("\u0E4F", "\xEF", 'TIS-620') # ๏ check_both_ways("\u0E50", "\xF0", 'TIS-620') # ๐ check_both_ways("\u0E5B", "\xFB", 'TIS-620') # ๛ - assert_raise(Encoding::UndefinedConversionError) { "\xFC".encode("utf-8", 'TIS-620') } - assert_raise(Encoding::UndefinedConversionError) { "\xFF".encode("utf-8", 'TIS-620') } + assert_undefined_in("\xFC", 'TIS-620') + assert_undefined_in("\xFF", 'TIS-620') end def test_CP850 @@ -1182,15 +1191,15 @@ class TestTranscode < Test::Unit::TestCase expected = "\u{3042}\u{3044}\u{20bb7}" assert_equal(expected, %w/fffe4230443042d8b7df/.pack("H*").encode("UTF-8","UTF-16")) check_both_ways(expected, %w/feff30423044d842dfb7/.pack("H*"), "UTF-16") - assert_raise(Encoding::InvalidByteSequenceError){%w/feffdfb7/.pack("H*").encode("UTF-8","UTF-16")} - assert_raise(Encoding::InvalidByteSequenceError){%w/fffeb7df/.pack("H*").encode("UTF-8","UTF-16")} + assert_invalid_in(%w/feffdfb7/.pack("H*"), "UTF-16") + assert_invalid_in(%w/fffeb7df/.pack("H*"), "UTF-16") end def test_utf_32_bom expected = "\u{3042}\u{3044}\u{20bb7}" assert_equal(expected, %w/fffe00004230000044300000b70b0200/.pack("H*").encode("UTF-8","UTF-32")) check_both_ways(expected, %w/0000feff000030420000304400020bb7/.pack("H*"), "UTF-32") - assert_raise(Encoding::InvalidByteSequenceError){%w/0000feff00110000/.pack("H*").encode("UTF-8","UTF-32")} + assert_invalid_in(%w/0000feff00110000/.pack("H*"), "UTF-32") end def check_utf_32_both_ways(utf8, raw) @@ -1372,24 +1381,24 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u71FC", "\xE0\x9E", 'shift_jis') # 燼 check_both_ways("\u71F9", "\xE0\x9F", 'shift_jis') # 燹 check_both_ways("\u73F1", "\xE0\xFC", 'shift_jis') # 珱 - assert_raise(Encoding::UndefinedConversionError) { "\xEF\x40".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xEF\x7E".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xEF\x80".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xEF\x9E".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xEF\x9F".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xEF\xFC".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xF0\x40".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xF0\x7E".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xF0\x80".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xF0\x9E".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xF0\x9F".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xF0\xFC".encode("utf-8", 'shift_jis') } + assert_undefined_in("\xEF\x40", 'shift_jis') + assert_undefined_in("\xEF\x7E", 'shift_jis') + assert_undefined_in("\xEF\x80", 'shift_jis') + assert_undefined_in("\xEF\x9E", 'shift_jis') + assert_undefined_in("\xEF\x9F", 'shift_jis') + assert_undefined_in("\xEF\xFC", 'shift_jis') + assert_undefined_in("\xF0\x40", 'shift_jis') + assert_undefined_in("\xF0\x7E", 'shift_jis') + assert_undefined_in("\xF0\x80", 'shift_jis') + assert_undefined_in("\xF0\x9E", 'shift_jis') + assert_undefined_in("\xF0\x9F", 'shift_jis') + assert_undefined_in("\xF0\xFC", 'shift_jis') #check_both_ways("\u9ADC", "\xFC\x40", 'shift_jis') # 髜 (IBM extended) - assert_raise(Encoding::UndefinedConversionError) { "\xFC\x7E".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xFC\x80".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xFC\x9E".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xFC\x9F".encode("utf-8", 'shift_jis') } - assert_raise(Encoding::UndefinedConversionError) { "\xFC\xFC".encode("utf-8", 'shift_jis') } + assert_undefined_in("\xFC\x7E", 'shift_jis') + assert_undefined_in("\xFC\x80", 'shift_jis') + assert_undefined_in("\xFC\x9E", 'shift_jis') + assert_undefined_in("\xFC\x9F", 'shift_jis') + assert_undefined_in("\xFC\xFC", 'shift_jis') check_both_ways("\u677E\u672C\u884C\u5F18", "\x8f\xbc\x96\x7b\x8d\x73\x8d\x4f", 'shift_jis') # 松本行弘 check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\x90\xC2\x8E\x52\x8A\x77\x89\x40\x91\xE5\x8A\x77", 'shift_jis') # 青山学院大学 check_both_ways("\u795E\u6797\u7FA9\u535A", "\x90\x5F\x97\xD1\x8B\x60\x94\x8E", 'shift_jis') # 神林義博 @@ -1409,34 +1418,34 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u00F7", "\xA1\xE0", 'euc-jp') # ÷ check_both_ways("\u25C7", "\xA1\xFE", 'euc-jp') # ◇ check_both_ways("\u25C6", "\xA2\xA1", 'euc-jp') # ◆ - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xAF".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xB9".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xC2".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xC9".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xD1".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xDB".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xEB".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xF1".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xFA".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xFD".encode("utf-8", 'euc-jp') } + assert_undefined_in("\xA2\xAF", 'euc-jp') + assert_undefined_in("\xA2\xB9", 'euc-jp') + assert_undefined_in("\xA2\xC2", 'euc-jp') + assert_undefined_in("\xA2\xC9", 'euc-jp') + assert_undefined_in("\xA2\xD1", 'euc-jp') + assert_undefined_in("\xA2\xDB", 'euc-jp') + assert_undefined_in("\xA2\xEB", 'euc-jp') + assert_undefined_in("\xA2\xF1", 'euc-jp') + assert_undefined_in("\xA2\xFA", 'euc-jp') + assert_undefined_in("\xA2\xFD", 'euc-jp') check_both_ways("\u25EF", "\xA2\xFE", 'euc-jp') # ◯ - assert_raise(Encoding::UndefinedConversionError) { "\xA3\xAF".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA3\xBA".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA3\xC0".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA3\xDB".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA3\xE0".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA3\xFB".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA4\xF4".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA5\xF7".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA6\xB9".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA6\xC0".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA6\xD9".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA7\xC2".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA7\xD0".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA7\xF2".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xA8\xC1".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xCF\xD4".encode("utf-8", 'euc-jp') } - assert_raise(Encoding::UndefinedConversionError) { "\xCF\xFE".encode("utf-8", 'euc-jp') } + assert_undefined_in("\xA3\xAF", 'euc-jp') + assert_undefined_in("\xA3\xBA", 'euc-jp') + assert_undefined_in("\xA3\xC0", 'euc-jp') + assert_undefined_in("\xA3\xDB", 'euc-jp') + assert_undefined_in("\xA3\xE0", 'euc-jp') + assert_undefined_in("\xA3\xFB", 'euc-jp') + assert_undefined_in("\xA4\xF4", 'euc-jp') + assert_undefined_in("\xA5\xF7", 'euc-jp') + assert_undefined_in("\xA6\xB9", 'euc-jp') + assert_undefined_in("\xA6\xC0", 'euc-jp') + assert_undefined_in("\xA6\xD9", 'euc-jp') + assert_undefined_in("\xA7\xC2", 'euc-jp') + assert_undefined_in("\xA7\xD0", 'euc-jp') + assert_undefined_in("\xA7\xF2", 'euc-jp') + assert_undefined_in("\xA8\xC1", 'euc-jp') + assert_undefined_in("\xCF\xD4", 'euc-jp') + assert_undefined_in("\xCF\xFE", 'euc-jp') check_both_ways("\u6A97", "\xDD\xA1", 'euc-jp') # 檗 check_both_ways("\u6BEF", "\xDD\xDF", 'euc-jp') # 毯 check_both_ways("\u9EBE", "\xDD\xE0", 'euc-jp') # 麾 @@ -1449,7 +1458,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u71FC", "\xDF\xFE", 'euc-jp') # 燼 check_both_ways("\u71F9", "\xE0\xA1", 'euc-jp') # 燹 check_both_ways("\u73F1", "\xE0\xFE", 'euc-jp') # 珱 - assert_raise(Encoding::UndefinedConversionError) { "\xF4\xA7".encode("utf-8", 'euc-jp') } + assert_undefined_in("\xF4\xA7", 'euc-jp') #check_both_ways("\u9ADC", "\xFC\xE3", 'euc-jp') # 髜 (IBM extended) check_both_ways("\u677E\u672C\u884C\u5F18", "\xBE\xBE\xCB\xDC\xB9\xD4\xB9\xB0", 'euc-jp') # 松本行弘 @@ -1481,7 +1490,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u2127", "\xA3\xE0", 'euc-jis-2004') # ℧ check_both_ways("\u30A0", "\xA3\xFB", 'euc-jis-2004') # ゠ check_both_ways("\uFF54", "\xA3\xF4", 'euc-jis-2004') # t - assert_raise(Encoding::UndefinedConversionError) { "\xA5\xF7".encode("utf-8", 'euc-jis-2004') } + assert_undefined_in("\xA5\xF7", 'euc-jis-2004') check_both_ways("\u2664", "\xA6\xB9", 'euc-jis-2004') # ♤ check_both_ways("\u2663", "\xA6\xC0", 'euc-jis-2004') # ♣ check_both_ways("\u03C2", "\xA6\xD9", 'euc-jis-2004') # ς @@ -1566,33 +1575,33 @@ class TestTranscode < Test::Unit::TestCase end def test_eucjp_sjis_undef - assert_raise(Encoding::UndefinedConversionError) { "\x8e\xe0".encode("Shift_JIS", "EUC-JP") } - assert_raise(Encoding::UndefinedConversionError) { "\x8e\xfe".encode("Shift_JIS", "EUC-JP") } - assert_raise(Encoding::UndefinedConversionError) { "\x8f\xa1\xa1".encode("Shift_JIS", "EUC-JP") } - assert_raise(Encoding::UndefinedConversionError) { "\x8f\xa1\xfe".encode("Shift_JIS", "EUC-JP") } - assert_raise(Encoding::UndefinedConversionError) { "\x8f\xfe\xa1".encode("Shift_JIS", "EUC-JP") } - assert_raise(Encoding::UndefinedConversionError) { "\x8f\xfe\xfe".encode("Shift_JIS", "EUC-JP") } - - assert_raise(Encoding::UndefinedConversionError) { "\xf0\x40".encode("EUC-JP", "Shift_JIS") } - assert_raise(Encoding::UndefinedConversionError) { "\xf0\x7e".encode("EUC-JP", "Shift_JIS") } - assert_raise(Encoding::UndefinedConversionError) { "\xf0\x80".encode("EUC-JP", "Shift_JIS") } - assert_raise(Encoding::UndefinedConversionError) { "\xf0\xfc".encode("EUC-JP", "Shift_JIS") } - assert_raise(Encoding::UndefinedConversionError) { "\xfc\x40".encode("EUC-JP", "Shift_JIS") } - assert_raise(Encoding::UndefinedConversionError) { "\xfc\x7e".encode("EUC-JP", "Shift_JIS") } - assert_raise(Encoding::UndefinedConversionError) { "\xfc\x80".encode("EUC-JP", "Shift_JIS") } - assert_raise(Encoding::UndefinedConversionError) { "\xfc\xfc".encode("EUC-JP", "Shift_JIS") } + assert_undefined_conversion("\x8e\xe0", "Shift_JIS", "EUC-JP") + assert_undefined_conversion("\x8e\xfe", "Shift_JIS", "EUC-JP") + assert_undefined_conversion("\x8f\xa1\xa1", "Shift_JIS", "EUC-JP") + assert_undefined_conversion("\x8f\xa1\xfe", "Shift_JIS", "EUC-JP") + assert_undefined_conversion("\x8f\xfe\xa1", "Shift_JIS", "EUC-JP") + assert_undefined_conversion("\x8f\xfe\xfe", "Shift_JIS", "EUC-JP") + + assert_undefined_conversion("\xf0\x40", "EUC-JP", "Shift_JIS") + assert_undefined_conversion("\xf0\x7e", "EUC-JP", "Shift_JIS") + assert_undefined_conversion("\xf0\x80", "EUC-JP", "Shift_JIS") + assert_undefined_conversion("\xf0\xfc", "EUC-JP", "Shift_JIS") + assert_undefined_conversion("\xfc\x40", "EUC-JP", "Shift_JIS") + assert_undefined_conversion("\xfc\x7e", "EUC-JP", "Shift_JIS") + assert_undefined_conversion("\xfc\x80", "EUC-JP", "Shift_JIS") + assert_undefined_conversion("\xfc\xfc", "EUC-JP", "Shift_JIS") end def test_iso_2022_jp - assert_raise(Encoding::InvalidByteSequenceError) { "\x1b(A".encode("utf-8", "iso-2022-jp") } - assert_raise(Encoding::InvalidByteSequenceError) { "\x1b$(A".encode("utf-8", "iso-2022-jp") } - assert_raise(Encoding::InvalidByteSequenceError) { "\x1b$C".encode("utf-8", "iso-2022-jp") } - assert_raise(Encoding::InvalidByteSequenceError) { "\x0e".encode("utf-8", "iso-2022-jp") } - assert_raise(Encoding::InvalidByteSequenceError) { "\x80".encode("utf-8", "iso-2022-jp") } - assert_raise(Encoding::InvalidByteSequenceError) { "\x1b$(Dd!\x1b(B".encode("utf-8", "iso-2022-jp") } - assert_raise(Encoding::UndefinedConversionError) { "\u9299".encode("iso-2022-jp") } - assert_raise(Encoding::UndefinedConversionError) { "\uff71\uff72\uff73\uff74\uff75".encode("iso-2022-jp") } - assert_raise(Encoding::InvalidByteSequenceError) { "\x1b(I12345\x1b(B".encode("utf-8", "iso-2022-jp") } + assert_invalid_in("\x1b(A", "iso-2022-jp") + assert_invalid_in("\x1b$(A", "iso-2022-jp") + assert_invalid_in("\x1b$C", "iso-2022-jp") + assert_invalid_in("\x0e", "iso-2022-jp") + assert_invalid_in("\x80", "iso-2022-jp") + assert_invalid_in("\x1b$(Dd!\x1b(B", "iso-2022-jp") + assert_undefined_conversion("\u9299", "iso-2022-jp") + assert_undefined_conversion("\uff71\uff72\uff73\uff74\uff75", "iso-2022-jp") + assert_invalid_in("\x1b(I12345\x1b(B", "iso-2022-jp") assert_equal("\xA1\xA1".force_encoding("euc-jp"), "\e$B!!\e(B".encode("EUC-JP", "ISO-2022-JP")) assert_equal("\e$B!!\e(B".force_encoding("ISO-2022-JP"), @@ -1625,6 +1634,8 @@ class TestTranscode < Test::Unit::TestCase assert_equal("\e$B%*!+%,%I%J!+%N!+%P%\\%^!+%Q%]%\"\e(B".force_encoding("cp50220"), "\xB5\xDE\xB6\xDE\xC4\xDE\xC5\xDE\xC9\xDE\xCA\xDE\xCE\xDE\xCF\xDE\xCA\xDF\xCE\xDF\xB1". encode("cp50220", "sjis")) + assert_equal("\e$B\x21\x23\e(I\x7E\e(B".force_encoding("cp50220"), + "\x8E\xA1\x8E\xFE".encode("cp50220", "cp51932")) end def test_iso_2022_jp_1 @@ -1655,11 +1666,11 @@ class TestTranscode < Test::Unit::TestCase assert_equal("\u005C", "\e(J\x5C\e(B".encode("UTF-8", "ISO-2022-JP")) assert_equal("\u005C", "\x5C".encode("stateless-ISO-2022-JP", "ISO-2022-JP")) assert_equal("\u005C", "\e(J\x5C\e(B".encode("stateless-ISO-2022-JP", "ISO-2022-JP")) - assert_raise(Encoding::UndefinedConversionError) { "\u00A5".encode("Shift_JIS") } - assert_raise(Encoding::UndefinedConversionError) { "\u00A5".encode("Windows-31J") } - assert_raise(Encoding::UndefinedConversionError) { "\u00A5".encode("EUC-JP") } - assert_raise(Encoding::UndefinedConversionError) { "\u00A5".encode("eucJP-ms") } - assert_raise(Encoding::UndefinedConversionError) { "\u00A5".encode("CP51932") } + assert_undefined_conversion("\u00A5", "Shift_JIS") + assert_undefined_conversion("\u00A5", "Windows-31J") + assert_undefined_conversion("\u00A5", "EUC-JP") + assert_undefined_conversion("\u00A5", "eucJP-ms") + assert_undefined_conversion("\u00A5", "CP51932") # FULLWIDTH REVERSE SOLIDUS check_both_ways("\uFF3C", "\x81\x5F", "Shift_JIS") @@ -1680,21 +1691,21 @@ class TestTranscode < Test::Unit::TestCase assert_equal("\u007E", "\e(J\x7E\e(B".encode("UTF-8", "ISO-2022-JP")) assert_equal("\u007E", "\x7E".encode("stateless-ISO-2022-JP", "ISO-2022-JP")) assert_equal("\u007E", "\e(J\x7E\e(B".encode("stateless-ISO-2022-JP", "ISO-2022-JP")) - assert_raise(Encoding::UndefinedConversionError) { "\u203E".encode("Shift_JIS") } - assert_raise(Encoding::UndefinedConversionError) { "\u203E".encode("Windows-31J") } - assert_raise(Encoding::UndefinedConversionError) { "\u203E".encode("EUC-JP") } - assert_raise(Encoding::UndefinedConversionError) { "\u203E".encode("eucJP-ms") } - assert_raise(Encoding::UndefinedConversionError) { "\u203E".encode("CP51932") } + assert_undefined_conversion("\u203E", "Shift_JIS") + assert_undefined_conversion("\u203E", "Windows-31J") + assert_undefined_conversion("\u203E", "EUC-JP") + assert_undefined_conversion("\u203E", "eucJP-ms") + assert_undefined_conversion("\u203E", "CP51932") end def test_gb2312 check_both_ways("\u3000", "\xA1\xA1", 'GB2312') # full-width space check_both_ways("\u3013", "\xA1\xFE", 'GB2312') # 〓 - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xB0".encode("utf-8", 'GB2312') } + assert_undefined_in("\xA2\xB0", 'GB2312') check_both_ways("\u2488", "\xA2\xB1", 'GB2312') # ⒈ - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xE4".encode("utf-8", 'GB2312') } + assert_undefined_in("\xA2\xE4", 'GB2312') check_both_ways("\u3220", "\xA2\xE5", 'GB2312') # ㈠ - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xF0".encode("utf-8", 'GB2312') } + assert_undefined_in("\xA2\xF0", 'GB2312') check_both_ways("\u2160", "\xA2\xF1", 'GB2312') # Ⅰ check_both_ways("\uFF01", "\xA3\xA1", 'GB2312') # ! check_both_ways("\uFFE3", "\xA3\xFE", 'GB2312') #  ̄ @@ -1705,9 +1716,9 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u0410", "\xA7\xA1", 'GB2312') # А check_both_ways("\u0430", "\xA7\xD1", 'GB2312') # а check_both_ways("\u0101", "\xA8\xA1", 'GB2312') # ā - assert_raise(Encoding::UndefinedConversionError) { "\xA8\xC4".encode("utf-8", 'GB2312') } + assert_undefined_in("\xA8\xC4", 'GB2312') check_both_ways("\u3105", "\xA8\xC5", 'GB2312') # ㄅ - assert_raise(Encoding::UndefinedConversionError) { "\xA9\xA3".encode("utf-8", 'GB2312') } + assert_undefined_in("\xA9\xA3", 'GB2312') check_both_ways("\u2500", "\xA9\xA4", 'GB2312') # ─ check_both_ways("\u554A", "\xB0\xA1", 'GB2312') # 啊 check_both_ways("\u5265", "\xB0\xFE", 'GB2312') # 剥 @@ -1721,7 +1732,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u7384", "\xD0\xFE", 'GB2312') # 玄 check_both_ways("\u4F4F", "\xD7\xA1", 'GB2312') # 住 check_both_ways("\u5EA7", "\xD7\xF9", 'GB2312') # 座 - assert_raise(Encoding::UndefinedConversionError) { "\xD7\xFA".encode("utf-8", 'GB2312') } + assert_undefined_in("\xD7\xFA", 'GB2312') check_both_ways("\u647A", "\xDF\xA1", 'GB2312') # 摺 check_both_ways("\u553C", "\xDF\xFE", 'GB2312') # 唼 check_both_ways("\u5537", "\xE0\xA1", 'GB2312') # 唷 @@ -1759,48 +1770,48 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u3000", "\xA1\xA1", 'GBK') # full-width space check_both_ways("\u3001", "\xA1\xA2", 'GBK') # 、 check_both_ways("\u3013", "\xA1\xFE", 'GBK') # 〓 - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xA0".encode("utf-8", 'GBK') } + assert_undefined_in("\xA2\xA0", 'GBK') check_both_ways("\u2170", "\xA2\xA1", 'GBK') # ⅰ - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xB0".encode("utf-8", 'GBK') } + assert_undefined_in("\xA2\xB0", 'GBK') check_both_ways("\u2488", "\xA2\xB1", 'GBK') # ⒈ - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xE4".encode("utf-8", 'GBK') } + assert_undefined_in("\xA2\xE4", 'GBK') check_both_ways("\u3220", "\xA2\xE5", 'GBK') # ㈠ - assert_raise(Encoding::UndefinedConversionError) { "\xA2\xF0".encode("utf-8", 'GBK') } + assert_undefined_in("\xA2\xF0", 'GBK') check_both_ways("\u2160", "\xA2\xF1", 'GBK') # Ⅰ - assert_raise(Encoding::UndefinedConversionError) { "\xA3\xA0".encode("utf-8", 'GBK') } + assert_undefined_in("\xA3\xA0", 'GBK') check_both_ways("\uFF01", "\xA3\xA1", 'GBK') # ! check_both_ways("\uFFE3", "\xA3\xFE", 'GBK') #  ̄ - assert_raise(Encoding::UndefinedConversionError) { "\xA4\xA0".encode("utf-8", 'GBK') } + assert_undefined_in("\xA4\xA0", 'GBK') check_both_ways("\u3041", "\xA4\xA1", 'GBK') # ぁ - assert_raise(Encoding::UndefinedConversionError) { "\xA5\xA0".encode("utf-8", 'GBK') } + assert_undefined_in("\xA5\xA0", 'GBK') check_both_ways("\u30A1", "\xA5\xA1", 'GBK') # ァ check_both_ways("\u0391", "\xA6\xA1", 'GBK') # Α check_both_ways("\u03B1", "\xA6\xC1", 'GBK') # α - assert_raise(Encoding::UndefinedConversionError) { "\xA6\xED".encode("utf-8", 'GBK') } + assert_undefined_in("\xA6\xED", 'GBK') check_both_ways("\uFE3B", "\xA6\xEE", 'GBK') # ︻ check_both_ways("\u0410", "\xA7\xA1", 'GBK') # А check_both_ways("\u0430", "\xA7\xD1", 'GBK') # а check_both_ways("\u02CA", "\xA8\x40", 'GBK') # ˊ check_both_ways("\u2587", "\xA8\x7E", 'GBK') # ▇ - assert_raise(Encoding::UndefinedConversionError) { "\xA8\x96".encode("utf-8", 'GBK') } + assert_undefined_in("\xA8\x96", 'GBK') check_both_ways("\u0101", "\xA8\xA1", 'GBK') # ā - assert_raise(Encoding::UndefinedConversionError) { "\xA8\xBC".encode("utf-8", 'GBK') } - assert_raise(Encoding::UndefinedConversionError) { "\xA8\xBF".encode("utf-8", 'GBK') } - assert_raise(Encoding::UndefinedConversionError) { "\xA8\xC4".encode("utf-8", 'GBK') } + assert_undefined_in("\xA8\xBC", 'GBK') + assert_undefined_in("\xA8\xBF", 'GBK') + assert_undefined_in("\xA8\xC4", 'GBK') check_both_ways("\u3105", "\xA8\xC5", 'GBK') # ㄅ check_both_ways("\u3021", "\xA9\x40", 'GBK') # 〡 - assert_raise(Encoding::UndefinedConversionError) { "\xA9\x58".encode("utf-8", 'GBK') } - assert_raise(Encoding::UndefinedConversionError) { "\xA9\x5B".encode("utf-8", 'GBK') } - assert_raise(Encoding::UndefinedConversionError) { "\xA9\x5D".encode("utf-8", 'GBK') } + assert_undefined_in("\xA9\x58", 'GBK') + assert_undefined_in("\xA9\x5B", 'GBK') + assert_undefined_in("\xA9\x5D", 'GBK') check_both_ways("\u3007", "\xA9\x96", 'GBK') # 〇 - assert_raise(Encoding::UndefinedConversionError) { "\xA9\xA3".encode("utf-8", 'GBK') } + assert_undefined_in("\xA9\xA3", 'GBK') check_both_ways("\u2500", "\xA9\xA4", 'GBK') # ─ - assert_raise(Encoding::UndefinedConversionError) { "\xA9\xF0".encode("utf-8", 'GBK') } + assert_undefined_in("\xA9\xF0", 'GBK') check_both_ways("\u7588", "\xAF\x40", 'GBK') # 疈 check_both_ways("\u7607", "\xAF\x7E", 'GBK') # 瘇 check_both_ways("\u7608", "\xAF\x80", 'GBK') # 瘈 check_both_ways("\u7644", "\xAF\xA0", 'GBK') # 癄 - assert_raise(Encoding::UndefinedConversionError) { "\xAF\xA1".encode("utf-8", 'GBK') } + assert_undefined_in("\xAF\xA1", 'GBK') check_both_ways("\u7645", "\xB0\x40", 'GBK') # 癅 check_both_ways("\u769B", "\xB0\x7E", 'GBK') # 皛 check_both_ways("\u769C", "\xB0\x80", 'GBK') # 皜 @@ -1841,10 +1852,10 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u9F78", "\xFD\x7E", 'GBK') # 齸 check_both_ways("\u9F79", "\xFD\x80", 'GBK') # 齹 check_both_ways("\uF9F1", "\xFD\xA0", 'GBK') # 隣 - assert_raise(Encoding::UndefinedConversionError) { "\xFD\xA1".encode("utf-8", 'GBK') } + assert_undefined_in("\xFD\xA1", 'GBK') check_both_ways("\uFA0C", "\xFE\x40", 'GBK') # 兀 check_both_ways("\uFA29", "\xFE\x4F", 'GBK') # 﨩 - assert_raise(Encoding::UndefinedConversionError) { "\xFE\x50".encode("utf-8", 'GBK') } + assert_undefined_in("\xFE\x50", 'GBK') check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\xC7\xE0\xC9\xBD\xD1\xA7\xD4\xBA\xB4\xF3\xD1\xA7", 'GBK') # 青山学院大学 check_both_ways("\u795E\u6797\u7FA9\u535A", "\xC9\xF1\xC1\xD6\xC1\x78\xB2\xA9", 'GBK') # 神林義博 end @@ -1880,48 +1891,48 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u3000", "\xA1\xA1", 'GB18030') # full-width space check_both_ways("\u3001", "\xA1\xA2", 'GB18030') # check_both_ways("\u3013", "\xA1\xFE", 'GB18030') # - #assert_raise(Encoding::UndefinedConversionError) { "\xA2\xA0".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA2\xA0", 'GB18030') check_both_ways("\u2170", "\xA2\xA1", 'GB18030') # ⅰ - #assert_raise(Encoding::UndefinedConversionError) { "\xA2\xB0".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA2\xB0", 'GB18030') check_both_ways("\u2488", "\xA2\xB1", 'GB18030') # - #assert_raise(Encoding::UndefinedConversionError) { "\xA2\xE4".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA2\xE4", 'GB18030') check_both_ways("\u3220", "\xA2\xE5", 'GB18030') # ㈠ - #assert_raise(Encoding::UndefinedConversionError) { "\xA2\xF0".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA2\xF0", 'GB18030') check_both_ways("\u2160", "\xA2\xF1", 'GB18030') # Ⅰ - #assert_raise(Encoding::UndefinedConversionError) { "\xA3\xA0".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA3\xA0", 'GB18030') check_both_ways("\uFF01", "\xA3\xA1", 'GB18030') # E check_both_ways("\uFFE3", "\xA3\xFE", 'GB18030') # E - #assert_raise(Encoding::UndefinedConversionError) { "\xA4\xA0".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA4\xA0", 'GB18030') check_both_ways("\u3041", "\xA4\xA1", 'GB18030') # - #assert_raise(Encoding::UndefinedConversionError) { "\xA5\xA0".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA5\xA0", 'GB18030') check_both_ways("\u30A1", "\xA5\xA1", 'GB18030') # ァ check_both_ways("\u0391", "\xA6\xA1", 'GB18030') # check_both_ways("\u03B1", "\xA6\xC1", 'GB18030') # α - #assert_raise(Encoding::UndefinedConversionError) { "\xA6\xED".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA6\xED", 'GB18030') check_both_ways("\uFE3B", "\xA6\xEE", 'GB18030') # E check_both_ways("\u0410", "\xA7\xA1", 'GB18030') # check_both_ways("\u0430", "\xA7\xD1", 'GB18030') # а check_both_ways("\u02CA", "\xA8\x40", 'GB18030') # check_both_ways("\u2587", "\xA8\x7E", 'GB18030') # - #assert_raise(Encoding::UndefinedConversionError) { "\xA8\x96".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA8\x96", 'GB18030') check_both_ways("\u0101", "\xA8\xA1", 'GB18030') # - #assert_raise(Encoding::UndefinedConversionError) { "\xA8\xBC".encode("utf-8", 'GB18030') } - #assert_raise(Encoding::UndefinedConversionError) { "\xA8\xBF".encode("utf-8", 'GB18030') } - #assert_raise(Encoding::UndefinedConversionError) { "\xA8\xC4".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA8\xBC", 'GB18030') + #assert_undefined_in("\xA8\xBF", 'GB18030') + #assert_undefined_in("\xA8\xC4", 'GB18030') check_both_ways("\u3105", "\xA8\xC5", 'GB18030') # check_both_ways("\u3021", "\xA9\x40", 'GB18030') # 〡 - #assert_raise(Encoding::UndefinedConversionError) { "\xA9\x58".encode("utf-8", 'GB18030') } - #assert_raise(Encoding::UndefinedConversionError) { "\xA9\x5B".encode("utf-8", 'GB18030') } - #assert_raise(Encoding::UndefinedConversionError) { "\xA9\x5D".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA9\x58", 'GB18030') + #assert_undefined_in("\xA9\x5B", 'GB18030') + #assert_undefined_in("\xA9\x5D", 'GB18030') check_both_ways("\u3007", "\xA9\x96", 'GB18030') # - #assert_raise(Encoding::UndefinedConversionError) { "\xA9\xA3".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA9\xA3", 'GB18030') check_both_ways("\u2500", "\xA9\xA4", 'GB18030') # ─ - #assert_raise(Encoding::UndefinedConversionError) { "\xA9\xF0".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xA9\xF0", 'GB18030') check_both_ways("\u7588", "\xAF\x40", 'GB18030') # check_both_ways("\u7607", "\xAF\x7E", 'GB18030') # check_both_ways("\u7608", "\xAF\x80", 'GB18030') # check_both_ways("\u7644", "\xAF\xA0", 'GB18030') # - #assert_raise(Encoding::UndefinedConversionError) { "\xAF\xA1".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xAF\xA1", 'GB18030') check_both_ways("\u7645", "\xB0\x40", 'GB18030') # check_both_ways("\u769B", "\xB0\x7E", 'GB18030') # check_both_ways("\u769C", "\xB0\x80", 'GB18030') # @@ -1962,10 +1973,10 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u9F78", "\xFD\x7E", 'GB18030') # 齸 check_both_ways("\u9F79", "\xFD\x80", 'GB18030') # 齹 check_both_ways("\uF9F1", "\xFD\xA0", 'GB18030') # E - #assert_raise(Encoding::UndefinedConversionError) { "\xFD\xA1".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xFD\xA1", 'GB18030') check_both_ways("\uFA0C", "\xFE\x40", 'GB18030') # E check_both_ways("\uFA29", "\xFE\x4F", 'GB18030') # E - #assert_raise(Encoding::UndefinedConversionError) { "\xFE\x50".encode("utf-8", 'GB18030') } + #assert_undefined_in("\xFE\x50", 'GB18030') check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\xC7\xE0\xC9\xBD\xD1\xA7\xD4\xBA\xB4\xF3\xD1\xA7", 'GB18030') # 青山学院大学 check_both_ways("\u795E\u6797\u7FA9\u535A", "\xC9\xF1\xC1\xD6\xC1\x78\xB2\xA9", 'GB18030') # 神林義 @@ -2020,7 +2031,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u310F", "\xA3\x7E", 'Big5') # ㄏ check_both_ways("\u3110", "\xA3\xA1", 'Big5') # ㄐ check_both_ways("\u02CB", "\xA3\xBF", 'Big5') # ˋ - assert_raise(Encoding::UndefinedConversionError) { "\xA3\xC0".encode("utf-8", 'Big5') } + assert_undefined_in("\xA3\xC0", 'Big5') check_both_ways("\u6D6C", "\xAF\x40", 'Big5') # 浬 check_both_ways("\u7837", "\xAF\x7E", 'Big5') # 砷 check_both_ways("\u7825", "\xAF\xA1", 'Big5') # 砥 @@ -2039,9 +2050,9 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u77AC", "\xC0\xFE", 'Big5') # 瞬 check_both_ways("\u8B96", "\xC6\x40", 'Big5') # 讖 check_both_ways("\u7C72", "\xC6\x7E", 'Big5') # 籲 - #assert_raise(Encoding::UndefinedConversionError) { "\xC6\xA1".encode("utf-8", 'Big5') } - #assert_raise(Encoding::UndefinedConversionError) { "\xC7\x40".encode("utf-8", 'Big5') } - #assert_raise(Encoding::UndefinedConversionError) { "\xC8\x40".encode("utf-8", 'Big5') } + #assert_undefined_in("\xC6\xA1", 'Big5') + #assert_undefined_in("\xC7\x40", 'Big5') + #assert_undefined_in("\xC8\x40", 'Big5') check_both_ways("\u4E42", "\xC9\x40", 'Big5') # 乂 check_both_ways("\u6C15", "\xC9\x7E", 'Big5') # 氕 check_both_ways("\u6C36", "\xC9\xA1", 'Big5') # 氶 @@ -2074,7 +2085,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u9F0A", "\xF9\x7E", 'Big5') # 鼊 check_both_ways("\u9FA4", "\xF9\xA1", 'Big5') # 龤 check_both_ways("\u9F98", "\xF9\xD5", 'Big5') # 龘 - #assert_raise(Encoding::UndefinedConversionError) { "\xF9\xD6".encode("utf-8", 'Big5') } + #assert_undefined_in("\xF9\xD6", 'Big5') check_both_ways("\u795E\u6797\u7FA9\u535A", "\xAF\xAB\xAA\x4C\xB8\x71\xB3\xD5", 'Big5') # 神林義博 end @@ -2087,7 +2098,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u310F", "\xA3\x7E", 'Big5-HKSCS') # ㄏ check_both_ways("\u3110", "\xA3\xA1", 'Big5-HKSCS') # ㄐ check_both_ways("\u02CB", "\xA3\xBF", 'Big5-HKSCS') # ˋ - #assert_raise(Encoding::UndefinedConversionError) { "\xA3\xC0".encode("utf-8", 'Big5-HKSCS') } + #assert_undefined_in("\xA3\xC0", 'Big5-HKSCS') check_both_ways("\u6D6C", "\xAF\x40", 'Big5-HKSCS') # 浬 check_both_ways("\u7837", "\xAF\x7E", 'Big5-HKSCS') # 砷 check_both_ways("\u7825", "\xAF\xA1", 'Big5-HKSCS') # 砥 @@ -2106,9 +2117,9 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u77AC", "\xC0\xFE", 'Big5-HKSCS') # 瞬 check_both_ways("\u8B96", "\xC6\x40", 'Big5-HKSCS') # 讖 check_both_ways("\u7C72", "\xC6\x7E", 'Big5-HKSCS') # 籲 - #assert_raise(Encoding::UndefinedConversionError) { "\xC6\xA1".encode("utf-8", 'Big5-HKSCS') } - #assert_raise(Encoding::UndefinedConversionError) { "\xC7\x40".encode("utf-8", 'Big5-HKSCS') } - #assert_raise(Encoding::UndefinedConversionError) { "\xC8\x40".encode("utf-8", 'Big5-HKSCS') } + #assert_undefined_in("\xC6\xA1", 'Big5-HKSCS') + #assert_undefined_in("\xC7\x40", 'Big5-HKSCS') + #assert_undefined_in("\xC8\x40", 'Big5-HKSCS') check_both_ways("\u4E42", "\xC9\x40", 'Big5-HKSCS') # 乂 check_both_ways("\u6C15", "\xC9\x7E", 'Big5-HKSCS') # 氕 check_both_ways("\u6C36", "\xC9\xA1", 'Big5-HKSCS') # 氶 @@ -2142,7 +2153,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u9FA4", "\xF9\xA1", 'Big5-HKSCS') # 龤 check_both_ways("\u9F98", "\xF9\xD5", 'Big5-HKSCS') # 龘 #check_both_ways("\u{23ED7}", "\x8E\x40", 'Big5-HKSCS') # 𣻗 - #assert_raise(Encoding::UndefinedConversionError) { "\xF9\xD6".encode("utf-8", 'Big5-HKSCS') } + #assert_undefined_in("\xF9\xD6", 'Big5-HKSCS') check_both_ways("\u795E\u6797\u7FA9\u535A", "\xAF\xAB\xAA\x4C\xB8\x71\xB3\xD5", 'Big5-HKSCS') # 神林義博 end @@ -2232,12 +2243,12 @@ class TestTranscode < Test::Unit::TestCase assert_equal("U+3042", "\u{3042}".encode("US-ASCII", fallback: fallback)) end - bug8940 = '[ruby-core:57318] [Bug #8940]' - %w[UTF-32 UTF-16].each do |enc| - define_method("test_pseudo_encoding_inspect(#{enc})") do - assert_normal_exit("'aaa'.encode('#{enc}').inspect", bug8940) - assert_equal(4, 'aaa'.encode(enc).length, "should count in #{enc} with BOM") - end + def test_pseudo_encoding_inspect + s = 'aaa'.encode "UTF-16" + assert_equal '"\xFE\xFF\x00\x61\x00\x61\x00\x61"', s.inspect + + s = 'aaa'.encode "UTF-32" + assert_equal '"\x00\x00\xFE\xFF\x00\x00\x00\x61\x00\x00\x00\x61\x00\x00\x00\x61"', s.inspect end def test_encode_with_invalid_chars @@ -2275,7 +2286,7 @@ class TestTranscode < Test::Unit::TestCase result = th.map(&:value) end end - expected = "\xa4\xa2".force_encoding(Encoding::EUC_JP) + expected = "\xa4\xa2".dup.force_encoding(Encoding::EUC_JP) assert_equal([expected]*num, result, bug11277) end; end @@ -2305,5 +2316,37 @@ class TestTranscode < Test::Unit::TestCase assert_equal("A\rB\r\rC", s.encode(usascii, newline: :cr)) assert_equal("A\r\nB\r\r\nC", s.encode(usascii, crlf_newline: true)) assert_equal("A\r\nB\r\r\nC", s.encode(usascii, newline: :crlf)) + assert_equal("A\nB\nC", s.encode(usascii, lf_newline: true)) + assert_equal("A\nB\nC", s.encode(usascii, newline: :lf)) + end + + private + + def assert_conversion_both_ways_utf8(utf8, raw, encoding) + assert_conversion_both_ways(utf8, 'utf-8', raw, encoding) + end + alias check_both_ways assert_conversion_both_ways_utf8 + + def assert_conversion_both_ways(str1, enc1, str2, enc2) + message = str1.dump+str2.dump + assert_equal(str1.force_encoding(enc1), str2.encode(enc1, enc2), message) + assert_equal(str2.force_encoding(enc2), str1.encode(enc2, enc1), message) + end + alias check_both_ways2 assert_conversion_both_ways + + def assert_undefined_conversion(str, to, from = nil) + assert_raise(Encoding::UndefinedConversionError) { str.encode(to, from) } + end + + def assert_undefined_in(str, encoding) + assert_undefined_conversion(str, 'utf-8', encoding) + end + + def assert_invalid_byte_sequence(str, to, from = nil) + assert_raise(Encoding::InvalidByteSequenceError) { str.encode(to, from) } + end + + def assert_invalid_in(str, encoding) + assert_invalid_byte_sequence(str, 'utf-8', encoding) end end |