From 166d8dc2d62efbbfcbb6c708c920ddd53f96ebc5 Mon Sep 17 00:00:00 2001 From: ktsj Date: Sun, 2 Jun 2013 14:36:41 +0000 Subject: * enc/trans/japanese_euc.trans, test/ruby/test_transcode.rb, tool/transcode-tblgen.rb: change EUC-JP-2004 to EUC-JIS-2004. This is follow up to changes in r41024. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@41035 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 6 +++ enc/trans/japanese_euc.trans | 4 +- test/ruby/test_transcode.rb | 120 +++++++++++++++++++++---------------------- tool/transcode-tblgen.rb | 98 +++++++++++++++++------------------ 4 files changed, 117 insertions(+), 111 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0dd9a2ecdf..4050265b21 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Sun Jun 2 23:33:42 2013 Kazuki Tsujimoto + + * enc/trans/japanese_euc.trans, test/ruby/test_transcode.rb, + tool/transcode-tblgen.rb: change EUC-JP-2004 to EUC-JIS-2004. + This is follow up to changes in r41024. + Sun Jun 2 22:44:42 2013 NARUSE, Yui * ext/socket/option.c: rename functions introduced in r41009 diff --git a/enc/trans/japanese_euc.trans b/enc/trans/japanese_euc.trans index 0f63272321..d96f69feda 100644 --- a/enc/trans/japanese_euc.trans +++ b/enc/trans/japanese_euc.trans @@ -18,7 +18,7 @@ citrus_decode_mapsrc("euc", 0x8080, "JISX0208VDC:NEC/UCS,CP932VDC:NEC_IBM/UCS,JISX0208:MS/UCS") + citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS") - transcode_tblgen "EUC-JP-2004", "UTF-8", + transcode_tblgen "EUC-JIS-2004", "UTF-8", [["{00-7f}", :nomap]] + citrus_decode_mapsrc("euc", 0x8080, "JISX0208:1990/UCS,JISX0213-1/UCS@BMP,JISX0213-1/UCS@SIP") + citrus_decode_mapsrc("euc", 0x0080, "JISX0201-KANA/UCS") + @@ -42,7 +42,7 @@ citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208:MS,UCS/JISX0208VDC:NEC,UCS/CP932VDC:NEC_IBM") + citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA") - transcode_tblgen "UTF-8", "EUC-JP-2004", + transcode_tblgen "UTF-8", "EUC-JIS-2004", [["{00-7f}", :nomap]] + citrus_decode_mapsrc("euc", 0x8080, "UCS/JISX0208:1990,UCS@BMP/JISX0213-1,UCS@SIP/JISX0213-1") + citrus_decode_mapsrc("euc", 0x0080, "UCS/JISX0201-KANA") + diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index 8052c82194..01e5fb756a 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -92,10 +92,10 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D", "\xa4\xde\xa4\xc4\xa4\xe2\xa4\xc8 \xa4\xe6\xa4\xad\xa4\xd2\xa4\xed", 'euc-jp') check_both_ways("\u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D", - "\xa4\xde\xa4\xc4\xa4\xe2\xa4\xc8 \xa4\xe6\xa4\xad\xa4\xd2\xa4\xed", 'euc-jp-2004') + "\xa4\xde\xa4\xc4\xa4\xe2\xa4\xc8 \xa4\xe6\xa4\xad\xa4\xd2\xa4\xed", 'euc-jis-2004') check_both_ways("\u677E\u672C\u884C\u5F18", "\x8f\xbc\x96\x7b\x8d\x73\x8d\x4f", 'shift_jis') # 松本行弘 check_both_ways("\u677E\u672C\u884C\u5F18", "\xbe\xbe\xcb\xdc\xb9\xd4\xb9\xb0", 'euc-jp') - check_both_ways("\u677E\u672C\u884C\u5F18", "\xbe\xbe\xcb\xdc\xb9\xd4\xb9\xb0", 'euc-jp-2004') + check_both_ways("\u677E\u672C\u884C\u5F18", "\xbe\xbe\xcb\xdc\xb9\xd4\xb9\xb0", 'euc-jis-2004') check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-1') # Dürst check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-2') check_both_ways("D\u00FCrst", "D\xFCrst", 'iso-8859-3') @@ -112,7 +112,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u0643\u062A\u0628", "\xE3\xCA\xC8", 'iso-8859-6') # كتب check_both_ways("\u65E5\u8A18", "\x93\xFA\x8BL", 'shift_jis') # 日記 check_both_ways("\u65E5\u8A18", "\xC6\xFC\xB5\xAD", 'euc-jp') - check_both_ways("\u65E5\u8A18", "\xC6\xFC\xB5\xAD", 'euc-jp-2004') + check_both_ways("\u65E5\u8A18", "\xC6\xFC\xB5\xAD", 'euc-jis-2004') check_both_ways("\uC560\uC778\uAD6C\uD568\u0020\u6734\uC9C0\uC778", "\xBE\xD6\xC0\xCE\xB1\xB8\xC7\xD4\x20\xDA\xD3\xC1\xF6\xC0\xCE", 'euc-kr') # 애인구함 朴지인 check_both_ways("\uC544\uD58F\uD58F\u0020\uB620\uBC29\uD6BD\uB2D8\u0020\uC0AC\uB791\uD716", @@ -1185,15 +1185,15 @@ class TestTranscode < Test::Unit::TestCase assert_equal("\uFFFD!", "\xff!".encode("utf-8", "euc-jp", :invalid=>:replace)) assert_equal("\uFFFD!", - "\xff!".encode("utf-8", "euc-jp-2004", :invalid=>:replace)) + "\xff!".encode("utf-8", "euc-jis-2004", :invalid=>:replace)) assert_equal("\uFFFD!", "\xa1!".encode("utf-8", "euc-jp", :invalid=>:replace)) assert_equal("\uFFFD!", - "\xa1!".encode("utf-8", "euc-jp-2004", :invalid=>:replace)) + "\xa1!".encode("utf-8", "euc-jis-2004", :invalid=>:replace)) assert_equal("\uFFFD!", "\x8f\xa1!".encode("utf-8", "euc-jp", :invalid=>:replace)) assert_equal("\uFFFD!", - "\x8f\xa1!".encode("utf-8", "euc-jp-2004", :invalid=>:replace)) + "\x8f\xa1!".encode("utf-8", "euc-jis-2004", :invalid=>:replace)) assert_equal("?", "\xdc\x00".encode("EUC-JP", "UTF-16BE", :invalid=>:replace), "[ruby-dev:35776]") @@ -1210,7 +1210,7 @@ class TestTranscode < Test::Unit::TestCase def test_invalid_replace_string assert_equal("aA", "a\x80A".encode("us-ascii", "euc-jp", :invalid=>:replace, :replace=>"")) - assert_equal("aA", "a\x80A".encode("us-ascii", "euc-jp-2004", :invalid=>:replace, :replace=>"")) + assert_equal("aA", "a\x80A".encode("us-ascii", "euc-jis-2004", :invalid=>:replace, :replace=>"")) end def test_undef_replace @@ -1325,62 +1325,62 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u795E\u6797\u7FA9\u535A", "\xBF\xC0\xCE\xD3\xB5\xC1\xC7\xEE", 'euc-jp') # 神林義博 end - def test_euc_jp_2004 - check_both_ways("\u3000", "\xA1\xA1", 'euc-jp-2004') # full-width space - check_both_ways("\u00D7", "\xA1\xDF", 'euc-jp-2004') # × - check_both_ways("\u00F7", "\xA1\xE0", 'euc-jp-2004') # ÷ - check_both_ways("\u25C7", "\xA1\xFE", 'euc-jp-2004') # ◇ - check_both_ways("\u25C6", "\xA2\xA1", 'euc-jp-2004') # ◆ - check_both_ways("\uFF07", "\xA2\xAF", 'euc-jp-2004') # ' - check_both_ways("\u309F", "\xA2\xB9", 'euc-jp-2004') # ゟ - check_both_ways("\u2284", "\xA2\xC2", 'euc-jp-2004') # ⊄ - check_both_ways("\u2306", "\xA2\xC9", 'euc-jp-2004') # ⌆ - check_both_ways("\u2295", "\xA2\xD1", 'euc-jp-2004') # ⊕ - check_both_ways("\u3017", "\xA2\xDB", 'euc-jp-2004') # 〗 - check_both_ways("\u2262", "\xA2\xEB", 'euc-jp-2004') # ≢ - check_both_ways("\u2194", "\xA2\xF1", 'euc-jp-2004') # ↔ - check_both_ways("\u266E", "\xA2\xFA", 'euc-jp-2004') # ♮ - check_both_ways("\u2669", "\xA2\xFD", 'euc-jp-2004') # ♩ - check_both_ways("\u25EF", "\xA2\xFE", 'euc-jp-2004') # ◯ - check_both_ways("\u2935", "\xA3\xAF", 'euc-jp-2004') # ⤵ - check_both_ways("\u29BF", "\xA3\xBA", 'euc-jp-2004') # ⦿ - check_both_ways("\u2022", "\xA3\xC0", 'euc-jp-2004') # • - check_both_ways("\u2213", "\xA3\xDB", 'euc-jp-2004') # ∓ - check_both_ways("\u2127", "\xA3\xE0", 'euc-jp-2004') # ℧ - check_both_ways("\u30A0", "\xA3\xFB", 'euc-jp-2004') # ゠ - check_both_ways("\uFF54", "\xA3\xF4", 'euc-jp-2004') # t - assert_raise(Encoding::UndefinedConversionError) { "\xA5\xF7".encode("utf-8", 'euc-jp-2004') } - check_both_ways("\u2664", "\xA6\xB9", 'euc-jp-2004') # ♤ - check_both_ways("\u2663", "\xA6\xC0", 'euc-jp-2004') # ♣ - check_both_ways("\u03C2", "\xA6\xD9", 'euc-jp-2004') # ς - check_both_ways("\u23BE", "\xA7\xC2", 'euc-jp-2004') # ⎾ - check_both_ways("\u23CC", "\xA7\xD0", 'euc-jp-2004') # ⏌ - check_both_ways("\u30F7", "\xA7\xF2", 'euc-jp-2004') # ヷ - check_both_ways("\u3251", "\xA8\xC1", 'euc-jp-2004') # ㉑ - check_both_ways("\u{20B9F}", "\xCF\xD4", 'euc-jp-2004') # 𠮑 - check_both_ways("\u541E", "\xCF\xFE", 'euc-jp-2004') # 吞 - check_both_ways("\u6A97", "\xDD\xA1", 'euc-jp-2004') # 檗 - check_both_ways("\u6BEF", "\xDD\xDF", 'euc-jp-2004') # 毯 - check_both_ways("\u9EBE", "\xDD\xE0", 'euc-jp-2004') # 麾 - check_both_ways("\u6CBE", "\xDD\xFE", 'euc-jp-2004') # 沾 - check_both_ways("\u6CBA", "\xDE\xA1", 'euc-jp-2004') # 沺 - check_both_ways("\u6ECC", "\xDE\xFE", 'euc-jp-2004') # 滌 - check_both_ways("\u6F3E", "\xDF\xA1", 'euc-jp-2004') # 漾 - check_both_ways("\u70DD", "\xDF\xDF", 'euc-jp-2004') # 烝 - check_both_ways("\u70D9", "\xDF\xE0", 'euc-jp-2004') # 烙 - check_both_ways("\u71FC", "\xDF\xFE", 'euc-jp-2004') # 燼 - check_both_ways("\u71F9", "\xE0\xA1", 'euc-jp-2004') # 燹 - check_both_ways("\u73F1", "\xE0\xFE", 'euc-jp-2004') # 珱 - check_both_ways("\u5653", "\xF4\xA7", 'euc-jp-2004') # 噓 + def test_euc_jis_2004 + check_both_ways("\u3000", "\xA1\xA1", 'euc-jis-2004') # full-width space + check_both_ways("\u00D7", "\xA1\xDF", 'euc-jis-2004') # × + check_both_ways("\u00F7", "\xA1\xE0", 'euc-jis-2004') # ÷ + check_both_ways("\u25C7", "\xA1\xFE", 'euc-jis-2004') # ◇ + check_both_ways("\u25C6", "\xA2\xA1", 'euc-jis-2004') # ◆ + check_both_ways("\uFF07", "\xA2\xAF", 'euc-jis-2004') # ' + check_both_ways("\u309F", "\xA2\xB9", 'euc-jis-2004') # ゟ + check_both_ways("\u2284", "\xA2\xC2", 'euc-jis-2004') # ⊄ + check_both_ways("\u2306", "\xA2\xC9", 'euc-jis-2004') # ⌆ + check_both_ways("\u2295", "\xA2\xD1", 'euc-jis-2004') # ⊕ + check_both_ways("\u3017", "\xA2\xDB", 'euc-jis-2004') # 〗 + check_both_ways("\u2262", "\xA2\xEB", 'euc-jis-2004') # ≢ + check_both_ways("\u2194", "\xA2\xF1", 'euc-jis-2004') # ↔ + check_both_ways("\u266E", "\xA2\xFA", 'euc-jis-2004') # ♮ + check_both_ways("\u2669", "\xA2\xFD", 'euc-jis-2004') # ♩ + check_both_ways("\u25EF", "\xA2\xFE", 'euc-jis-2004') # ◯ + check_both_ways("\u2935", "\xA3\xAF", 'euc-jis-2004') # ⤵ + check_both_ways("\u29BF", "\xA3\xBA", 'euc-jis-2004') # ⦿ + check_both_ways("\u2022", "\xA3\xC0", 'euc-jis-2004') # • + check_both_ways("\u2213", "\xA3\xDB", 'euc-jis-2004') # ∓ + check_both_ways("\u2127", "\xA3\xE0", 'euc-jis-2004') # ℧ + check_both_ways("\u30A0", "\xA3\xFB", 'euc-jis-2004') # ゠ + check_both_ways("\uFF54", "\xA3\xF4", 'euc-jis-2004') # t + assert_raise(Encoding::UndefinedConversionError) { "\xA5\xF7".encode("utf-8", 'euc-jis-2004') } + check_both_ways("\u2664", "\xA6\xB9", 'euc-jis-2004') # ♤ + check_both_ways("\u2663", "\xA6\xC0", 'euc-jis-2004') # ♣ + check_both_ways("\u03C2", "\xA6\xD9", 'euc-jis-2004') # ς + check_both_ways("\u23BE", "\xA7\xC2", 'euc-jis-2004') # ⎾ + check_both_ways("\u23CC", "\xA7\xD0", 'euc-jis-2004') # ⏌ + check_both_ways("\u30F7", "\xA7\xF2", 'euc-jis-2004') # ヷ + check_both_ways("\u3251", "\xA8\xC1", 'euc-jis-2004') # ㉑ + check_both_ways("\u{20B9F}", "\xCF\xD4", 'euc-jis-2004') # 𠮑 + check_both_ways("\u541E", "\xCF\xFE", 'euc-jis-2004') # 吞 + check_both_ways("\u6A97", "\xDD\xA1", 'euc-jis-2004') # 檗 + check_both_ways("\u6BEF", "\xDD\xDF", 'euc-jis-2004') # 毯 + check_both_ways("\u9EBE", "\xDD\xE0", 'euc-jis-2004') # 麾 + check_both_ways("\u6CBE", "\xDD\xFE", 'euc-jis-2004') # 沾 + check_both_ways("\u6CBA", "\xDE\xA1", 'euc-jis-2004') # 沺 + check_both_ways("\u6ECC", "\xDE\xFE", 'euc-jis-2004') # 滌 + check_both_ways("\u6F3E", "\xDF\xA1", 'euc-jis-2004') # 漾 + check_both_ways("\u70DD", "\xDF\xDF", 'euc-jis-2004') # 烝 + check_both_ways("\u70D9", "\xDF\xE0", 'euc-jis-2004') # 烙 + check_both_ways("\u71FC", "\xDF\xFE", 'euc-jis-2004') # 燼 + check_both_ways("\u71F9", "\xE0\xA1", 'euc-jis-2004') # 燹 + check_both_ways("\u73F1", "\xE0\xFE", 'euc-jis-2004') # 珱 + check_both_ways("\u5653", "\xF4\xA7", 'euc-jis-2004') # 噓 #check_both_ways("\u9ADC", "\xFC\xE3", 'euc-jp') # 髜 (IBM extended) - check_both_ways("\u9DD7", "\xFE\xE5", 'euc-jp-2004') # 鷗 - check_both_ways("\u{2000B}", "\xAE\xA2", 'euc-jp-2004') # 𠀋 - check_both_ways("\u{2A6B2}", "\x8F\xFE\xF6", 'euc-jp-2004') # 𪚲 + check_both_ways("\u9DD7", "\xFE\xE5", 'euc-jis-2004') # 鷗 + check_both_ways("\u{2000B}", "\xAE\xA2", 'euc-jis-2004') # 𠀋 + check_both_ways("\u{2A6B2}", "\x8F\xFE\xF6", 'euc-jis-2004') # 𪚲 - check_both_ways("\u677E\u672C\u884C\u5F18", "\xBE\xBE\xCB\xDC\xB9\xD4\xB9\xB0", 'euc-jp-2004') # 松本行弘 - check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\xC0\xC4\xBB\xB3\xB3\xD8\xB1\xA1\xC2\xE7\xB3\xD8", 'euc-jp-2004') # 青山学院大学 - check_both_ways("\u795E\u6797\u7FA9\u535A", "\xBF\xC0\xCE\xD3\xB5\xC1\xC7\xEE", 'euc-jp-2004') # 神林義博 + check_both_ways("\u677E\u672C\u884C\u5F18", "\xBE\xBE\xCB\xDC\xB9\xD4\xB9\xB0", 'euc-jis-2004') # 松本行弘 + check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\xC0\xC4\xBB\xB3\xB3\xD8\xB1\xA1\xC2\xE7\xB3\xD8", 'euc-jis-2004') # 青山学院大学 + check_both_ways("\u795E\u6797\u7FA9\u535A", "\xBF\xC0\xCE\xD3\xB5\xC1\xC7\xEE", 'euc-jis-2004') # 神林義博 end def test_eucjp_ms diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb index 0c5dbd6cea..832d9a4762 100644 --- a/tool/transcode-tblgen.rb +++ b/tool/transcode-tblgen.rb @@ -897,55 +897,55 @@ UnitLength = { UnitLength.default = 1 ValidEncoding = { - '1byte' => '{00-ff}', - '2byte' => '{00-ff}{00-ff}', - '4byte' => '{00-ff}{00-ff}{00-ff}{00-ff}', - 'US-ASCII' => '{00-7f}', - 'UTF-8' => '{00-7f} - {c2-df}{80-bf} - e0{a0-bf}{80-bf} - {e1-ec}{80-bf}{80-bf} - ed{80-9f}{80-bf} - {ee-ef}{80-bf}{80-bf} - f0{90-bf}{80-bf}{80-bf} - {f1-f3}{80-bf}{80-bf}{80-bf} - f4{80-8f}{80-bf}{80-bf}', - 'UTF-16BE' => '{00-d7,e0-ff}{00-ff} - {d8-db}{00-ff}{dc-df}{00-ff}', - 'UTF-16LE' => '{00-ff}{00-d7,e0-ff} - {00-ff}{d8-db}{00-ff}{dc-df}', - 'UTF-32BE' => '0000{00-d7,e0-ff}{00-ff} - 00{01-10}{00-ff}{00-ff}', - 'UTF-32LE' => '{00-ff}{00-d7,e0-ff}0000 - {00-ff}{00-ff}{01-10}00', - 'EUC-JP' => '{00-7f} - {a1-fe}{a1-fe} - 8e{a1-fe} - 8f{a1-fe}{a1-fe}', - 'CP51932' => '{00-7f} - {a1-fe}{a1-fe} - 8e{a1-fe}', - 'EUC-JP-2004' => '{00-7f} - {a1-fe}{a1-fe} - 8e{a1-fe} - 8f{a1-fe}{a1-fe}', - 'Shift_JIS' => '{00-7f} - {81-9f,e0-fc}{40-7e,80-fc} - {a1-df}', - 'EUC-KR' => '{00-7f} - {a1-fe}{a1-fe}', - 'CP949' => '{00-7f} - {81-fe}{41-5a,61-7a,81-fe}', - 'Big5' => '{00-7f} - {81-fe}{40-7e,a1-fe}', - 'EUC-TW' => '{00-7f} - {a1-fe}{a1-fe} - 8e{a1-b0}{a1-fe}{a1-fe}', - 'GBK' => '{00-80} - {81-fe}{40-7e,80-fe}', - 'GB18030' => '{00-7f} - {81-fe}{40-7e,80-fe} - {81-fe}{30-39}{81-fe}{30-39}', + '1byte' => '{00-ff}', + '2byte' => '{00-ff}{00-ff}', + '4byte' => '{00-ff}{00-ff}{00-ff}{00-ff}', + 'US-ASCII' => '{00-7f}', + 'UTF-8' => '{00-7f} + {c2-df}{80-bf} + e0{a0-bf}{80-bf} + {e1-ec}{80-bf}{80-bf} + ed{80-9f}{80-bf} + {ee-ef}{80-bf}{80-bf} + f0{90-bf}{80-bf}{80-bf} + {f1-f3}{80-bf}{80-bf}{80-bf} + f4{80-8f}{80-bf}{80-bf}', + 'UTF-16BE' => '{00-d7,e0-ff}{00-ff} + {d8-db}{00-ff}{dc-df}{00-ff}', + 'UTF-16LE' => '{00-ff}{00-d7,e0-ff} + {00-ff}{d8-db}{00-ff}{dc-df}', + 'UTF-32BE' => '0000{00-d7,e0-ff}{00-ff} + 00{01-10}{00-ff}{00-ff}', + 'UTF-32LE' => '{00-ff}{00-d7,e0-ff}0000 + {00-ff}{00-ff}{01-10}00', + 'EUC-JP' => '{00-7f} + {a1-fe}{a1-fe} + 8e{a1-fe} + 8f{a1-fe}{a1-fe}', + 'CP51932' => '{00-7f} + {a1-fe}{a1-fe} + 8e{a1-fe}', + 'EUC-JIS-2004' => '{00-7f} + {a1-fe}{a1-fe} + 8e{a1-fe} + 8f{a1-fe}{a1-fe}', + 'Shift_JIS' => '{00-7f} + {81-9f,e0-fc}{40-7e,80-fc} + {a1-df}', + 'EUC-KR' => '{00-7f} + {a1-fe}{a1-fe}', + 'CP949' => '{00-7f} + {81-fe}{41-5a,61-7a,81-fe}', + 'Big5' => '{00-7f} + {81-fe}{40-7e,a1-fe}', + 'EUC-TW' => '{00-7f} + {a1-fe}{a1-fe} + 8e{a1-b0}{a1-fe}{a1-fe}', + 'GBK' => '{00-80} + {81-fe}{40-7e,80-fe}', + 'GB18030' => '{00-7f} + {81-fe}{40-7e,80-fe} + {81-fe}{30-39}{81-fe}{30-39}', } def ValidEncoding(enc) -- cgit v1.2.3