summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2009-01-04 08:55:04 +0000
committer duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2009-01-04 08:55:04 +0000
commit fecce9e5e5293ebc8fbb161e8b5ecc3884a27d4e (patch)
tree 71019ef9b586e201b4452fae3035bb19398b216d
parent 3bc30f0b73d917360b0a550eafbe6894fbf0b334 (diff)
* test/ruby/test_transcode.rb: added tests for GB2312
(from Yoshihiro Kambayashi)

* enc/trans/chinese.trans: set valid byte patterns for GB2312 and GB12345

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@21314 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 8
-rw-r--r-- enc/trans/chinese.trans 3
-rw-r--r-- test/ruby/test_transcode.rb 50
3 files changed, 60 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 01e19a1575..b03d23903b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
Sun Jan 4 17:39:39 2009 Martin Duerst <duerst@it.aoyama.ac.jp>
+ * test/ruby/test_transcode.rb: added tests for GB2312
+ (from Yoshihiro Kambayashi)
+
+ * enc/trans/chinese.trans: set valid byte patterns for
+ GB2312 and GB12345
+
+Sun Jan 4 17:39:39 2009 Martin Duerst <duerst@it.aoyama.ac.jp>
+
* enc/trans/big5.trans, big5-tbl.rb:
new Chinese Big5 transcoding (from Yoshihiro Kambayashi)
diff --git a/enc/trans/chinese.trans b/enc/trans/chinese.trans
index 9221b03292..1db6565254 100644
--- a/enc/trans/chinese.trans
+++ b/enc/trans/chinese.trans
@@ -1,6 +1,9 @@
#include "transcode_data.h"
<%
+ set_valid_byte_pattern 'GB2312', 'EUC-KR'
+ set_valid_byte_pattern 'GB12345', 'EUC-KR'
+
transcode_tblgen "GB2312", "UTF-8",
[["{00-7f}", :nomap]] +
citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS")
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index ddf0f99bf0..f746c9248b 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -1400,7 +1400,55 @@ class TestTranscode < Test::Unit::TestCase
assert_raise(Encoding::UndefinedConversionError) { "\u203E".encode("CP51932") }
end
- def test_Big5
+ def test_gb2312
+ check_both_ways("\u3000", "\xA1\xA1", 'GB2312') # full-width space
+ check_both_ways("\u3013", "\xA1\xFE", 'GB2312') # 〓
+ assert_raise(Encoding::UndefinedConversionError) { "\xA2\xB0".encode("utf-8", 'GB2312') }
+ check_both_ways("\u2488", "\xA2\xB1", 'GB2312') # ⒈
+ assert_raise(Encoding::UndefinedConversionError) { "\xA2\xE4".encode("utf-8", 'GB2312') }
+ check_both_ways("\u3220", "\xA2\xE5", 'GB2312') # ㈠
+ assert_raise(Encoding::UndefinedConversionError) { "\xA2\xF0".encode("utf-8", 'GB2312') }
+ check_both_ways("\u2160", "\xA2\xF1", 'GB2312') # Ⅰ
+ check_both_ways("\uFF01", "\xA3\xA1", 'GB2312') # !
+ check_both_ways("\uFFE3", "\xA3\xFE", 'GB2312') # ￣
+ check_both_ways("\u3041", "\xA4\xA1", 'GB2312') # ぁ
+ check_both_ways("\u30A1", "\xA5\xA1", 'GB2312') # ァ
+ check_both_ways("\u0391", "\xA6\xA1", 'GB2312') # Α
+ check_both_ways("\u03B1", "\xA6\xC1", 'GB2312') # α
+ check_both_ways("\u0410", "\xA7\xA1", 'GB2312') # А
+ check_both_ways("\u0430", "\xA7\xD1", 'GB2312') # а
+ check_both_ways("\u0101", "\xA8\xA1", 'GB2312') # ā
+ assert_raise(Encoding::UndefinedConversionError) { "\xA8\xC4".encode("utf-8", 'GB2312') }
+ check_both_ways("\u3105", "\xA8\xC5", 'GB2312') # ㄅ
+ assert_raise(Encoding::UndefinedConversionError) { "\xA9\xA3".encode("utf-8", 'GB2312') }
+ check_both_ways("\u2500", "\xA9\xA4", 'GB2312') # ─
+ check_both_ways("\u554A", "\xB0\xA1", 'GB2312') # 啊
+ check_both_ways("\u5265", "\xB0\xFE", 'GB2312') # 剥
+ check_both_ways("\u4FCA", "\xBF\xA1", 'GB2312') # 俊
+ check_both_ways("\u5080", "\xBF\xFE", 'GB2312') # 傀
+ check_both_ways("\u9988", "\xC0\xA1", 'GB2312') # 馈
+ check_both_ways("\u4FD0", "\xC0\xFE", 'GB2312') # 俐
+ check_both_ways("\u7A00", "\xCF\xA1", 'GB2312') # 稀
+ check_both_ways("\u6653", "\xCF\xFE", 'GB2312') # 晓
+ check_both_ways("\u5C0F", "\xD0\xA1", 'GB2312') # 小
+ check_both_ways("\u7384", "\xD0\xFE", 'GB2312') # 玄
+ check_both_ways("\u4F4F", "\xD7\xA1", 'GB2312') # 住
+ check_both_ways("\u5EA7", "\xD7\xF9", 'GB2312') # 座
+ assert_raise(Encoding::UndefinedConversionError) { "\xD7\xFA".encode("utf-8", 'GB2312') }
+ check_both_ways("\u647A", "\xDF\xA1", 'GB2312') # 摺
+ check_both_ways("\u553C", "\xDF\xFE", 'GB2312') # 唼
+ check_both_ways("\u5537", "\xE0\xA1", 'GB2312') # 唷
+ check_both_ways("\u5E3C", "\xE0\xFE", 'GB2312') # 帼
+ check_both_ways("\u94E9", "\xEF\xA1", 'GB2312') # 铩
+ check_both_ways("\u7A14", "\xEF\xFE", 'GB2312') # 稔
+ check_both_ways("\u7A39", "\xF0\xA1", 'GB2312') # 稹
+ check_both_ways("\u7619", "\xF0\xFE", 'GB2312') # 瘙
+ check_both_ways("\u9CCC", "\xF7\xA1", 'GB2312') # 鳌
+ check_both_ways("\u9F44", "\xF7\xFE", 'GB2312') # 齄
+ check_both_ways("\u9752\u5C71\u5B66\u9662\u5927\u5B66", "\xC7\xE0\xC9\xBD\xD1\xA7\xD4\xBA\xB4\xF3\xD1\xA7", 'GB2312') # 青山学院大学
+ end
+
+ def test_Big5
check_both_ways("\u3000", "\xA1\x40", 'Big5') # full-width space
check_both_ways("\uFE5A", "\xA1\x7E", 'Big5') # ﹚
check_both_ways("\uFE5B", "\xA1\xA1", 'Big5') # ﹛