summaryrefslogtreecommitdiff
path: root/test/ruby/test_m17n.rb
diff options
context:
space:
mode:
authornaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2013-04-19 17:50:38 +0000
committernaruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2013-04-19 17:50:38 +0000
commit394d5dfa9ba625c99a1e6a411f81b628bfebd60a (patch)
tree22a788ad3df769fa9c3b4f5f81efde5b1cad87a4 /test/ruby/test_m17n.rb
parent57ffc79c4561b9249ef6b81101f1887f16f29e19 (diff)
* string.c (str_scrub): add ruby method String#scrub which verify and
fix invalid byte sequence. * string.c (str_compat_and_valid): check given string is compatible and valid with given encoding. * transcode.c (str_transcode0): If invalid: :replace is specified for String#encode, replace invalid byte sequence even if the destination encoding equals to the source encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@40390 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'test/ruby/test_m17n.rb')
-rw-r--r--test/ruby/test_m17n.rb34
1 files changed, 34 insertions, 0 deletions
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index a8d56a4a56..60834bb9c6 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -1489,4 +1489,38 @@ class TestM17N < Test::Unit::TestCase
s.untrust
assert_equal(true, s.b.untrusted?)
end
+
+ def test_scrub
+ assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub)
+ assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub)
+
+ # exapmles in Unicode 6.1.0 D93b
+ assert_equal("\x41\uFFFD\uFFFD\x41\uFFFD\x41",
+ u("\x41\xC0\xAF\x41\xF4\x80\x80\x41").scrub)
+ assert_equal("\x41\uFFFD\uFFFD\uFFFD\x41",
+ u("\x41\xE0\x9F\x80\x41").scrub)
+ assert_equal("\u0061\uFFFD\uFFFD\uFFFD\u0062\uFFFD\u0063\uFFFD\uFFFD\u0064",
+ u("\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64").scrub)
+ assert_equal("abcdefghijklmnopqrstuvwxyz\u0061\uFFFD\uFFFD\uFFFD\u0062\uFFFD\u0063\uFFFD\uFFFD\u0064",
+ u("abcdefghijklmnopqrstuvwxyz\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64").scrub)
+
+ assert_equal("\u3042\u3013", u("\xE3\x81\x82\xE3\x81").scrub("\u3013"))
+ assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub(e("\xA4\xA2")) }
+ assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub(1) }
+ assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub(u("\x81")) }
+ assert_equal(e("\xA4\xA2\xA2\xAE"), e("\xA4\xA2\xA4").scrub(e("\xA2\xAE")))
+
+ assert_equal("\u3042<e381>", u("\xE3\x81\x82\xE3\x81").scrub{|x|'<'+x.unpack('H*')[0]+'>'})
+ assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub{e("\xA4\xA2")} }
+ assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub{1} }
+ assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub{u("\x81")} }
+ assert_equal(e("\xA4\xA2\xA2\xAE"), e("\xA4\xA2\xA4").scrub{e("\xA2\xAE")})
+
+ assert_equal("\uFFFD\u3042".encode("UTF-16BE"),
+ "\xD8\x00\x30\x42".force_encoding(Encoding::UTF_16BE).
+ scrub)
+ assert_equal("\uFFFD\u3042".encode("UTF-16LE"),
+ "\x00\xD8\x42\x30".force_encoding(Encoding::UTF_16LE).
+ scrub)
+ end
end