summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog5
-rw-r--r--enc/trans/utf_16_32.trans16
-rw-r--r--test/ruby/test_transcode.rb2
3 files changed, 15 insertions, 8 deletions
diff --git a/ChangeLog b/ChangeLog
index 0c8642e550..2f778111b3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Wed Nov 24 03:21:35 2010 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * enc/trans/utf_16_32.trans: raise error on unpaired upper
+ surrogates.
+
Wed Nov 24 01:40:23 2010 NARUSE, Yui <naruse@ruby-lang.org>
* enc/utf_16_32.h: add UTF-16 and UTF-32 as a dummy encoding.
diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans
index 4b1eca796d..2d7005993e 100644
--- a/enc/trans/utf_16_32.trans
+++ b/enc/trans/utf_16_32.trans
@@ -289,20 +289,20 @@ fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
}
break;
case BE:
- if (0xD8 <= s[0] && s[0] <= 0xDB) {
- return (VALUE)from_UTF_16BE_D8toDB_00toFF;
- }
- else {
+ if (s[0] < 0xD8 || 0xDF < s[0]) {
return (VALUE)FUNso;
}
+ else if (s[0] <= 0xDB) {
+ return (VALUE)from_UTF_16BE_D8toDB_00toFF;
+ }
break;
case LE:
- if (0xD8 <= s[1] && s[1] <= 0xDB) {
- return (VALUE)from_UTF_16LE_00toFF_D8toDB;
- }
- else {
+ if (s[1] < 0xD8 || 0xDF < s[1]) {
return (VALUE)FUNso;
}
+ else if (s[1] <= 0xDB) {
+ return (VALUE)from_UTF_16LE_00toFF_D8toDB;
+ }
break;
}
return (VALUE)INVALID;
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index 0a12405c20..bf4ca2bf6d 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -1023,6 +1023,8 @@ class TestTranscode < Test::Unit::TestCase
expected = "\u{3042}\u{3044}\u{20bb7}"
assert_equal(expected, %w/fffe4230443042d8b7df/.pack("H*").encode("UTF-8","UTF-16"))
assert_equal(expected, %w/feff30423044d842dfb7/.pack("H*").encode("UTF-8","UTF-16"))
+ assert_raise(Encoding::InvalidByteSequenceError){%w/feffdfb7/.pack("H*").encode("UTF-8","UTF-16")}
+ assert_raise(Encoding::InvalidByteSequenceError){%w/fffeb7df/.pack("H*").encode("UTF-8","UTF-16")}
end
def check_utf_32_both_ways(utf8, raw)