diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-11-23 18:23:03 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-11-23 18:23:03 +0000 |
commit | 3ab82a65d701db47cea2ccbe9e7821468e71dd90 (patch) | |
tree | 68db643ca6085ee513b6e6396a659c66c3353d88 | |
parent | 952006388f528513c43eb105857fda761d5a9273 (diff) |
* enc/trans/utf_16_32.trans: raise error on unpaired upper
surrogates.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29891 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | enc/trans/utf_16_32.trans | 16 | ||||
-rw-r--r-- | test/ruby/test_transcode.rb | 2 |
3 files changed, 15 insertions, 8 deletions
```diff
@@ -1,3 +1,8 @@
+Wed Nov 24 03:21:35 2010 NARUSE, Yui <naruse@ruby-lang.org>
+
+	* enc/trans/utf_16_32.trans: raise error on unpaired upper
+	  surrogates.
+
 Wed Nov 24 01:40:23 2010 NARUSE, Yui <naruse@ruby-lang.org>
 
 	* enc/utf_16_32.h: add UTF-16 and UTF-32 as a dummy encoding.
diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans
index 4b1eca796d..2d7005993e 100644
--- a/enc/trans/utf_16_32.trans
+++ b/enc/trans/utf_16_32.trans
@@ -289,20 +289,20 @@ fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
         }
         break;
       case BE:
-        if (0xD8 <= s[0] && s[0] <= 0xDB) {
-            return (VALUE)from_UTF_16BE_D8toDB_00toFF;
-        }
-        else {
+        if (s[0] < 0xD8 && 0xDF < s[0]) {
             return (VALUE)FUNso;
         }
+        else if (s[0] <= 0xDB) {
+            return (VALUE)from_UTF_16BE_D8toDB_00toFF;
+        }
         break;
       case LE:
-        if (0xD8 <= s[1] && s[1] <= 0xDB) {
-            return (VALUE)from_UTF_16LE_00toFF_D8toDB;
-        }
-        else {
+        if (s[1] < 0xD8 && 0xDF < s[1]) {
             return (VALUE)FUNso;
         }
+        else if (s[1] <= 0xDB) {
+            return (VALUE)from_UTF_16LE_00toFF_D8toDB;
+        }
         break;
     }
     return (VALUE)INVALID;
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index 0a12405c20..bf4ca2bf6d 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -1023,6 +1023,8 @@ class TestTranscode < Test::Unit::TestCase
     expected = "\u{3042}\u{3044}\u{20bb7}"
     assert_equal(expected, %w/fffe4230443042d8b7df/.pack("H*").encode("UTF-8","UTF-16"))
     assert_equal(expected, %w/feff30423044d842dfb7/.pack("H*").encode("UTF-8","UTF-16"))
+    assert_raise(Encoding::InvalidByteSequenceError){%w/feffdfb7/.pack("H*").encode("UTF-8","UTF-16"))
+    assert_raise(Encoding::InvalidByteSequenceError){%w/fffeb7df/.pack("H*").encode("UTF-8","UTF-16"))
   end
 
   def check_utf_32_both_ways(utf8, raw)
```