diff options
author | usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2017-03-25 17:01:54 +0000 |
---|---|---|
committer | usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2017-03-25 17:01:54 +0000 |
commit | ad075f697ddad20d9d26cfea913f5255704f8a5f (patch) | |
tree | 9e04440487f01d111894c57af26c9db6359eac84 | |
parent | b824ac5f129be6bdc53b500323e8bc1ca754f999 (diff) |
merge revision(s) 57816,57817: [Backport #13292]
fix UTF-32 valid_encoding?
* enc/utf_32be.c (utf32be_mbc_enc_len): check arguments precisely.
[ruby-core:79966] [Bug #13292]
* enc/utf_32le.c (utf32le_mbc_enc_len): ditto.
* regenc.h (UNICODE_VALID_CODEPOINT_P): predicate for valid
Unicode codepoints.
fix UTF-32 valid_encoding?
* test/ruby/test_io_m17n.rb (TestIO_M17N#test_puts_widechar): do
not use invalid codepoint. [ruby-core:79966] [Bug #13292]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_2@58103 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | enc/utf_32be.c | 18 | ||||
-rw-r--r-- | enc/utf_32le.c | 18 | ||||
-rw-r--r-- | regenc.h | 3 | ||||
-rw-r--r-- | test/ruby/enc/test_utf32.rb | 68 | ||||
-rw-r--r-- | test/ruby/test_io_m17n.rb | 4 | ||||
-rw-r--r-- | version.h | 2 |
6 files changed, 104 insertions, 9 deletions
diff --git a/enc/utf_32be.c b/enc/utf_32be.c
index 43c07e2e8f..99b1004e0c 100644
--- a/enc/utf_32be.c
+++ b/enc/utf_32be.c
@@ -29,11 +29,23 @@
 
 #include "regenc.h"
 
+static OnigCodePoint utf32be_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
 static int
-utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
-                    OnigEncoding enc ARG_UNUSED)
+utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
+                    OnigEncoding enc)
 {
-  return 4;
+  if (e < p) {
+    return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  }
+  else if (e-p < 4) {
+    return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
+  }
+  else {
+    OnigCodePoint c = utf32be_mbc_to_code(p, e, enc);
+    if (!UNICODE_VALID_CODEPOINT_P(c))
+      return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+    return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+  }
 }
 
 static int
diff --git a/enc/utf_32le.c b/enc/utf_32le.c
index 31693eed05..58fb3ce0aa 100644
--- a/enc/utf_32le.c
+++ b/enc/utf_32le.c
@@ -29,11 +29,23 @@
 
 #include "regenc.h"
 
+static OnigCodePoint utf32le_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
 static int
-utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
-                    OnigEncoding enc ARG_UNUSED)
+utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
+                    OnigEncoding enc)
 {
-  return 4;
+  if (e < p) {
+    return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  }
+  else if (e-p < 4) {
+    return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
+  }
+  else {
+    OnigCodePoint c = utf32le_mbc_to_code(p, e, enc);
+    if (!UNICODE_VALID_CODEPOINT_P(c))
+      return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+    return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+  }
 }
 
 static int
diff --git a/regenc.h b/regenc.h
--- a/regenc.h
+++ b/regenc.h
@@ -169,6 +169,9 @@ ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, O
 #define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
 #define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
 #define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
+#define UNICODE_VALID_CODEPOINT_P(c) ( \
+    ((c) <= 0x10ffff) && \
+    !((c) < 0x10000 && UTF16_IS_SURROGATE((c) >> 8)))
 
 #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
   OnigEncISO_8859_1_ToLowerCaseTable[c]
diff --git a/test/ruby/enc/test_utf32.rb b/test/ruby/enc/test_utf32.rb
index 29a2240598..4ce205df18 100644
--- a/test/ruby/enc/test_utf32.rb
+++ b/test/ruby/enc/test_utf32.rb
@@ -89,5 +89,73 @@ EOT
     assert_equal(sl, "a".ord.chr("utf-32le"))
     assert_equal(sb, "a".ord.chr("utf-32be"))
   end
+
+  def test_utf32be_valid_encoding
+    all_assertions do |a|
+      [
+        "\x00\x00\x00\x00",
+        "\x00\x00\x00a",
+        "\x00\x00\x30\x40",
+        "\x00\x00\xd7\xff",
+        "\x00\x00\xe0\x00",
+        "\x00\x00\xff\xff",
+        "\x00\x10\xff\xff",
+      ].each {|s|
+        s.force_encoding("utf-32be")
+        a.for(s) {
+          assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+      [
+        "a",
+        "\x00a",
+        "\x00\x00a",
+        "\x00\x00\xd8\x00",
+        "\x00\x00\xdb\xff",
+        "\x00\x00\xdc\x00",
+        "\x00\x00\xdf\xff",
+        "\x00\x11\x00\x00",
+      ].each {|s|
+        s.force_encoding("utf-32be")
+        a.for(s) {
+          assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+    end
+  end
+
+  def test_utf32le_valid_encoding
+    all_assertions do |a|
+      [
+        "\x00\x00\x00\x00",
+        "a\x00\x00\x00",
+        "\x40\x30\x00\x00",
+        "\xff\xd7\x00\x00",
+        "\x00\xe0\x00\x00",
+        "\xff\xff\x00\x00",
+        "\xff\xff\x10\x00",
+      ].each {|s|
+        s.force_encoding("utf-32le")
+        a.for(s) {
+          assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+      [
+        "a",
+        "a\x00",
+        "a\x00\x00",
+        "\x00\xd8\x00\x00",
+        "\xff\xdb\x00\x00",
+        "\x00\xdc\x00\x00",
+        "\xff\xdf\x00\x00",
+        "\x00\x00\x11\x00",
+      ].each {|s|
+        s.force_encoding("utf-32le")
+        a.for(s) {
+          assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+        }
+      }
+    end
+  end
 end
 
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 0f1b1644f5..19dd21104e 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -2203,7 +2203,7 @@ EOT
            w.binmode
            w.puts(0x010a.chr(Encoding::UTF_32BE))
            w.puts(0x010a.chr(Encoding::UTF_16BE))
-           w.puts(0x0a010000.chr(Encoding::UTF_32LE))
+           w.puts(0x0a01.chr(Encoding::UTF_32LE))
            w.puts(0x0a01.chr(Encoding::UTF_16LE))
            w.close
          end,
@@ -2211,7 +2211,7 @@ EOT
            r.binmode
            assert_equal("\x00\x00\x01\x0a\n", r.read(5), bug)
            assert_equal("\x01\x0a\n", r.read(3), bug)
-           assert_equal("\x00\x00\x01\x0a\n", r.read(5), bug)
+           assert_equal("\x01\x0a\x00\x00\n", r.read(5), bug)
            assert_equal("\x01\x0a\n", r.read(3), bug)
            assert_equal("", r.read, bug)
            r.close
diff --git a/version.h b/version.h
--- a/version.h
+++ b/version.h
@@ -1,6 +1,6 @@
 #define RUBY_VERSION "2.2.7"
 #define RUBY_RELEASE_DATE "2017-03-26"
-#define RUBY_PATCHLEVEL 435
+#define RUBY_PATCHLEVEL 436
 
 #define RUBY_RELEASE_YEAR 2017
 #define RUBY_RELEASE_MONTH 3