summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-03-25 17:01:54 +0000
committer: usa <usa@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-03-25 17:01:54 +0000
commit: ad075f697ddad20d9d26cfea913f5255704f8a5f (patch)
tree: 9e04440487f01d111894c57af26c9db6359eac84
parent: b824ac5f129be6bdc53b500323e8bc1ca754f999 (diff)
merge revision(s) 57816,57817: [Backport #13292]
fix UTF-32 valid_encoding?

* enc/utf_32be.c (utf32be_mbc_enc_len): check arguments precisely. [ruby-core:79966] [Bug #13292]
* enc/utf_32le.c (utf32le_mbc_enc_len): ditto.
* regenc.h (UNICODE_VALID_CODEPOINT_P): predicate for valid Unicode codepoints.

fix UTF-32 valid_encoding?

* test/ruby/test_io_m17n.rb (TestIO_M17N#test_puts_widechar): do not use invalid codepoint. [ruby-core:79966] [Bug #13292]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_2@58103 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- enc/utf_32be.c 18
-rw-r--r-- enc/utf_32le.c 18
-rw-r--r-- regenc.h 3
-rw-r--r-- test/ruby/enc/test_utf32.rb 68
-rw-r--r-- test/ruby/test_io_m17n.rb 4
-rw-r--r-- version.h 2
6 files changed, 104 insertions, 9 deletions
diff --git a/enc/utf_32be.c b/enc/utf_32be.c
index 43c07e2e8f..99b1004e0c 100644
--- a/enc/utf_32be.c
+++ b/enc/utf_32be.c
@@ -29,11 +29,23 @@
#include "regenc.h"
+static OnigCodePoint utf32be_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
static int
-utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
- OnigEncoding enc ARG_UNUSED)
+utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
+ OnigEncoding enc)
{
- return 4;
+ if (e < p) {
+ return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+ }
+ else if (e-p < 4) {
+ return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
+ }
+ else {
+ OnigCodePoint c = utf32be_mbc_to_code(p, e, enc);
+ if (!UNICODE_VALID_CODEPOINT_P(c))
+ return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+ return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+ }
}
static int
diff --git a/enc/utf_32le.c b/enc/utf_32le.c
index 31693eed05..58fb3ce0aa 100644
--- a/enc/utf_32le.c
+++ b/enc/utf_32le.c
@@ -29,11 +29,23 @@
#include "regenc.h"
+static OnigCodePoint utf32le_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
static int
-utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
- OnigEncoding enc ARG_UNUSED)
+utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
+ OnigEncoding enc)
{
- return 4;
+ if (e < p) {
+ return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+ }
+ else if (e-p < 4) {
+ return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
+ }
+ else {
+ OnigCodePoint c = utf32le_mbc_to_code(p, e, enc);
+ if (!UNICODE_VALID_CODEPOINT_P(c))
+ return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+ return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+ }
}
static int
diff --git a/regenc.h b/regenc.h
index 5e467961a1..11e3d70d08 100644
--- a/regenc.h
+++ b/regenc.h
@@ -169,6 +169,9 @@ ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, O
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
+#define UNICODE_VALID_CODEPOINT_P(c) ( \
+ ((c) <= 0x10ffff) && \
+ !((c) < 0x10000 && UTF16_IS_SURROGATE((c) >> 8)))
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
OnigEncISO_8859_1_ToLowerCaseTable[c]
diff --git a/test/ruby/enc/test_utf32.rb b/test/ruby/enc/test_utf32.rb
index 29a2240598..4ce205df18 100644
--- a/test/ruby/enc/test_utf32.rb
+++ b/test/ruby/enc/test_utf32.rb
@@ -89,5 +89,73 @@ EOT
assert_equal(sl, "a".ord.chr("utf-32le"))
assert_equal(sb, "a".ord.chr("utf-32be"))
end
+
+ def test_utf32be_valid_encoding
+ all_assertions do |a|
+ [
+ "\x00\x00\x00\x00",
+ "\x00\x00\x00a",
+ "\x00\x00\x30\x40",
+ "\x00\x00\xd7\xff",
+ "\x00\x00\xe0\x00",
+ "\x00\x00\xff\xff",
+ "\x00\x10\xff\xff",
+ ].each {|s|
+ s.force_encoding("utf-32be")
+ a.for(s) {
+ assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+ }
+ }
+ [
+ "a",
+ "\x00a",
+ "\x00\x00a",
+ "\x00\x00\xd8\x00",
+ "\x00\x00\xdb\xff",
+ "\x00\x00\xdc\x00",
+ "\x00\x00\xdf\xff",
+ "\x00\x11\x00\x00",
+ ].each {|s|
+ s.force_encoding("utf-32be")
+ a.for(s) {
+ assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+ }
+ }
+ end
+ end
+
+ def test_utf32le_valid_encoding
+ all_assertions do |a|
+ [
+ "\x00\x00\x00\x00",
+ "a\x00\x00\x00",
+ "\x40\x30\x00\x00",
+ "\xff\xd7\x00\x00",
+ "\x00\xe0\x00\x00",
+ "\xff\xff\x00\x00",
+ "\xff\xff\x10\x00",
+ ].each {|s|
+ s.force_encoding("utf-32le")
+ a.for(s) {
+ assert_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+ }
+ }
+ [
+ "a",
+ "a\x00",
+ "a\x00\x00",
+ "\x00\xd8\x00\x00",
+ "\xff\xdb\x00\x00",
+ "\x00\xdc\x00\x00",
+ "\xff\xdf\x00\x00",
+ "\x00\x00\x11\x00",
+ ].each {|s|
+ s.force_encoding("utf-32le")
+ a.for(s) {
+ assert_not_predicate(s, :valid_encoding?, "#{encdump s}.valid_encoding?")
+ }
+ }
+ end
+ end
end
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 0f1b1644f5..19dd21104e 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -2203,7 +2203,7 @@ EOT
w.binmode
w.puts(0x010a.chr(Encoding::UTF_32BE))
w.puts(0x010a.chr(Encoding::UTF_16BE))
- w.puts(0x0a010000.chr(Encoding::UTF_32LE))
+ w.puts(0x0a01.chr(Encoding::UTF_32LE))
w.puts(0x0a01.chr(Encoding::UTF_16LE))
w.close
end,
@@ -2211,7 +2211,7 @@ EOT
r.binmode
assert_equal("\x00\x00\x01\x0a\n", r.read(5), bug)
assert_equal("\x01\x0a\n", r.read(3), bug)
- assert_equal("\x00\x00\x01\x0a\n", r.read(5), bug)
+ assert_equal("\x01\x0a\x00\x00\n", r.read(5), bug)
assert_equal("\x01\x0a\n", r.read(3), bug)
assert_equal("", r.read, bug)
r.close
diff --git a/version.h b/version.h
index 8722442071..42e3991cae 100644
--- a/version.h
+++ b/version.h
@@ -1,6 +1,6 @@
#define RUBY_VERSION "2.2.7"
#define RUBY_RELEASE_DATE "2017-03-26"
-#define RUBY_PATCHLEVEL 435
+#define RUBY_PATCHLEVEL 436
#define RUBY_RELEASE_YEAR 2017
#define RUBY_RELEASE_MONTH 3