diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2017-03-12 17:22:20 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2017-03-12 17:22:20 +0000 |
commit | acfebb41ddea04ded300bc1c1aedbf7ebebcf276 (patch) | |
tree | 6d208ca08010e32768aa876a1be60713410dd682 /enc | |
parent | 1b60a978a2cb67a2ea5423d5bed3a1d6edbb2eaf (diff) |
merge revision(s) 57816,57817: [Backport #13292]
fix UTF-32 valid_encoding?
* enc/utf_32be.c (utf32be_mbc_enc_len): check arguments precisely.
[ruby-core:79966] [Bug #13292]
* enc/utf_32le.c (utf32le_mbc_enc_len): ditto.
* regenc.h (UNICODE_VALID_CODEPOINT_P): predicate for valid
Unicode codepoints.
fix UTF-32 valid_encoding?
* test/ruby/test_io_m17n.rb (TestIO_M17N#test_puts_widechar): do
not use invalid codepoint. [ruby-core:79966] [Bug #13292]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_4@57935 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc')
-rw-r--r-- | enc/utf_32be.c | 18 | ||||
-rw-r--r-- | enc/utf_32le.c | 18 |
2 files changed, 30 insertions, 6 deletions
```diff
diff --git a/enc/utf_32be.c b/enc/utf_32be.c
index 995c9d8ed5..17841e52a4 100644
--- a/enc/utf_32be.c
+++ b/enc/utf_32be.c
@@ -30,11 +30,23 @@
 #include "regenc.h"
 #include "iso_8859.h"
 
+static OnigCodePoint utf32be_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
 static int
-utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
-                    OnigEncoding enc ARG_UNUSED)
+utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
+                    OnigEncoding enc)
 {
-  return 4;
+  if (e < p) {
+    return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  }
+  else if (e-p < 4) {
+    return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
+  }
+  else {
+    OnigCodePoint c = utf32be_mbc_to_code(p, e, enc);
+    if (!UNICODE_VALID_CODEPOINT_P(c))
+      return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+    return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+  }
 }
 
 static int
diff --git a/enc/utf_32le.c b/enc/utf_32le.c
index e255f0e246..18b798f102 100644
--- a/enc/utf_32le.c
+++ b/enc/utf_32le.c
@@ -30,11 +30,23 @@
 #include "regenc.h"
 #include "iso_8859.h"
 
+static OnigCodePoint utf32le_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
 static int
-utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
-                    OnigEncoding enc ARG_UNUSED)
+utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
+                    OnigEncoding enc)
 {
-  return 4;
+  if (e < p) {
+    return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  }
+  else if (e-p < 4) {
+    return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
+  }
+  else {
+    OnigCodePoint c = utf32le_mbc_to_code(p, e, enc);
+    if (!UNICODE_VALID_CODEPOINT_P(c))
+      return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+    return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+  }
 }
 
 static int
```