summaryrefslogtreecommitdiff
path: root/enc
diff options
context:
space:
mode:
author nagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-03-27 17:34:10 +0000
committer nagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-03-27 17:34:10 +0000
commit 909331e26eb38a8da2f40bd88225d423ddf67cac (patch)
tree 1e8352b4a258acc044394cae163b2a0f4a6a3528 /enc
parent ff6e8710ee889cc1f3803468d32ca6f26e937d0f (diff)
merge revision(s) 57816,57817: [Backport #13292]
fix UTF-32 valid_encoding?

* enc/utf_32be.c (utf32be_mbc_enc_len): check arguments precisely.
  [ruby-core:79966] [Bug #13292]

* enc/utf_32le.c (utf32le_mbc_enc_len): ditto.

* regenc.h (UNICODE_VALID_CODEPOINT_P): predicate for valid Unicode
  codepoints.

fix UTF-32 valid_encoding?

* test/ruby/test_io_m17n.rb (TestIO_M17N#test_puts_widechar): do not
  use invalid codepoint.  [ruby-core:79966] [Bug #13292]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_3@58183 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc')
-rw-r--r-- enc/utf_32be.c 18
-rw-r--r-- enc/utf_32le.c 18
2 files changed, 30 insertions, 6 deletions
diff --git a/enc/utf_32be.c b/enc/utf_32be.c
index 43c07e2e8f..99b1004e0c 100644
--- a/enc/utf_32be.c
+++ b/enc/utf_32be.c
@@ -29,11 +29,23 @@
#include "regenc.h"
+static OnigCodePoint utf32be_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
static int
-utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
- OnigEncoding enc ARG_UNUSED)
+utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
+ OnigEncoding enc)
{
- return 4;
+ if (e < p) {
+ return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+ }
+ else if (e-p < 4) {
+ return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
+ }
+ else {
+ OnigCodePoint c = utf32be_mbc_to_code(p, e, enc);
+ if (!UNICODE_VALID_CODEPOINT_P(c))
+ return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+ return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+ }
}
static int
diff --git a/enc/utf_32le.c b/enc/utf_32le.c
index 31693eed05..58fb3ce0aa 100644
--- a/enc/utf_32le.c
+++ b/enc/utf_32le.c
@@ -29,11 +29,23 @@
#include "regenc.h"
+static OnigCodePoint utf32le_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
static int
-utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
- OnigEncoding enc ARG_UNUSED)
+utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e,
+ OnigEncoding enc)
{
- return 4;
+ if (e < p) {
+ return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+ }
+ else if (e-p < 4) {
+ return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p));
+ }
+ else {
+ OnigCodePoint c = utf32le_mbc_to_code(p, e, enc);
+ if (!UNICODE_VALID_CODEPOINT_P(c))
+ return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+ return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+ }
}
static int