fix UTF-32 valid_encoding?

* enc/utf_32be.c (utf32be_mbc_enc_len): check arguments precisely.
  [ruby-core:79966] [Bug #13292]

* enc/utf_32le.c (utf32le_mbc_enc_len): ditto.

* regenc.h (UNICODE_VALID_CODEPOINT_P): predicate for valid Unicode
  codepoints.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@57816 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-03-09 02:04:10 +0000
committer: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-03-09 02:04:10 +0000
commit: 4171ed6c21a36d88cfc5fe5152a6b58a75cf6240 (patch)
tree: 7e2e4ce7c0bd435f00c4f26e79f023782fd6f67a /enc
parent: e65c9bd17749dc50f5ebd514dd6fc1d6ee99643b (diff)
2 files changed, 30 insertions, 6 deletions
diff --git a/enc/utf_32be.c b/enc/utf_32be.c
index 995c9d8ed5..17841e52a4 100644
--- a/enc/utf_32be.c
+++ b/enc/utf_32be.c
@@ -30,11 +30,23 @@
 #include "regenc.h"
 #include "iso_8859.h"
 
+static OnigCodePoint utf32be_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc); /* forward declaration; called by utf32be_mbc_enc_len below */
 static int
-utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
-		    OnigEncoding enc ARG_UNUSED)
+utf32be_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e, /* classifies the bytes in [p, e) as invalid, incomplete, or one 4-byte character; NOTE(review): p is read below, so ARG_UNUSED on it looks stale */
+		    OnigEncoding enc)
 {
-  return 4;
+  if (e < p) { /* end pointer lies before the start pointer */
+    return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  }
+  else if (e-p < 4) { /* fewer than 4 bytes are available */
+    return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p)); /* argument is how many bytes are missing to reach 4 */
+  }
+  else {
+    OnigCodePoint c = utf32be_mbc_to_code(p, e, enc); /* code point reported by utf32be_mbc_to_code for the bytes at p */
+    if (!UNICODE_VALID_CODEPOINT_P(c)) /* reject values the valid-Unicode-codepoint predicate refuses */
+      return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+    return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4); /* an accepted character is always reported as 4 bytes long */
+  }
 }
 
 static int
diff --git a/enc/utf_32le.c b/enc/utf_32le.c
index e255f0e246..18b798f102 100644
--- a/enc/utf_32le.c
+++ b/enc/utf_32le.c
@@ -30,11 +30,23 @@
 #include "regenc.h"
 #include "iso_8859.h"
 
+static OnigCodePoint utf32le_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc); /* forward declaration; called by utf32le_mbc_enc_len below */
 static int
-utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e ARG_UNUSED,
-		    OnigEncoding enc ARG_UNUSED)
+utf32le_mbc_enc_len(const UChar* p ARG_UNUSED, const OnigUChar* e, /* classifies the bytes in [p, e) as invalid, incomplete, or one 4-byte character; NOTE(review): p is read below, so ARG_UNUSED on it looks stale */
+		    OnigEncoding enc)
 {
-  return 4;
+  if (e < p) { /* end pointer lies before the start pointer */
+    return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+  }
+  else if (e-p < 4) { /* fewer than 4 bytes are available */
+    return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-(int)(e-p)); /* argument is how many bytes are missing to reach 4 */
+  }
+  else {
+    OnigCodePoint c = utf32le_mbc_to_code(p, e, enc); /* code point reported by utf32le_mbc_to_code for the bytes at p */
+    if (!UNICODE_VALID_CODEPOINT_P(c)) /* reject values the valid-Unicode-codepoint predicate refuses */
+      return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+    return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4); /* an accepted character is always reported as 4 bytes long */
+  }
 }
 
 static int
author	nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2017-03-09 02:04:10 +0000
committer	nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2017-03-09 02:04:10 +0000
commit	4171ed6c21a36d88cfc5fe5152a6b58a75cf6240 (patch)
tree	7e2e4ce7c0bd435f00c4f26e79f023782fd6f67a /enc
parent	e65c9bd17749dc50f5ebd514dd6fc1d6ee99643b (diff)