Prevent potential buffer overrun in onigmo

A code pattern `p + enclen(enc, p, pend)` may lead to a buffer overrun if the incomplete bytes of a UTF-8 character are placed at the end of a string. Because this pattern is used in several places in onigmo, this change fixes the issue on the side of `enclen`: the function should not return a number larger than `pend - p`. Co-Authored-By: Nobuyoshi Nakada <nobu@ruby-lang.org>
author: Yusuke Endoh <mame@ruby-lang.org> 2022-10-25 15:45:40 +0900
committer: Yusuke Endoh <mame@ruby-lang.org> 2022-10-25 17:02:43 +0900
commit: 1d2d25dcadda0764f303183ac091d0c87b432566 (patch)
tree: b9e42a7d3aa1b036b646883825d7a6e26fbe8a5c /regenc.c
parent: 114e71d06280f9c57b9859ee4405ae89a989ddb6 (diff)
1 files changed, 15 insertions, 0 deletions
diff --git a/regenc.c b/regenc.c
index 16d62fdf40..fc131d2533 100644
--- a/regenc.c
+++ b/regenc.c
@@ -52,6 +52,21 @@ onigenc_set_default_encoding(OnigEncoding enc)
 }
 
 extern int
+onigenc_mbclen(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc)
+{
+  int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);
+  if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) {
+    ret = ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
+    if (ret > (int)(e - p)) ret = (int)(e - p); // just in case: never report more bytes than remain before e
+    return ret;
+  }
+  else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) {
+    return (int)(e - p); // more bytes needed than are available: return only what remains
+  }
+  return p < e ? 1 : 0; // otherwise: 1 if any byte remains, 0 when p has reached e
+}
+
+extern int
 onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc)
 {
   int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);
author	Yusuke Endoh <mame@ruby-lang.org>	2022-10-25 15:45:40 +0900
committer	Yusuke Endoh <mame@ruby-lang.org>	2022-10-25 17:02:43 +0900
commit	1d2d25dcadda0764f303183ac091d0c87b432566 (patch)
tree	b9e42a7d3aa1b036b646883825d7a6e26fbe8a5c /regenc.c
parent	114e71d06280f9c57b9859ee4405ae89a989ddb6 (diff)