diff options
author | Yusuke Endoh <mame@ruby-lang.org> | 2022-10-25 15:45:40 +0900 |
---|---|---|
committer | Yusuke Endoh <mame@ruby-lang.org> | 2022-10-25 17:02:43 +0900 |
commit | 1d2d25dcadda0764f303183ac091d0c87b432566 (patch) | |
tree | b9e42a7d3aa1b036b646883825d7a6e26fbe8a5c /regenc.c | |
parent | 114e71d06280f9c57b9859ee4405ae89a989ddb6 (diff) |
Prevent potential buffer overrun in onigmo
A code pattern `p + enclen(enc, p, pend)` may lead to a buffer overrun
if the incomplete bytes of a UTF-8 character are placed at the end of a
string. Because this pattern is used in several places in onigmo,
this change fixes the issue on the side of `enclen`: the function should
not return a number that is larger than `pend - p`.
Co-Authored-By: Nobuyoshi Nakada <nobu@ruby-lang.org>
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/6628
Diffstat (limited to 'regenc.c')
-rw-r--r-- | regenc.c | 15 |
1 files changed, 15 insertions, 0 deletions
@@ -52,6 +52,21 @@ onigenc_set_default_encoding(OnigEncoding enc) } extern int +onigenc_mbclen(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc) +{ + int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e); + if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) { + ret = ONIGENC_MBCLEN_CHARFOUND_LEN(ret); + if (ret > (int)(e - p)) ret = (int)(e - p); // just for case + return ret; + } + else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) { + return (int)(e - p); + } + return p < e ? 1 : 0; +} + +extern int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc) { int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e); |