From d89b15cdce8a2fa36fc2a150551f0dd8e58814d7 Mon Sep 17 00:00:00 2001
From: John Hawthorn
Date: Thu, 31 Aug 2023 15:12:47 -0700
Subject: Use end of char boundary in start_with?

Previously we used the next character following the found prefix to
determine if the match ended on a broken character.

This had caused surprising behaviour when a valid character was followed
by a UTF-8 continuation byte.

This commit changes the behaviour to instead look for the end of the
last character in the prefix.

[Bug #19784]

Co-authored-by: ywenc
Co-authored-by: Nobuyoshi Nakada
---
 string.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'string.c')

diff --git a/string.c b/string.c
index 5af5fc4a40..deeed4a12a 100644
--- a/string.c
+++ b/string.c
@@ -10472,7 +10472,7 @@ rb_str_start_with(int argc, VALUE *argv, VALUE str)
             p = RSTRING_PTR(str);
             e = p + slen;
             s = p + tlen;
-            if (!at_char_boundary(p, s, e, enc))
+            if (!at_char_right_boundary(p, s, e, enc))
                 continue;
             if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
                 return Qtrue;
@@ -10554,7 +10554,7 @@ deleted_prefix_length(VALUE str, VALUE prefix)
         }
         const char *strend = strptr + olen;
         const char *after_prefix = strptr + prefixlen;
-        if (!at_char_boundary(strptr, after_prefix, strend, enc)) {
+        if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
             /* prefix does not end at char-boundary */
             return 0;
         }
-- 
cgit v1.2.3