From 26eb8e4b4e39aee7c1e4e9c061c9c17daaafc78b Mon Sep 17 00:00:00 2001 From: usa Date: Mon, 17 Aug 2015 08:58:50 +0000 Subject: merge revision(s) 51470: [Backport #11413] * re.c (rb_memsearch): should match only char boundaries in wide character encodings. [ruby-core:70220] [Bug #11413] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_1@51615 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- re.c | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) (limited to 're.c') diff --git a/re.c b/re.c index d26b783466..ceb72b9377 100644 --- a/re.c +++ b/re.c @@ -223,6 +223,32 @@ rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, l return -1; } +static inline long +rb_memsearch_wchar(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, x0 = *xs, *y = ys; + enum {char_size = 2}; + + for (n -= m; n > 0; n -= char_size, y += char_size) { + if (x0 == *y && memcmp(x+1, y+1, m-1) == 0) + return y - ys; + } + return -1; +} + +static inline long +rb_memsearch_qchar(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, x0 = *xs, *y = ys; + enum {char_size = 4}; + + for (n -= m; n > 0; n -= char_size, y += char_size) { + if (x0 == *y && memcmp(x+1, y+1, m-1) == 0) + return y - ys; + } + return -1; +} + long rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc) { @@ -243,15 +269,21 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc) else return -1; } - else if (m <= SIZEOF_VALUE) { - return rb_memsearch_ss(x0, m, y0, n); + else if (rb_enc_mbminlen(enc) == 1) { + if (m <= SIZEOF_VALUE) { + return rb_memsearch_ss(x0, m, y0, n); + } + else if (enc == rb_utf8_encoding()){ + return rb_memsearch_qs_utf8(x0, m, y0, n); + } } - else if (enc == rb_utf8_encoding()){ - return rb_memsearch_qs_utf8(x0, m, y0, n); + else if (rb_enc_mbminlen(enc) == 2) { + return rb_memsearch_wchar(x0, m, y0, n); } - else { - return rb_memsearch_qs(x0, m, y0, n); + else if (rb_enc_mbminlen(enc) == 4) { + return rb_memsearch_qchar(x0, m, y0, n); } + return rb_memsearch_qs(x0, m, y0, n); } #define REG_LITERAL FL_USER5 -- cgit v1.2.3