summary | refs | log | tree | commit | diff
path: root/re.c
diff options
context:
space:
mode:
author nagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2015-08-15 18:30:35 +0000
committer nagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2015-08-15 18:30:35 +0000
commit 65f49d2dcfad3eb436889f724808f30baa943098 (patch)
tree d28554452a41f6a172681ac6a5e73c6517eb5cb5 /re.c
parent 343b2aa615b109ea6891326f45a465e90233bbeb (diff)
merge revision(s) 51470: [Backport #11413]
* re.c (rb_memsearch): should match only char boundaries in wide character encodings. [ruby-core:70220] [Bug #11413]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_2@51590 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 're.c')
-rw-r--r-- re.c 44
1 files changed, 38 insertions, 6 deletions
diff --git a/re.c b/re.c
index 7e789f56df..a94bf35679 100644
--- a/re.c
+++ b/re.c
@@ -221,6 +221,32 @@ rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, l
return -1;
}
+static inline long
+rb_memsearch_wchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+ const unsigned char *x = xs, x0 = *xs, *y = ys;
+ enum {char_size = 2};
+
+ for (n -= m; n > 0; n -= char_size, y += char_size) {
+ if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
+ return y - ys;
+ }
+ return -1;
+}
+
+static inline long
+rb_memsearch_qchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+ const unsigned char *x = xs, x0 = *xs, *y = ys;
+ enum {char_size = 4};
+
+ for (n -= m; n > 0; n -= char_size, y += char_size) {
+ if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
+ return y - ys;
+ }
+ return -1;
+}
+
long
rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
{
@@ -241,15 +267,21 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
else
return -1;
}
- else if (m <= SIZEOF_VALUE) {
- return rb_memsearch_ss(x0, m, y0, n);
+ else if (LIKELY(rb_enc_mbminlen(enc) == 1)) {
+ if (m <= SIZEOF_VALUE) {
+ return rb_memsearch_ss(x0, m, y0, n);
+ }
+ else if (enc == rb_utf8_encoding()){
+ return rb_memsearch_qs_utf8(x0, m, y0, n);
+ }
}
- else if (enc == rb_utf8_encoding()){
- return rb_memsearch_qs_utf8(x0, m, y0, n);
+ else if (LIKELY(rb_enc_mbminlen(enc) == 2)) {
+ return rb_memsearch_wchar(x0, m, y0, n);
}
- else {
- return rb_memsearch_qs(x0, m, y0, n);
+ else if (LIKELY(rb_enc_mbminlen(enc) == 4)) {
+ return rb_memsearch_qchar(x0, m, y0, n);
}
+ return rb_memsearch_qs(x0, m, y0, n);
}
#define REG_LITERAL FL_USER5