summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- re.c 44
-rw-r--r-- string.c 7
-rw-r--r-- test/ruby/test_m17n.rb 3
-rw-r--r-- version.h 2
5 files changed, 48 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 07e34f4cd7..1b9142cfdf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Sun Aug 16 03:14:04 2015 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * re.c (rb_memsearch): should match only char boundaries in wide
+ character encodings. [ruby-core:70220] [Bug #11413]
+
Sun Aug 16 03:00:44 2015 Eric Wong <e@80x24.org>
* symbol.h (struct RSymbol): add hashval field
diff --git a/re.c b/re.c
index 7e789f56df..a94bf35679 100644
--- a/re.c
+++ b/re.c
@@ -221,6 +221,32 @@ rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, l
return -1;
}
/*
 * Search for the m-byte pattern xs inside the n-byte buffer ys, testing
 * only byte offsets that are multiples of char_size (2).  A hit can
 * therefore never start in the middle of a 2-byte unit.
 *
 * Returns the byte offset of the first match, or -1 if there is none.
 * The first pattern byte is read unconditionally and m-1 is passed to
 * memcmp, so m must be at least 1.
 */
static inline long
rb_memsearch_wchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
    const unsigned char *x = xs, x0 = *xs, *y = ys;
    enum {char_size = 2};

    /* After `n -= m`, n is the distance from y to the last offset at
     * which the pattern still fits.  The bound is inclusive so that a
     * match ending exactly at the end of ys is also tested. */
    for (n -= m; n >= 0; n -= char_size, y += char_size) {
        if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
            return y - ys;
    }
    return -1;
}
+
/*
 * Search for the m-byte pattern xs inside the n-byte buffer ys, testing
 * only byte offsets that are multiples of char_size (4).  A hit can
 * therefore never start in the middle of a 4-byte unit.
 *
 * Returns the byte offset of the first match, or -1 if there is none.
 * The first pattern byte is read unconditionally and m-1 is passed to
 * memcmp, so m must be at least 1.
 */
static inline long
rb_memsearch_qchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
    const unsigned char *x = xs, x0 = *xs, *y = ys;
    enum {char_size = 4};

    /* After `n -= m`, n is the distance from y to the last offset at
     * which the pattern still fits.  The bound is inclusive so that a
     * match ending exactly at the end of ys is also tested. */
    for (n -= m; n >= 0; n -= char_size, y += char_size) {
        if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
            return y - ys;
    }
    return -1;
}
+
long
rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
{
@@ -241,15 +267,21 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
else
return -1;
}
- else if (m <= SIZEOF_VALUE) {
- return rb_memsearch_ss(x0, m, y0, n);
+ else if (LIKELY(rb_enc_mbminlen(enc) == 1)) {
+ if (m <= SIZEOF_VALUE) {
+ return rb_memsearch_ss(x0, m, y0, n);
+ }
+ else if (enc == rb_utf8_encoding()){
+ return rb_memsearch_qs_utf8(x0, m, y0, n);
+ }
}
- else if (enc == rb_utf8_encoding()){
- return rb_memsearch_qs_utf8(x0, m, y0, n);
+ else if (LIKELY(rb_enc_mbminlen(enc) == 2)) {
+ return rb_memsearch_wchar(x0, m, y0, n);
}
- else {
- return rb_memsearch_qs(x0, m, y0, n);
+ else if (LIKELY(rb_enc_mbminlen(enc) == 4)) {
+ return rb_memsearch_qchar(x0, m, y0, n);
}
+ return rb_memsearch_qs(x0, m, y0, n);
}
#define REG_LITERAL FL_USER5
diff --git a/string.c b/string.c
index 20d77672ea..53fc9471b0 100644
--- a/string.c
+++ b/string.c
@@ -6373,15 +6373,10 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
}
enc = STR_ENC_GET(str);
- if (NIL_P(spat)) {
- if (!NIL_P(rb_fs)) {
- spat = rb_fs;
- goto fs_set;
- }
+ if (NIL_P(spat) && NIL_P(spat = rb_fs)) {
split_type = awk;
}
else {
- fs_set:
spat = get_pat_quoted(spat, 0);
if (BUILTIN_TYPE(spat) == T_STRING) {
rb_encoding *enc2 = STR_ENC_GET(spat);
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index b024ea7b66..980f9c86b0 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -1236,6 +1236,9 @@ class TestM17N < Test::Unit::TestCase
each_encoding("abc,def", ",", "abc", "def") do |str, sep, *expected|
assert_equal(expected, str.split(sep, -1))
end
+ each_encoding("abc\0def", "\0", "abc", "def") do |str, sep, *expected|
+ assert_equal(expected, str.split(sep, -1))
+ end
end
def test_nonascii_method_name
diff --git a/version.h b/version.h
index 0c426b02c7..ca51a1a3ea 100644
--- a/version.h
+++ b/version.h
@@ -1,6 +1,6 @@
#define RUBY_VERSION "2.2.3"
#define RUBY_RELEASE_DATE "2015-08-16"
-#define RUBY_PATCHLEVEL 168
+#define RUBY_PATCHLEVEL 169
#define RUBY_RELEASE_YEAR 2015
#define RUBY_RELEASE_MONTH 8