diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | re.c | 44 | ||||
-rw-r--r-- | string.c | 7 | ||||
-rw-r--r-- | test/ruby/test_m17n.rb | 3 | ||||
-rw-r--r-- | version.h | 2 |
5 files changed, 48 insertions, 13 deletions
@@ -1,3 +1,8 @@ +Sun Aug 16 03:14:04 2015 Nobuyoshi Nakada <nobu@ruby-lang.org> + + * re.c (rb_memsearch): should match only char boundaries in wide + character encodings. [ruby-core:70220] [Bug #11413] + Sun Aug 16 03:00:44 2015 Eric Wong <e@80x24.org> * symbol.h (struct RSymbol): add hashval field @@ -221,6 +221,32 @@ rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, l return -1; } +static inline long +rb_memsearch_wchar(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, x0 = *xs, *y = ys; + enum {char_size = 2}; + + for (n -= m; n > 0; n -= char_size, y += char_size) { + if (x0 == *y && memcmp(x+1, y+1, m-1) == 0) + return y - ys; + } + return -1; +} + +static inline long +rb_memsearch_qchar(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, x0 = *xs, *y = ys; + enum {char_size = 4}; + + for (n -= m; n > 0; n -= char_size, y += char_size) { + if (x0 == *y && memcmp(x+1, y+1, m-1) == 0) + return y - ys; + } + return -1; +} + long rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc) { @@ -241,15 +267,21 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc) else return -1; } - else if (m <= SIZEOF_VALUE) { - return rb_memsearch_ss(x0, m, y0, n); + else if (LIKELY(rb_enc_mbminlen(enc) == 1)) { + if (m <= SIZEOF_VALUE) { + return rb_memsearch_ss(x0, m, y0, n); + } + else if (enc == rb_utf8_encoding()){ + return rb_memsearch_qs_utf8(x0, m, y0, n); + } } - else if (enc == rb_utf8_encoding()){ - return rb_memsearch_qs_utf8(x0, m, y0, n); + else if (LIKELY(rb_enc_mbminlen(enc) == 2)) { + return rb_memsearch_wchar(x0, m, y0, n); } - else { - return rb_memsearch_qs(x0, m, y0, n); + else if (LIKELY(rb_enc_mbminlen(enc) == 4)) { + return rb_memsearch_qchar(x0, m, y0, n); } + return rb_memsearch_qs(x0, m, y0, n); } #define REG_LITERAL FL_USER5 @@ -6373,15 +6373,10 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) } enc = STR_ENC_GET(str); - if (NIL_P(spat)) { - if (!NIL_P(rb_fs)) { - spat = rb_fs; - goto fs_set; - } + if (NIL_P(spat) && NIL_P(spat = rb_fs)) { split_type = awk; } else { - fs_set: spat = get_pat_quoted(spat, 0); if (BUILTIN_TYPE(spat) == T_STRING) { rb_encoding *enc2 = STR_ENC_GET(spat); diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index b024ea7b66..980f9c86b0 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -1236,6 +1236,9 @@ class TestM17N < Test::Unit::TestCase each_encoding("abc,def", ",", "abc", "def") do |str, sep, *expected| assert_equal(expected, str.split(sep, -1)) end + each_encoding("abc\0def", "\0", "abc", "def") do |str, sep, *expected| + assert_equal(expected, str.split(sep, -1)) + end end def test_nonascii_method_name @@ -1,6 +1,6 @@ #define RUBY_VERSION "2.2.3" #define RUBY_RELEASE_DATE "2015-08-16" -#define RUBY_PATCHLEVEL 168 +#define RUBY_PATCHLEVEL 169 #define RUBY_RELEASE_YEAR 2015 #define RUBY_RELEASE_MONTH 8 |