diff options
author | Kasumi Hanazuki <kasumi@rollingapple.net> | 2020-08-13 03:37:32 +0000 |
---|---|---|
committer | Nobuyoshi Nakada <nobu@ruby-lang.org> | 2020-08-13 20:50:50 +0900 |
commit | 5d71eed1a7f0a70db013de59cd7e95bdca0d5c0e (patch) | |
tree | 99bc9c8b9bf059d8db50721841cb43bba8ca12d1 /string.c | |
parent | 69b5241c360aa180550538dba1c7bf0a6cc8cc22 (diff) |
rb_str_{partition,rpartition}_m: Handle /\K/ in pattern
When the pattern given to String#partition or String#rpartition
contains a /\K/ (keep, i.e. lookbehind-like) operator, the methods
return strings sliced at incorrect positions.
```
# without patch
"abcdbce".partition(/b\Kc/) # => ["a", "c", "cdbce"]
"abcdbce".rpartition(/b\Kc/) # => ["abcd", "c", "ce"]
```
This patch fixes the problem by using BEG(0) instead of the return
value of rb_reg_search.
```
# with patch
"abcdbce".partition(/b\Kc/) # => ["ab", "c", "dbce"]
"abcdbce".rpartition(/b\Kc/) # => ["abcdb", "c", "e"]
```
As a side effect, this patch makes String#partition 2x faster when the
pattern is a costly Regexp, because the Regexp search is now performed
only once; the original implementation unintentionally performed it twice.
Fixes [Bug #17119]
Notes:
Merged: https://github.com/ruby/ruby/pull/3413
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 45 |
1 files changed, 22 insertions, 23 deletions
@@ -9940,11 +9940,14 @@ rb_str_partition(VALUE str, VALUE sep)
 
     sep = get_pat_quoted(sep, 0);
     if (RB_TYPE_P(sep, T_REGEXP)) {
-        pos = rb_reg_search(sep, str, 0, 0);
-        if (pos < 0) {
+        if (rb_reg_search(sep, str, 0, 0) < 0) {
             goto failed;
         }
-        sep = rb_str_subpat(str, sep, INT2FIX(0));
+        VALUE match = rb_backref_get();
+        struct re_registers *regs = RMATCH_REGS(match);
+
+        pos = BEG(0);
+        sep = rb_str_subseq(str, pos, END(0) - pos);
     }
     else {
         pos = rb_str_index(str, sep, 0);
@@ -9978,37 +9981,33 @@ static VALUE
 rb_str_rpartition(VALUE str, VALUE sep)
 {
     long pos = RSTRING_LEN(str);
-    int regex = FALSE;
 
+    sep = get_pat_quoted(sep, 0);
     if (RB_TYPE_P(sep, T_REGEXP)) {
-        pos = rb_reg_search(sep, str, pos, 1);
-        regex = TRUE;
+        if (rb_reg_search(sep, str, pos, 1) < 0) {
+            goto failed;
+        }
+        VALUE match = rb_backref_get();
+        struct re_registers *regs = RMATCH_REGS(match);
+
+        pos = BEG(0);
+        sep = rb_str_subseq(str, pos, END(0) - pos);
     }
     else {
-        VALUE tmp;
-
-        tmp = rb_check_string_type(sep);
-        if (NIL_P(tmp)) {
-            rb_raise(rb_eTypeError, "type mismatch: %s given",
-                     rb_obj_classname(sep));
-        }
-        sep = tmp;
         pos = rb_str_sublen(str, pos);
         pos = rb_str_rindex(str, sep, pos);
+        if(pos < 0) {
+            goto failed;
+        }
+        pos = rb_str_offset(str, pos);
     }
-    if (pos < 0) {
-        return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), rb_str_dup(str));
-    }
-    if (regex) {
-        sep = rb_reg_nth_match(0, rb_backref_get());
-    }
-    else {
-        pos = rb_str_offset(str, pos);
-    }
+
     return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
                        sep,
                        rb_str_subseq(str, pos+RSTRING_LEN(sep),
                                      RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
+  failed:
+    return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), rb_str_dup(str));
 }
 
 /*