summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
authorKasumi Hanazuki <kasumi@rollingapple.net>2020-08-13 03:37:32 +0000
committerNobuyoshi Nakada <nobu@ruby-lang.org>2020-08-13 20:50:50 +0900
commit5d71eed1a7f0a70db013de59cd7e95bdca0d5c0e (patch)
tree99bc9c8b9bf059d8db50721841cb43bba8ca12d1 /string.c
parent69b5241c360aa180550538dba1c7bf0a6cc8cc22 (diff)
rb_str_{partition,rpartition}_m: Handle /\K/ in pattern
When the pattern given to String#partition and String#rpartition contains a /\K/ (lookbehind) operator, the methods return strings sliced at incorrect positions.

```
# without patch
"abcdbce".partition(/b\Kc/)  # => ["a", "c", "cdbce"]
"abcdbce".rpartition(/b\Kc/) # => ["abcd", "c", "ce"]
```

This patch fixes the problem by using BEG(0) instead of the return value of rb_reg_search.

```
# with patch
"abcdbce".partition(/b\Kc/)  # => ["ab", "c", "dbce"]
"abcdbce".rpartition(/b\Kc/) # => ["abcdb", "c", "e"]
```

As a side effect, this patch makes String#partition 2x faster when the pattern is a costly Regexp by performing the Regexp search only once; the original implementation unexpectedly performed it twice.

Fixes [Bug #17119]
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/3413
Diffstat (limited to 'string.c')
-rw-r--r--string.c45
1 files changed, 22 insertions, 23 deletions
diff --git a/string.c b/string.c
index c2cdd0051a..e1c6e54456 100644
--- a/string.c
+++ b/string.c
@@ -9940,11 +9940,14 @@ rb_str_partition(VALUE str, VALUE sep)
sep = get_pat_quoted(sep, 0);
if (RB_TYPE_P(sep, T_REGEXP)) {
- pos = rb_reg_search(sep, str, 0, 0);
- if (pos < 0) {
+ if (rb_reg_search(sep, str, 0, 0) < 0) {
goto failed;
}
- sep = rb_str_subpat(str, sep, INT2FIX(0));
+ VALUE match = rb_backref_get();
+ struct re_registers *regs = RMATCH_REGS(match);
+
+ pos = BEG(0);
+ sep = rb_str_subseq(str, pos, END(0) - pos);
}
else {
pos = rb_str_index(str, sep, 0);
@@ -9978,37 +9981,33 @@ static VALUE
rb_str_rpartition(VALUE str, VALUE sep)
{
long pos = RSTRING_LEN(str);
- int regex = FALSE;
+ sep = get_pat_quoted(sep, 0);
if (RB_TYPE_P(sep, T_REGEXP)) {
- pos = rb_reg_search(sep, str, pos, 1);
- regex = TRUE;
+ if (rb_reg_search(sep, str, pos, 1) < 0) {
+ goto failed;
+ }
+ VALUE match = rb_backref_get();
+ struct re_registers *regs = RMATCH_REGS(match);
+
+ pos = BEG(0);
+ sep = rb_str_subseq(str, pos, END(0) - pos);
}
else {
- VALUE tmp;
-
- tmp = rb_check_string_type(sep);
- if (NIL_P(tmp)) {
- rb_raise(rb_eTypeError, "type mismatch: %s given",
- rb_obj_classname(sep));
- }
- sep = tmp;
pos = rb_str_sublen(str, pos);
pos = rb_str_rindex(str, sep, pos);
+ if(pos < 0) {
+ goto failed;
+ }
+ pos = rb_str_offset(str, pos);
}
- if (pos < 0) {
- return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), rb_str_dup(str));
- }
- if (regex) {
- sep = rb_reg_nth_match(0, rb_backref_get());
- }
- else {
- pos = rb_str_offset(str, pos);
- }
+
return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
sep,
rb_str_subseq(str, pos+RSTRING_LEN(sep),
RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
+ failed:
+ return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), rb_str_dup(str));
}
/*