summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
authornobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2017-08-04 04:39:53 +0000
committernobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2017-08-04 04:39:53 +0000
commit2b770b467455d7d7d6fa0c03ef177820a56b11f5 (patch)
treeb3ef74f67572122fc7ceafa8436bb69c30b9b9a3 /string.c
parent8ba320a410ec08581dbc1f618ac21ebfd1714d7c (diff)
string.c: improve String#scan
* string.c (scan_once, rb_str_scan): improve the performance by 50% for a string pattern, and by 10% for a regexp pattern. get rid of making an intermediate MatchData, which is not used. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59496 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r--string.c47
1 files changed, 31 insertions, 16 deletions
diff --git a/string.c b/string.c
index 63bf6ba9e2..daef497b3d 100644
--- a/string.c
+++ b/string.c
@@ -8564,35 +8564,49 @@ rb_str_strip(VALUE str)
}
static VALUE
-scan_once(VALUE str, VALUE pat, long *start)
+scan_once(VALUE str, VALUE pat, long *start, int set_backref_str)
{
VALUE result, match;
struct re_registers *regs;
int i;
-
- if (rb_pat_search(pat, str, *start, 1) >= 0) {
- match = rb_backref_get();
- regs = RMATCH_REGS(match);
- if (BEG(0) == END(0)) {
+ long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
+ if (pos >= 0) {
+ if (BUILTIN_TYPE(pat) == T_STRING) {
+ regs = NULL;
+ end = pos + RSTRING_LEN(pat);
+ }
+ else {
+ match = rb_backref_get();
+ regs = RMATCH_REGS(match);
+ end = END(0);
+ }
+ if (pos == end) {
rb_encoding *enc = STR_ENC_GET(str);
/*
* Always consume at least one character of the input string
*/
- if (RSTRING_LEN(str) > END(0))
- *start = END(0)+rb_enc_fast_mbclen(RSTRING_PTR(str)+END(0),
- RSTRING_END(str), enc);
+ if (RSTRING_LEN(str) > end)
+ *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
+ RSTRING_END(str), enc);
else
- *start = END(0)+1;
+ *start = end + 1;
}
else {
- *start = END(0);
+ *start = end;
}
- if (regs->num_regs == 1) {
- return rb_reg_nth_match(0, match);
+ if (!regs || regs->num_regs == 1) {
+ result = rb_str_subseq(str, pos, end - pos);
+ OBJ_INFECT(result, pat);
+ return result;
}
result = rb_ary_new2(regs->num_regs);
for (i=1; i < regs->num_regs; i++) {
- rb_ary_push(result, rb_reg_nth_match(i, match));
+ VALUE s = Qnil;
+ if (BEG(i) >= 0) {
+ s = rb_str_subseq(str, BEG(i), END(i)-BEG(i));
+ OBJ_INFECT(s, pat);
+ }
+ rb_ary_push(result, s);
}
return result;
@@ -8645,16 +8659,17 @@ rb_str_scan(VALUE str, VALUE pat)
if (!rb_block_given_p()) {
VALUE ary = rb_ary_new();
- while (!NIL_P(result = scan_once(str, pat, &start))) {
+ while (!NIL_P(result = scan_once(str, pat, &start, 0))) {
last = prev;
prev = start;
rb_ary_push(ary, result);
}
if (last >= 0) rb_pat_search(pat, str, last, 1);
+ else rb_backref_set(Qnil);
return ary;
}
- while (!NIL_P(result = scan_once(str, pat, &start))) {
+ while (!NIL_P(result = scan_once(str, pat, &start, 1))) {
last = prev;
prev = start;
rb_yield(result);