summary | refs | log | tree | commit | diff
path: root/string.c
diff options
context:
space:
mode:
author nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-04-17 05:34:46 +0000
committer nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-04-17 05:34:46 +0000
commit e1eb54b99d592c460fea4b0f897f5e8e61c96c07 (patch)
tree d351193f488c5f3640753b32014c61dc86d444af /string.c
parent 62c07674e06443075872bdc11662de6408bd00d2 (diff)
string.c: improve splitting into chars
* string.c (rb_str_split_m): improve splitting into chars by an
  empty string, without a regexp.

Comparison:
  to_chars-1
    built-ruby:   1273527.6 i/s
    compare-ruby:  189423.3 i/s - 6.72x slower
  to_chars-10
    built-ruby:    120993.5 i/s
    compare-ruby:   37075.8 i/s - 3.26x slower
  to_chars-100
    built-ruby:     15646.4 i/s
    compare-ruby:    4012.1 i/s - 3.90x slower
  to_chars-1000
    built-ruby:      1295.1 i/s
    compare-ruby:     408.5 i/s - 3.17x slower

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67582 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 30
1 files changed, 20 insertions, 10 deletions
diff --git a/string.c b/string.c
index 3bf6f42e2a..153fcc59b7 100644
--- a/string.c
+++ b/string.c
@@ -7759,7 +7759,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
rb_encoding *enc;
VALUE spat;
VALUE limit;
- enum {awk, string, regexp} split_type;
+ enum {awk, string, regexp, chars} split_type;
long beg, end, i = 0, empty_count = -1;
int lim = 0;
VALUE result, tmp;
@@ -7801,8 +7801,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
split_type = string;
if (RSTRING_LEN(spat) == 0) {
/* Special case - split into chars */
- spat = rb_reg_regcomp(spat);
- split_type = regexp;
+ split_type = chars;
}
else if (rb_enc_asciicompat(enc2) == 1) {
if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {
@@ -7823,9 +7822,9 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
if (result) result = rb_ary_new();
beg = 0;
+ char *ptr = RSTRING_PTR(str);
+ char *eptr = RSTRING_END(str);
if (split_type == awk) {
- char *ptr = RSTRING_PTR(str);
- char *eptr = RSTRING_END(str);
char *bptr = ptr;
int skip = 1;
unsigned int c;
@@ -7884,10 +7883,8 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
}
}
else if (split_type == string) {
- char *ptr = RSTRING_PTR(str);
char *str_start = ptr;
char *substr_start = ptr;
- char *eptr = RSTRING_END(str);
char *sptr = RSTRING_PTR(spat);
long slen = RSTRING_LEN(spat);
@@ -7908,8 +7905,21 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
}
beg = ptr - str_start;
}
+ else if (split_type == chars) {
+ char *str_start = ptr;
+ int n;
+
+ mustnot_broken(str);
+ enc = rb_enc_get(str);
+ while (ptr < eptr &&
+ (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
+ SPLIT_STR(ptr - str_start, n);
+ ptr += n;
+ if (!NIL_P(limit) && lim <= ++i) break;
+ }
+ beg = ptr - str_start;
+ }
else {
- char *ptr = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
long start = beg;
long idx;
@@ -7924,14 +7934,14 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
break;
}
else if (last_null == 1) {
- SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, ptr+len, enc));
+ SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
beg = start;
}
else {
if (start == len)
start++;
else
- start += rb_enc_fast_mbclen(ptr+start,ptr+len,enc);
+ start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
last_null = 1;
continue;
}