summary | refs | log | tree | commit | diff
path: root/string.c
diff options
context:
space:
mode:
author matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2009-04-20 15:04:18 +0000
committer matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2009-04-20 15:04:18 +0000
commit 254d12215c3223b5820c1356dc12cd38eaa4cc37 (patch)
tree a4688a020834a1c8a315c90891505c57565dd125 /string.c
parent c7853b4344d821667a158bea0288e1861f70047f (diff)
* string.c (rb_str_split_m): faster processing on 7bit strings.
* string.c (ascii_isspace): faster isspace() for 7bit strings.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@23234 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 62
1 files changed, 53 insertions, 9 deletions
diff --git a/string.c b/string.c
index 7d96cdbc16..599b995795 100644
--- a/string.c
+++ b/string.c
@@ -5382,6 +5382,26 @@ rb_str_count(int argc, VALUE *argv, VALUE str)
return INT2NUM(i);
}
+static const char isspacetable[256] = { /* lookup indexed by byte value: 1 = whitespace, 0 = not; 16 entries per row */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, /* 0x00-0x0f: 0x09-0x0d (\t \n \v \f \r) are set */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x2f: 0x20 (space) is set */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8f: this row and all later rows are entirely 0 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* 0xf0-0xff */
+};
+
+#define ascii_isspace(c) isspacetable[(unsigned char)(c)] /* table lookup; the unsigned char cast keeps a negative char value from producing a negative index */
/*
* call-seq:
@@ -5495,21 +5515,45 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
unsigned int c;
end = beg;
- while (ptr < eptr) {
- c = rb_enc_codepoint(ptr, eptr, enc);
- ptr += rb_enc_mbclen(ptr, eptr, enc);
- if (skip) {
- if (rb_enc_isspace(c, enc)) {
+ if (is_ascii_string(str)) {
+ while (ptr < eptr) {
+ c = (unsigned char)*ptr++;
+ if (skip) {
+ if (ascii_isspace(c)) {
+ beg = ptr - bptr;
+ }
+ else {
+ end = ptr - bptr;
+ skip = 0;
+ if (!NIL_P(limit) && lim <= i) break;
+ }
+ }
+ else if (ascii_isspace(c)) {
+ rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
+ skip = 1;
beg = ptr - bptr;
+ if (!NIL_P(limit)) ++i;
}
else {
end = ptr - bptr;
- skip = 0;
- if (!NIL_P(limit) && lim <= i) break;
}
}
- else {
- if (rb_enc_isspace(c, enc)) {
+ }
+ else {
+ while (ptr < eptr) {
+ c = rb_enc_codepoint(ptr, eptr, enc);
+ ptr += rb_enc_mbclen(ptr, eptr, enc);
+ if (skip) {
+ if (rb_enc_isspace(c, enc)) {
+ beg = ptr - bptr;
+ }
+ else {
+ end = ptr - bptr;
+ skip = 0;
+ if (!NIL_P(limit) && lim <= i) break;
+ }
+ }
+ else if (rb_enc_isspace(c, enc)) {
rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
skip = 1;
beg = ptr - bptr;