summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-02-15 08:14:40 +0000
committer: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-02-15 08:14:40 +0000
commit: 8b09f7015a6edf1aefecaed421c7fd4b750353bc (patch)
tree: ce1605f45357063be2c8c576ee355bff74997f3c
parent: 878bbd1199167d6beb0faa85a6d8fe5e8122ee49 (diff)
* string.c (str_strlen): use search_nonascii() for performance.
* string.c (str_nth): ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15486 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 6
-rw-r--r-- encoding.c 4
-rw-r--r-- string.c 35
3 files changed, 34 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index 752b373d6a..8051b8266e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Fri Feb 15 17:12:41 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * string.c (str_strlen): use search_nonascii() for performance.
+
+ * string.c (str_nth): ditto.
+
Fri Feb 15 16:22:49 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
* io.c (open_key_args): allow specifying both :mode and :encoding.
diff --git a/encoding.c b/encoding.c
index dff815f266..77d3f996d7 100644
--- a/encoding.c
+++ b/encoding.c
@@ -645,8 +645,8 @@ rb_enc_check(VALUE str1, VALUE str2)
rb_encoding *enc = rb_enc_compatible(str1, str2);
if (!enc)
rb_raise(rb_eArgError, "character encodings differ: %s and %s",
- rb_enc_name(rb_enc_get(str1)),
- rb_enc_name(rb_enc_get(str2)));
+ rb_enc_name(rb_enc_get(str1)),
+ rb_enc_name(rb_enc_get(str2)));
return enc;
}
diff --git a/string.c b/string.c
index 1f6e2f8450..f0ac3844e3 100644
--- a/string.c
+++ b/string.c
@@ -591,11 +591,21 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
static long
str_strlen(VALUE str, rb_encoding *enc)
{
- long len;
+ long len = 0;
+ const char *p, *e;
if (single_byte_optimizable(str)) return RSTRING_LEN(str);
if (!enc) enc = STR_ENC_GET(str);
- len = rb_enc_strlen(RSTRING_PTR(str), RSTRING_END(str), enc);
+ p = RSTRING_PTR(str);
+ e = RSTRING_END(str);
+ if (rb_enc_asciicompat(enc)) {
+ const char *p2 = search_nonascii(p, e);
+
+ if (!p2) return RSTRING_LEN(str);
+ len = p2 - p;
+ p = p2;
+ }
+ len += rb_enc_strlen(p, e, enc);
if (len < 0) {
rb_raise(rb_eArgError, "invalid mbstring sequence");
}
@@ -886,8 +896,17 @@ str_nth(const char *p, const char *e, int nth, rb_encoding *enc, int singlebyte)
{
if (singlebyte)
p += nth;
- else
+ else {
+ if (rb_enc_asciicompat(enc)) {
+ const char *p2 = search_nonascii(p, e);
+
+ if (!p2 || p + nth < p2)
+ return (char*)p + nth;
+ nth -= p2 - p;
+ p = p2;
+ }
p = rb_enc_nth(p, e, nth, enc);
+ }
if (!p) return 0;
if (p > e) p = e;
return (char *)p;
@@ -2805,14 +2824,12 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
enc = rb_enc_compatible(str, repl);
if (!enc) {
rb_encoding *str_enc = STR_ENC_GET(str);
- if (coderange_scan(RSTRING_PTR(str), BEG(0), str_enc) !=
- ENC_CODERANGE_7BIT ||
+ if (coderange_scan(RSTRING_PTR(str), BEG(0), str_enc) != ENC_CODERANGE_7BIT ||
coderange_scan(RSTRING_PTR(str)+END(0),
- RSTRING_LEN(str)-END(0), str_enc) !=
- ENC_CODERANGE_7BIT) {
+ RSTRING_LEN(str)-END(0), str_enc) != ENC_CODERANGE_7BIT) {
rb_raise(rb_eArgError, "character encodings differ: %s and %s",
- rb_enc_name(str_enc),
- rb_enc_name(STR_ENC_GET(repl)));
+ rb_enc_name(str_enc),
+ rb_enc_name(STR_ENC_GET(repl)));
}
enc = STR_ENC_GET(repl);
}