summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-05-20 07:59:33 +0000
committer: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-05-20 07:59:33 +0000
commit: 8405033139bc3ee4cbdf1b1c415910f7df38ebfd (patch)
tree: 6cbb7e8e46c9acf8f7eeed8aaf5f6c342b969a91
parent: 7992fabaf40f8635653d267592f591d9cc4c511e (diff)
* string.c (rb_enc_strlen_cr): need to set ENC_CODERANGE_7BIT if
  search_nonascii() fails. [ruby-dev:34751]

* string.c (rb_str_reverse): preserve coderange info if the
  receiver is 7bit string.

* string.c (rb_str_reverse_bang): ditto.

* string.c (rb_str_reverse_bang): should have called
  single_byte_optimizable before rb_str_modify() that clears
  coderange info.

* string.c (tr_trans): handle single bytes more eagerly.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@16492 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--  ChangeLog                16
-rw-r--r--  string.c                 50
-rw-r--r--  test/ruby/test_array.rb   2
3 files changed, 52 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index 20dd2fe1d2..e59dbaab46 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -10,6 +10,22 @@ Mon May 19 23:32:12 2008 Koichi Sasada <ko1@atdot.net>
* vm.c (invoke_block_from_c): fix call flow.
+Tue May 20 08:38:56 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * string.c (rb_enc_strlen_cr): need to set ENC_CODERANGE_7BIT if
+ search_nonascii() fails. [ruby-dev:34751]
+
+ * string.c (rb_str_reverse): preserve coderange info if the
+ receiver is 7bit string.
+
+ * string.c (rb_str_reverse_bang): ditto.
+
+ * string.c (rb_str_reverse_bang): should have called
+ single_byte_optimizable before rb_str_modify() that clears
+ coderange info.
+
+ * string.c (tr_trans): handle single bytes more eagerly.
+
Mon May 19 23:19:35 2008 Yusuke Endoh <mame@tsg.ne.jp>
* regexec.c (slow_search): check the case when the length is 1.
diff --git a/string.c b/string.c
index a80f60555f..2f20d3dc88 100644
--- a/string.c
+++ b/string.c
@@ -726,6 +726,7 @@ rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
if (ISASCII(*p)) {
q = search_nonascii(p, e);
if (!q) {
+ if (!*cr) *cr = ENC_CODERANGE_7BIT;
return c + (e - p);
}
c += q - p;
@@ -1237,7 +1238,7 @@ rb_str_substr(VALUE str, long beg, long len)
rb_encoding *enc = STR_ENC_GET(str);
VALUE str2;
char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
- int singlebyte = single_byte_optimizable(str);
+ int singlebyte;
if (len < 0) return Qnil;
if (!RSTRING_LEN(str)) {
@@ -1263,6 +1264,7 @@ rb_str_substr(VALUE str, long beg, long len)
else if (beg > 0 && beg > str_strlen(str, enc)) {
return Qnil;
}
+ singlebyte = single_byte_optimizable(str);
if (len == 0) {
p = 0;
}
@@ -3521,14 +3523,15 @@ static VALUE
rb_str_reverse(VALUE str)
{
rb_encoding *enc;
- VALUE obj;
+ VALUE rev;
char *s, *e, *p;
+ int single = 1;
if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
enc = STR_ENC_GET(str);
- obj = rb_str_new5(str, 0, RSTRING_LEN(str));
+ rev = rb_str_new5(str, 0, RSTRING_LEN(str));
s = RSTRING_PTR(str); e = RSTRING_END(str);
- p = RSTRING_END(obj);
+ p = RSTRING_END(rev);
if (RSTRING_LEN(str) > 1) {
if (single_byte_optimizable(str)) {
@@ -3540,17 +3543,26 @@ rb_str_reverse(VALUE str)
while (s < e) {
int clen = rb_enc_mbclen(s, e, enc);
+ if (clen > 1 || (*s & 0x80)) single = 0;
p -= clen;
memcpy(p, s, clen);
s += clen;
}
}
}
- STR_SET_LEN(obj, RSTRING_LEN(str));
- OBJ_INFECT(obj, str);
- rb_enc_cr_str_copy_for_substr(obj, str);
+ STR_SET_LEN(rev, RSTRING_LEN(str));
+ OBJ_INFECT(rev, str);
+ if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) {
+ if (single) {
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
+ }
+ else {
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
+ }
+ }
+ rb_enc_cr_str_copy_for_substr(rev, str);
- return obj;
+ return rev;
}
@@ -3564,19 +3576,25 @@ rb_str_reverse(VALUE str)
static VALUE
rb_str_reverse_bang(VALUE str)
{
- char *s, *e, c;
-
if (RSTRING_LEN(str) > 1) {
- rb_str_modify(str);
- s = RSTRING_PTR(str);
- e = RSTRING_END(str) - 1;
-
if (single_byte_optimizable(str)) {
+ char *s, *e, c;
+ int cr = ENC_CODERANGE(str);
+ int single = 1;
+
+ rb_str_modify(str);
+ s = RSTRING_PTR(str);
+ e = RSTRING_END(str) - 1;
while (s < e) {
c = *s;
+ if (*s & 0x80) single = 0;
*s++ = *e;
*e-- = c;
}
+ if (cr == ENC_CODERANGE_UNKNOWN && single) {
+ cr = ENC_CODERANGE_7BIT;
+ }
+ ENC_CODERANGE_SET(str, cr);
}
else {
rb_str_shared_replace(str, rb_str_reverse(str));
@@ -4226,6 +4244,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
int c, c0, last = 0, modify = 0, i, l;
char *s, *send;
VALUE hash = 0;
+ int singlebyte = single_byte_optimizable(str);
StringValue(src);
StringValue(repl);
@@ -4288,6 +4307,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
if (r == -1) r = trrepl.now;
if (c < 256) {
trans[c] = r;
+ if (r > 255) singlebyte = 0;
}
else {
if (!hash) hash = rb_hash_new();
@@ -4348,7 +4368,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
STR_SET_NOEMBED(str);
RSTRING(str)->as.heap.aux.capa = max;
}
- else if (rb_enc_mbmaxlen(enc) == 1) {
+ else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) {
while (s < send) {
c = (unsigned char)*s;
if (trans[c] >= 0) {
diff --git a/test/ruby/test_array.rb b/test/ruby/test_array.rb
index 463701b846..54d54e3c61 100644
--- a/test/ruby/test_array.rb
+++ b/test/ruby/test_array.rb
@@ -74,7 +74,7 @@ class TestArray < Test::Unit::TestCase
end
def test_split_0
- x = "The Boassert of Mormon"
+ x = "The Book of Mormon"
assert_equal(x.reverse, x.split(//).reverse!.join)
assert_equal(x.reverse, x.reverse!)
assert_equal("g:n:i:r:t:s: :e:t:y:b: :1", "1 byte string".split(//).reverse.join(":"))