diff options
field | value | date |
---|---|---|
author | yugui <yugui@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2009-03-03 11:39:19 +0000 |
committer | yugui <yugui@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2009-03-03 11:39:19 +0000 |
commit | f3c363c6f72d189703627538ecc918b0e31debde (patch) | |
tree | 2c7bf3ef15a283b6c8ef00fa91e2262c3f2221be | |
parent | 725a2369fd1ddd3ee79ee3325c07a2fd850cc15a (diff) | |

merges r22505 and r22547 from trunk into ruby_1_9_1.
--
* string.c (tr_trans): should not be affected by the encoding of
replacement unless actually modified. [ruby-talk:328967]
--
* string.c (tr_trans): should recalculate coderange.
[ruby-core:22326] (reopened at [ruby-core:22328])
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_1@22731 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
mode | file | lines changed |
---|---|---|
-rw-r--r-- | ChangeLog | 10 |
-rw-r--r-- | string.c | 40 |
-rw-r--r-- | test/ruby/test_string.rb | 9 |

3 files changed, 51 insertions, 8 deletions
@@ -1,3 +1,13 @@ +Sun Feb 22 22:42:20 2009 Nobuyoshi Nakada <nobu@ruby-lang.org> + + * string.c (tr_trans): should recalculate coderange. + [ruby-core:22326] (reopened at [ruby-core:22328]) + +Sun Feb 22 14:33:06 2009 Nobuyoshi Nakada <nobu@ruby-lang.org> + + * string.c (tr_trans): should not be affected by the encoding of + replacement unless actually modified. [ruby-talk:328967] + Fri Feb 6 12:11:24 2009 NAKAMURA Usaku <usa@ruby-lang.org> * ruby.c (process_options): set initial default_external before -r. @@ -4603,6 +4603,10 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) int singlebyte = single_byte_optimizable(str); int cr; +#define CHECK_IF_ASCII(c) \ + (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \ + (cr = ENC_CODERANGE_VALID) : 0) + StringValue(src); StringValue(repl); if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil; @@ -4674,6 +4678,8 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) } } + if (cr == ENC_CODERANGE_VALID) + cr = ENC_CODERANGE_7BIT; str_modify_keep_cr(str); s = RSTRING_PTR(str); send = RSTRING_END(str); if (sflag) { @@ -4682,8 +4688,10 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) char *buf = ALLOC_N(char, max), *t = buf; while (s < send) { - c0 = c = rb_enc_codepoint(s, send, enc); - tlen = clen = rb_enc_codelen(c, enc); + int may_modify = 0; + c0 = c = rb_enc_codepoint(s, send, e1); + clen = rb_enc_codelen(c, e1); + tlen = enc == e1 ? 
clen : rb_enc_codelen(c, enc); s += clen; if (c < 256) { @@ -4702,7 +4710,10 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) c = errc; } if (c != -1) { - if (save == c) continue; + if (save == c) { + CHECK_IF_ASCII(c); + continue; + } save = c; tlen = rb_enc_codelen(c, enc); modify = 1; @@ -4710,6 +4721,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) else { save = -1; c = c0; + if (enc != e1) may_modify = 1; } while (t - buf + tlen >= max) { offset = t - buf; @@ -4718,6 +4730,10 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) t = buf + offset; } rb_enc_mbcput(c, t, enc); + if (may_modify && memcmp(s, t, tlen) != 0) { + modify = 1; + } + CHECK_IF_ASCII(c); t += tlen; } *t = '\0'; @@ -4740,6 +4756,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) modify = 1; } } + CHECK_IF_ASCII(c); s++; } } @@ -4749,8 +4766,10 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) char *buf = ALLOC_N(char, max), *t = buf; while (s < send) { - c0 = c = rb_enc_codepoint(s, send, enc); - tlen = clen = rb_enc_codelen(c, enc); + int may_modify = 0; + c0 = c = rb_enc_codepoint(s, send, e1); + clen = rb_enc_codelen(c, e1); + tlen = enc == e1 ? 
clen : rb_enc_codelen(c, enc); if (c < 256) { c = trans[c]; @@ -4772,8 +4791,8 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) modify = 1; } else { - modify = 1; c = c0; + if (enc != e1) may_modify = 1; } while (t - buf + tlen >= max) { offset = t - buf; @@ -4781,7 +4800,13 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) REALLOC_N(buf, char, max); t = buf + offset; } - if (s != t) rb_enc_mbcput(c, t, enc); + if (s != t) { + rb_enc_mbcput(c, t, enc); + if (may_modify && memcmp(s, t, tlen) != 0) { + modify = 1; + } + } + CHECK_IF_ASCII(c); s += clen; t += tlen; } @@ -4796,7 +4821,6 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) } if (modify) { - cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(repl)); if (cr != ENC_CODERANGE_BROKEN) ENC_CODERANGE_SET(str, cr); rb_enc_associate(str, enc); diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index eba0256439..2315a8c560 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -1395,6 +1395,9 @@ class TestString < Test::Unit::TestCase assert_equal(S("hippo"), S("hello").tr(S("el"), S("ip"))) assert_equal(S("*e**o"), S("hello").tr(S("^aeiou"), S("*"))) assert_equal(S("hal"), S("ibm").tr(S("b-z"), S("a-z"))) + + a = "abc".force_encoding(Encoding::US_ASCII) + assert_equal(Encoding::US_ASCII, a.tr(S("z"), S("\u0101")).encoding) end def test_tr! @@ -1415,11 +1418,17 @@ class TestString < Test::Unit::TestCase a = S("ibm") assert_nil(a.tr!(S("B-Z"), S("A-Z"))) assert_equal(S("ibm"), a) + + a = "abc".force_encoding(Encoding::US_ASCII) + assert_nil(a.tr!(S("z"), S("\u0101"))) + assert_equal(Encoding::US_ASCII, a.encoding) end def test_tr_s assert_equal(S("hypo"), S("hello").tr_s(S("el"), S("yp"))) assert_equal(S("h*o"), S("hello").tr_s(S("el"), S("*"))) + assert_equal("a".hash, "\u0101\u0101".tr_s("\u0101", "a").hash) + assert_equal(true, "\u3041\u3041".tr("\u3041", "a").ascii_only?) end def test_tr_s! |