summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author yugui <yugui@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2009-03-03 11:39:19 +0000
committer yugui <yugui@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2009-03-03 11:39:19 +0000
commit f3c363c6f72d189703627538ecc918b0e31debde (patch)
tree 2c7bf3ef15a283b6c8ef00fa91e2262c3f2221be
parent 725a2369fd1ddd3ee79ee3325c07a2fd850cc15a (diff)
merges r22505 and r22547 from trunk into ruby_1_9_1.
--
* string.c (tr_trans): should not be affected by the encoding of
  replacement unless actually modified. [ruby-talk:328967]
--
* string.c (tr_trans): should recalculate coderange.
  [ruby-core:22326] (reopened at [ruby-core:22328])

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_1@22731 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 10
-rw-r--r-- string.c 40
-rw-r--r-- test/ruby/test_string.rb 9
3 files changed, 51 insertions, 8 deletions
diff --git a/ChangeLog b/ChangeLog
index eb480282c2..7fb56be290 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Sun Feb 22 22:42:20 2009 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * string.c (tr_trans): should recalculate coderange.
+ [ruby-core:22326] (reopened at [ruby-core:22328])
+
+Sun Feb 22 14:33:06 2009 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * string.c (tr_trans): should not be affected by the encoding of
+ replacement unless actually modified. [ruby-talk:328967]
+
Fri Feb 6 12:11:24 2009 NAKAMURA Usaku <usa@ruby-lang.org>
* ruby.c (process_options): set initial default_external before -r.
diff --git a/string.c b/string.c
index 432fdc4c79..1a8b182ecc 100644
--- a/string.c
+++ b/string.c
@@ -4603,6 +4603,10 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
int singlebyte = single_byte_optimizable(str);
int cr;
+#define CHECK_IF_ASCII(c) \
+ (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
+ (cr = ENC_CODERANGE_VALID) : 0)
+
StringValue(src);
StringValue(repl);
if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
@@ -4674,6 +4678,8 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
}
}
+ if (cr == ENC_CODERANGE_VALID)
+ cr = ENC_CODERANGE_7BIT;
str_modify_keep_cr(str);
s = RSTRING_PTR(str); send = RSTRING_END(str);
if (sflag) {
@@ -4682,8 +4688,10 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
char *buf = ALLOC_N(char, max), *t = buf;
while (s < send) {
- c0 = c = rb_enc_codepoint(s, send, enc);
- tlen = clen = rb_enc_codelen(c, enc);
+ int may_modify = 0;
+ c0 = c = rb_enc_codepoint(s, send, e1);
+ clen = rb_enc_codelen(c, e1);
+ tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
s += clen;
if (c < 256) {
@@ -4702,7 +4710,10 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
c = errc;
}
if (c != -1) {
- if (save == c) continue;
+ if (save == c) {
+ CHECK_IF_ASCII(c);
+ continue;
+ }
save = c;
tlen = rb_enc_codelen(c, enc);
modify = 1;
@@ -4710,6 +4721,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
else {
save = -1;
c = c0;
+ if (enc != e1) may_modify = 1;
}
while (t - buf + tlen >= max) {
offset = t - buf;
@@ -4718,6 +4730,10 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
t = buf + offset;
}
rb_enc_mbcput(c, t, enc);
+ if (may_modify && memcmp(s, t, tlen) != 0) {
+ modify = 1;
+ }
+ CHECK_IF_ASCII(c);
t += tlen;
}
*t = '\0';
@@ -4740,6 +4756,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
modify = 1;
}
}
+ CHECK_IF_ASCII(c);
s++;
}
}
@@ -4749,8 +4766,10 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
char *buf = ALLOC_N(char, max), *t = buf;
while (s < send) {
- c0 = c = rb_enc_codepoint(s, send, enc);
- tlen = clen = rb_enc_codelen(c, enc);
+ int may_modify = 0;
+ c0 = c = rb_enc_codepoint(s, send, e1);
+ clen = rb_enc_codelen(c, e1);
+ tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
if (c < 256) {
c = trans[c];
@@ -4772,8 +4791,8 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
modify = 1;
}
else {
- modify = 1;
c = c0;
+ if (enc != e1) may_modify = 1;
}
while (t - buf + tlen >= max) {
offset = t - buf;
@@ -4781,7 +4800,13 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
REALLOC_N(buf, char, max);
t = buf + offset;
}
- if (s != t) rb_enc_mbcput(c, t, enc);
+ if (s != t) {
+ rb_enc_mbcput(c, t, enc);
+ if (may_modify && memcmp(s, t, tlen) != 0) {
+ modify = 1;
+ }
+ }
+ CHECK_IF_ASCII(c);
s += clen;
t += tlen;
}
@@ -4796,7 +4821,6 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
}
if (modify) {
- cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(repl));
if (cr != ENC_CODERANGE_BROKEN)
ENC_CODERANGE_SET(str, cr);
rb_enc_associate(str, enc);
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index eba0256439..2315a8c560 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1395,6 +1395,9 @@ class TestString < Test::Unit::TestCase
assert_equal(S("hippo"), S("hello").tr(S("el"), S("ip")))
assert_equal(S("*e**o"), S("hello").tr(S("^aeiou"), S("*")))
assert_equal(S("hal"), S("ibm").tr(S("b-z"), S("a-z")))
+
+ a = "abc".force_encoding(Encoding::US_ASCII)
+ assert_equal(Encoding::US_ASCII, a.tr(S("z"), S("\u0101")).encoding)
end
def test_tr!
@@ -1415,11 +1418,17 @@ class TestString < Test::Unit::TestCase
a = S("ibm")
assert_nil(a.tr!(S("B-Z"), S("A-Z")))
assert_equal(S("ibm"), a)
+
+ a = "abc".force_encoding(Encoding::US_ASCII)
+ assert_nil(a.tr!(S("z"), S("\u0101")))
+ assert_equal(Encoding::US_ASCII, a.encoding)
end
def test_tr_s
assert_equal(S("hypo"), S("hello").tr_s(S("el"), S("yp")))
assert_equal(S("h*o"), S("hello").tr_s(S("el"), S("*")))
+ assert_equal("a".hash, "\u0101\u0101".tr_s("\u0101", "a").hash)
+ assert_equal(true, "\u3041\u3041".tr("\u3041", "a").ascii_only?)
end
def test_tr_s!