summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 14
-rw-r--r-- bootstraptest/test_m17n.rb 200
-rw-r--r-- configure.in 2
-rw-r--r-- encoding.c 22
-rw-r--r-- string.c 11
-rw-r--r-- test/ruby/test_m17n.rb 109
6 files changed, 137 insertions, 221 deletions
diff --git a/ChangeLog b/ChangeLog
index 04b1256042..719fa74b1e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+Tue Jan 22 12:57:07 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * configure.in (MINIRUBY): remove -I$(EXTOUT)/$(arch) from
+ MINIRUBY since miniruby might not be able to load DLL.
+
+ * test/ruby/test_m17n.rb: move tests from bootstrap test.
+
+ * encoding.c (enc_find): should check name if ASCII compatible.
+
+ * string.c (rb_str_end_with): should check character boundary.
+
+ * encoding.c (rb_enc_compatible): encoding must be ASCII
+ compatible before checking ENC_CODERANGE_7BIT.
+
Tue Jan 22 09:26:47 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* string.c (rb_str_each_char): iterates over a shadow.
diff --git a/bootstraptest/test_m17n.rb b/bootstraptest/test_m17n.rb
deleted file mode 100644
index d69a110f3d..0000000000
--- a/bootstraptest/test_m17n.rb
+++ /dev/null
@@ -1,200 +0,0 @@
-assert_normal_exit %q{
- "abcdefghij\xf0".force_encoding("utf-8").reverse.inspect
-}, '[ruby-dev:32448]'
-
-assert_equal 'true', %q{
- "abc".sub(/b/, "\xa1\xa1".force_encoding("euc-jp")) ==
- "a\xa1\xa1c".force_encoding("euc-jp")
-}
-
-assert_equal 'ok', %q{
- begin
- if ("\xa1\xa2\xa1\xa3").force_encoding("euc-jp").split(//) ==
- ["\xa1\xa2".force_encoding("euc-jp"), "\xa1\xa3".force_encoding("euc-jp")]
- :ok
- else
- :ng
- end
- rescue
- :ng
- end
-}, '[ruby-dev:32452]'
-
-assert_equal 'ok', %q{
- begin
- "\xa1\xa1".force_encoding("euc-jp") + "\xa1".force_encoding("ascii-8bit")
- :ng
- rescue ArgumentError
- :ok
- end
-}
-
-assert_equal 'ok', %q{
- begin
- "%s%s" % ["\xc2\xa1".force_encoding("sjis"), "\xc2\xa1".force_encoding("euc-jp")]
- rescue ArgumentError
- :ok
- end
-}
-
-assert_equal '0', %q{
- "\xa1\xa2".force_encoding("euc-jp").count("z")
-}
-
-assert_equal '1', %q{
- "\xa1\xa2".force_encoding("euc-jp").delete("z").length
-}
-
-assert_equal 'false', %q{
- "\xa1\xa2\xa3\xa4".force_encoding("euc-jp").include?("\xa3".force_encoding("euc-jp"))
-}
-
-assert_equal 'ok', %q{
- "\xa1\xa2\xa3\xa4".force_encoding("euc-jp").index("\xa3".force_encoding("euc-jp")) or :ok
-}
-
-assert_equal 'ok', %q{
- "\xa1\xa2\xa3\xa4".force_encoding("euc-jp").rindex("\xa3".force_encoding("euc-jp")) or :ok
-}
-
-assert_equal 'false', %q{
- s1 = "\xa1\xa1".force_encoding("euc-jp")
- s2 = s1.dup
- (94*94+94).times { s2.next! }
- s1 == s2
-}
-
-assert_equal 'ok', %q{
- "\xa1\xa2a\xa3\xa4".force_encoding("euc-jp").scan(/a/)
- :ok
-}
-
-assert_equal 'ok', %q{
- "\xa1\xa2a\xa3\xa4".force_encoding("euc-jp").split(/a/)
- :ok
-}
-
-assert_equal 'ok', %q{
- s1 = "\xa1\xa2".force_encoding("euc-jp")
- s2 = "\xa1\xa2".force_encoding("sjis")
- begin
- s1.upto(s2) {|x| break }
- :ng
- rescue ArgumentError
- :ok
- end
-}
-
-assert_equal 'true', %q{
- "%s" % "\xa1\xa1".force_encoding("euc-jp") ==
- "\xa1\xa1".force_encoding("euc-jp")
-}
-
-assert_equal 'a', %q{
- s = "a".dup.force_encoding("EUC-JP")
- s.length
- s[0,2]
-}
-
-assert_equal 'ok', %q{
- s1 = "\x81\x41".force_encoding("sjis")
- s2 = "\x81\x61".force_encoding("sjis")
- s1.casecmp(s2) == 0 ? :ng : :ok
-}
-
-assert_equal 'EUC-JP', %q{ ("\xc2\xa1 %s".force_encoding("EUC-JP") % "foo").encoding.name }
-assert_equal 'true', %q{ "\xa1\xa2\xa3\xa4".force_encoding("euc-jp")["\xa2\xa3".force_encoding("euc-jp")] == nil }
-assert_equal 'ok', %q{
- s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- begin
- s["\xb0\xa3"] = "foo"
- :ng
- rescue ArgumentError
- :ok
- end
-}
-
-assert_equal 'EUC-JP', %q{ "\xa3\xb0".force_encoding("EUC-JP").center(10).encoding.name }
-
-assert_equal 'ok', %q{
- s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- begin
- s.chomp("\xa3\xb4".force_encoding("shift_jis"))
- :ng
- rescue ArgumentError
- :ok
- end
-}
-
-assert_equal 'ok', %q{
- s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- begin
- s.count("\xa3\xb0".force_encoding("ascii-8bit"))
- :ng
- rescue ArgumentError
- :ok
- end
-}
-
-assert_equal 'ok', %q{
- s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- begin
- s.delete("\xa3\xb2".force_encoding("ascii-8bit"))
- :ng
- rescue ArgumentError
- :ok
- end
-}
-
-assert_equal 'ok', %q{
- s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- begin
- s.each_line("\xa3\xb1".force_encoding("ascii-8bit")) {|l| }
- :ng
- rescue ArgumentError
- :ok
- end
-}
-
-assert_equal 'true', %q{
- s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- s.gsub(/\xa3\xb1/e, "z") == "\xa3\xb0z\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
-}
-
-assert_equal 'false', %q{
- s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- s.include?("\xb0\xa3".force_encoding("euc-jp"))
-}
-
-assert_equal 'ok', %q{
- s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- s.index("\xb3\xa3".force_encoding("euc-jp")) or :ok
-}
-
-assert_equal 'ok', %q{
- s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- s.insert(-1, "a")
- :ok
-}
-
-assert_finish 1, %q{ "\xa3\xfe".force_encoding("euc-jp").next }
-
-assert_equal 'ok', %q{
- s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- begin
- s.rindex("\xb1\xa3".force_encoding("ascii-8bit"))
- :ng
- rescue ArgumentError
- :ok
- end
-}
-
-assert_equal 'true', %q{
- s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- s.split("\xa3\xb1".force_encoding("euc-jp")) == [
- "\xa3\xb0".force_encoding("euc-jp"),
- "\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- ]
-}, '[ruby-dev:32452]'
-
-assert_normal_exit %q{ // =~ :a }
diff --git a/configure.in b/configure.in
index e2e549c8bb..c064b378c0 100644
--- a/configure.in
+++ b/configure.in
@@ -1370,7 +1370,7 @@ if test x"$cross_compiling" = xyes; then
PREP=fake.rb
RUNRUBY='$(MINIRUBY) -I`cd $(srcdir)/lib; pwd`'
else
- MINIRUBY='./miniruby$(EXEEXT) -I$(srcdir)/lib -I$(EXTOUT)/$(arch)'
+ MINIRUBY='./miniruby$(EXEEXT) -I$(srcdir)/lib'
PREP='miniruby$(EXEEXT)'
RUNRUBY='$(MINIRUBY) $(srcdir)/runruby.rb --extout=$(EXTOUT)'
fi
diff --git a/encoding.c b/encoding.c
index 328f2d4742..f429f2c816 100644
--- a/encoding.c
+++ b/encoding.c
@@ -640,7 +640,7 @@ rb_encoding*
rb_enc_compatible(VALUE str1, VALUE str2)
{
int idx1, idx2;
- rb_encoding *enc;
+ rb_encoding *enc1, *enc2;
idx1 = rb_enc_get_index(str1);
idx2 = rb_enc_get_index(str2);
@@ -648,6 +648,10 @@ rb_enc_compatible(VALUE str1, VALUE str2)
if (idx1 == idx2) {
return rb_enc_from_index(idx1);
}
+ enc1 = rb_enc_from_index(idx1);
+ enc2 = rb_enc_from_index(idx2);
+ if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2))
+ return 0;
if (BUILTIN_TYPE(str1) != T_STRING) {
VALUE tmp = str1;
int idx0 = idx1;
@@ -664,17 +668,15 @@ rb_enc_compatible(VALUE str1, VALUE str2)
cr2 = rb_enc_str_coderange(str2);
if (cr1 != cr2) {
/* may need to handle ENC_CODERANGE_BROKEN */
- if (cr1 == ENC_CODERANGE_7BIT) return rb_enc_from_index(idx2);
- if (cr2 == ENC_CODERANGE_7BIT) return rb_enc_from_index(idx1);
+ if (cr1 == ENC_CODERANGE_7BIT) return enc2;
}
if (cr2 == ENC_CODERANGE_7BIT) {
- if (idx1 == 0) return rb_enc_from_index(idx2);
- return rb_enc_from_index(idx1);
+ if (idx1 == 0) return enc2;
+ return enc1;
}
}
- if (cr1 == ENC_CODERANGE_7BIT &&
- rb_enc_asciicompat(enc = rb_enc_from_index(idx2)))
- return enc;
+ if (cr1 == ENC_CODERANGE_7BIT)
+ return enc2;
}
return 0;
}
@@ -908,7 +910,11 @@ static VALUE
enc_find(VALUE klass, VALUE enc)
{
int idx;
+
if (SYMBOL_P(enc)) enc = rb_id2str(SYM2ID(enc));
+ if (!rb_enc_asciicompat(rb_enc_get(enc))) {
+ rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
+ }
idx = rb_enc_find_index(StringValueCStr(enc));
if (idx < 0) {
rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
diff --git a/string.c b/string.c
index c963b7f434..7d716f0523 100644
--- a/string.c
+++ b/string.c
@@ -5522,14 +5522,19 @@ static VALUE
rb_str_end_with(int argc, VALUE *argv, VALUE str)
{
int i;
+ char *p, *s;
+ rb_encoding *enc;
for (i=0; i<argc; i++) {
VALUE tmp = rb_check_string_type(argv[i]);
if (NIL_P(tmp)) continue;
- rb_enc_check(str, tmp);
+ enc = rb_enc_check(str, tmp);
if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
- if (memcmp(RSTRING_PTR(str) + RSTRING_LEN(str) - RSTRING_LEN(tmp),
- RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
+ p = RSTRING_PTR(str);
+ s = p + RSTRING_LEN(str) - RSTRING_LEN(tmp);
+ if (rb_enc_left_char_head(p, s, enc) != s)
+ continue;
+ if (memcmp(s, p, RSTRING_LEN(tmp)) == 0)
return Qtrue;
}
return Qfalse;
diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb
index 5a0115e285..a7ec25d57d 100644
--- a/test/ruby/test_m17n.rb
+++ b/test/ruby/test_m17n.rb
@@ -690,6 +690,9 @@ class TestM17N < Test::Unit::TestCase
#assert_raise(ArgumentError) { s("%c") % 0xc2a1 }
assert_strenc("\u{c2a1}", 'UTF-8', u("%c") % 0xc2a1)
assert_strenc("\u{c2}", 'UTF-8', u("%c") % 0xc2)
+ assert_raise(ArgumentError) {
+ "%s%s" % [s("\xc2\xa1"), e("\xc2\xa1")]
+ }
end
def test_sprintf_p
@@ -744,6 +747,7 @@ class TestM17N < Test::Unit::TestCase
assert_strenc("\x00", 'EUC-JP', e("%s") % e("\x00"))
assert_strenc("\x00", 'Windows-31J', s("%s") % s("\x00"))
assert_strenc("\x00", 'UTF-8', u("%s") % u("\x00"))
+ assert_equal("EUC-JP", (e("\xc2\xa1 %s") % "foo").encoding.name)
end
def test_str_lt
@@ -791,29 +795,34 @@ class TestM17N < Test::Unit::TestCase
assert_equal(nil, u("\xc2\xa1\xc2\xa2\xc2\xa3")[u("\xa1\xc2")])
assert_raise(ArgumentError) { u("\xc2\xa1\xc2\xa2\xc2\xa3")[a("\xa1\xc2")] }
+ assert_nil(e("\xa1\xa2\xa3\xa4")[e("\xa2\xa3")])
+ end
+
+ def test_aset
+ s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
+ assert_raise(ArgumentError){s["\xb0\xa3"] = "foo"}
end
def test_str_center
- assert_encoding("EUC-JP", "a".center(5, "\xa1\xa2".force_encoding("euc-jp")).encoding)
+ assert_encoding("EUC-JP", "a".center(5, e("\xa1\xa2")).encoding)
+ assert_encoding("EUC-JP", e("\xa3\xb0").center(10).encoding)
end
def test_squeeze
- s = "\xa3\xb0\xa3\xb1\xa3\xb1\xa3\xb3\xa3\xb4".force_encoding("euc-jp")
- assert_equal("\xa3\xb0\xa3\xb1\xa3\xb3\xa3\xb4".force_encoding("euc-jp"), s.squeeze)
+ s = e("\xa3\xb0\xa3\xb1\xa3\xb1\xa3\xb3\xa3\xb4")
+ assert_equal(e("\xa3\xb0\xa3\xb1\xa3\xb3\xa3\xb4"), s.squeeze)
end
def test_tr
- s = "\x81\x41".force_encoding("shift_jis")
+ s = s("\x81\x41")
assert_equal(s.tr("A", "B"), s)
assert_equal(s.tr_s("A", "B"), s)
assert_nothing_raised {
- "a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "a".force_encoding("EUC-JP"))
+ "a".force_encoding("ASCII-8BIT").tr(a("a"), a("a"))
}
- assert_equal("\xA1\xA1".force_encoding("EUC-JP"),
- "a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "\xA1\xA1".force_encoding("EUC-JP")))
-
+ assert_equal(e("\xA1\xA1"), a("a").tr(a("a"), e("\xA1\xA1")))
end
def test_tr_s
@@ -821,6 +830,40 @@ class TestM17N < Test::Unit::TestCase
"a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "\xA1\xA1".force_encoding("EUC-JP")))
end
+ def test_count
+ assert_equal(0, e("\xa1\xa2").count("z"))
+ s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
+ assert_raise(ArgumentError){s.count(a("\xa3\xb0"))}
+ end
+
+ def test_delete
+ assert_equal(1, e("\xa1\xa2").delete("z").length)
+ s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
+ assert_raise(ArgumentError){s.delete(a("\xa3\xb2"))}
+ end
+
+ def test_include?
+ assert_equal(false, e("\xa1\xa2\xa3\xa4").include?(e("\xa3")))
+ s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
+ assert_equal(false, s.include?(e("\xb0\xa3")))
+ end
+
+ def test_index
+ s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
+ assert_nil(s.index(e("\xb3\xa3")))
+ assert_nil(e("\xa1\xa2\xa3\xa4").index(e("\xa3")))
+ assert_nil(e("\xa1\xa2\xa3\xa4").rindex(e("\xa3")))
+ s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
+ assert_raise(ArgumentError){s.rindex(a("\xb1\xa3"))}
+ end
+
+ def test_next
+ s1 = e("\xa1\xa1")
+ s2 = s1.dup
+ (94*94+94).times { s2.next! }
+ assert_not_equal(s1, s2)
+ end
+
def test_sub
s = "abc".sub(/b/, "\xa1\xa1".force_encoding("euc-jp"))
assert_encoding("EUC-JP", s.encoding)
@@ -828,6 +871,40 @@ class TestM17N < Test::Unit::TestCase
assert_equal(Encoding::EUC_JP, "\xa4\xa2".force_encoding("euc-jp").gsub(/./, '\&').encoding)
end
+ def test_insert
+ s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
+ assert_equal(e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4a"), s.insert(-1, "a"))
+ end
+
+ def test_scan
+ assert_equal(["a"], e("\xa1\xa2a\xa3\xa4").scan(/a/))
+ end
+
+ def test_upto
+ s1 = e("\xa1\xa2")
+ s2 = s("\xa1\xa2")
+ assert_raise(ArgumentError){s1.upto(s2) {|x| break }}
+ end
+
+ def test_casecmp
+ s1 = s("\x81\x41")
+ s2 = s("\x81\x61")
+ assert_not_equal(0, s1.casecmp(s2))
+ end
+
+ def test_reverse
+ assert_equal(u("\xf0jihgfedcba"), u("abcdefghij\xf0").reverse)
+ end
+
+ def test_plus
+ assert_raise(ArgumentError){u("\xe3\x81\x82") + a("\xa1")}
+ end
+
+ def test_chomp
+ s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
+ assert_raise(ArgumentError){s.chomp(s("\xa3\xb4"))}
+ end
+
def test_gsub
s = 'abc'
s.ascii_only?
@@ -844,14 +921,21 @@ class TestM17N < Test::Unit::TestCase
"\xc2\xa1".force_encoding("utf-8")
}
}
+ s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
+ assert_equal(e("\xa3\xb0z\xa3\xb2\xa3\xb3\xa3\xb4"), s.gsub(/\xa3\xb1/e, "z"))
end
def test_end_with
- s1 = "\x81\x40".force_encoding("sjis")
+ s1 = s("\x81\x40")
s2 = "@"
assert_equal(false, s1.end_with?(s2), "#{encdump s1}.end_with?(#{encdump s2})")
end
+ def test_each_line
+ s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
+ assert_raise(ArgumentError){s.each_line(a("\xa3\xb1")) {|l| }}
+ end
+
def test_each_char
a = [e("\xa4\xa2"), "b", e("\xa4\xa4"), "c"]
s = "\xa4\xa2b\xa4\xa4c".force_encoding("euc-jp")
@@ -860,6 +944,13 @@ class TestM17N < Test::Unit::TestCase
def test_regexp_match
assert_equal([0,0], //.match("\xa1\xa1".force_encoding("euc-jp"),-1).offset(0))
+ assert_equal(0, // =~ :a)
+ end
+
+ def test_split
+ assert_equal(e("\xa1\xa2\xa1\xa3").split(//),
+ [e("\xa1\xa2"), e("\xa1\xa3")],
+ '[ruby-dev:32452]')
end
def test_nonascii_method_name