From 7ff702406a324ed12c69bc23a7cfaf066e401547 Mon Sep 17 00:00:00 2001 From: akr Date: Sat, 1 Dec 2007 16:56:19 +0000 Subject: * include/ruby/intern.h (rb_uv_to_utf8): declared. * re.c (rb_reg_preprocess): new function for dynamic regexp with \u{} such as Regexp.new("\\u{6666}"). (rb_reg_prepare_re): preprocess regexp for recompiling. (read_escaped_byte): new function. (unescape_escaped_nonascii): new function. (append_utf8): new function. (unescape_unicode_list): new function. (unescape_unicode_bmp): new function. (unescape_nonascii): new function. (rb_reg_initialize): preprocess regexp. * pack.c (rb_uv_to_utf8): renamed from uv_to_utf8. * parse.y (STR_NEW3): take func instead of has8 and hasmb. (parser_str_new): use default coderange mechanism except for regexp. (parser_tokadd_utf8): copy regexp source as-is. (parser_read_escape): UTF-8 stuff removed. (parser_tokadd_escape): has8bit and hasmb removed. (parser_tokadd_string): fix 8-bit single byte character with \u. (parser_parse_string): has8bit and hasmb removed. (parser_here_document): has8bit and hasmb removed. (parser_yylex): call parser_tokadd_utf8 instead of read_escape for UTF-8 character. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14072 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- test/ruby/test_m17n.rb | 27 ++++++++---- test/ruby/test_unicode_escape.rb | 91 ++++++++++++++++++++++++++-------------- 2 files changed, 79 insertions(+), 39 deletions(-) (limited to 'test') diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index fd183967a3..bb0a8a5010 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -25,6 +25,17 @@ class TestM17N < Test::Unit::TestCase assert_encoding("EUC-JP", eval(e(%{"\\x80"})).encoding) end + def test_string_mixed_unicode + assert_raise(SyntaxError) { eval(a(%{"\xc0\xa0\\u{6666}"})) } + assert_raise(SyntaxError) { eval(e(%{"\xc0\xa0\\u{6666}"})) } + assert_raise(SyntaxError) { eval(s(%{"\xc0\xa0\\u{6666}"})) } + assert_nothing_raised { eval(u(%{"\xc0\xa0\\u{6666}"})) } + assert_raise(SyntaxError) { eval(a(%{"\\u{6666}\xc0\xa0"})) } + assert_raise(SyntaxError) { eval(e(%{"\\u{6666}\xc0\xa0"})) } + assert_raise(SyntaxError) { eval(s(%{"\\u{6666}\xc0\xa0"})) } + assert_nothing_raised { eval(u(%{"\\u{6666}\xc0\xa0"})) } + end + def test_regexp_too_short_multibyte_character assert_raise(SyntaxError) { eval('/\xfe/e') } assert_raise(SyntaxError) { eval('/\x8e/e') } @@ -38,11 +49,12 @@ class TestM17N < Test::Unit::TestCase assert_raise(SyntaxError) { eval('/\xfc\x80\x80\x80\x80/u') } # raw 8bit - #assert_raise(SyntaxError) { eval("/\xfe/e") } - #assert_raise(SyntaxError) { eval("/\xc0/u") } + assert_raise(SyntaxError) { eval("/\xfe/e") } + assert_raise(SyntaxError) { eval("/\xc0/u") } # invalid suffix - #assert_raise(SyntaxError) { eval('/\xc0\xff/u') } + assert_raise(SyntaxError) { eval('/\xc0\xff/u') } + assert_raise(SyntaxError) { eval('/\xc0 /u') } #assert_raise(SyntaxError) { eval('/\xc0\x20/u') } end @@ -94,6 +106,9 @@ class TestM17N < Test::Unit::TestCase def test_regexp_generic assert_regexp_generic_ascii(/a/) assert_regexp_generic_ascii(Regexp.new(a("a"))) + assert_regexp_generic_ascii(Regexp.new(e("a"))) + assert_regexp_generic_ascii(Regexp.new(s("a"))) + assert_regexp_generic_ascii(Regexp.new(u("a"))) [/a/, Regexp.new(a("a"))].each {|r| assert_equal(0, r =~ a("a")) @@ -112,7 +127,7 @@ class TestM17N < Test::Unit::TestCase assert_regexp_fixed_ascii8bit(/\xc0\xa1/n) assert_regexp_fixed_ascii8bit(eval(a(%{/\xc0\xa1/}))) assert_regexp_fixed_ascii8bit(eval(a(%{/\xc0\xa1/n}))) - # assert_regexp_fixed_ascii8bit(eval(a(%q{/\xc0\xa1/}))) + assert_regexp_fixed_ascii8bit(eval(a(%q{/\xc0\xa1/}))) [/a/n].each {|r| assert_equal(0, r =~ a("a")) @@ -139,12 +154,11 @@ class TestM17N < Test::Unit::TestCase def test_regexp_euc assert_regexp_fixed_eucjp(/a/e) - assert_regexp_fixed_eucjp(Regexp.new(e("a"))) assert_regexp_fixed_eucjp(/\xc0\xa1/e) assert_regexp_fixed_eucjp(eval(e(%{/\xc0\xa1/}))) assert_regexp_fixed_eucjp(eval(e(%q{/\xc0\xa1/}))) - [/a/e, Regexp.new(e("a"))].each {|r| + [/a/e].each {|r| assert_equal(0, r =~ a("a")) assert_equal(0, r =~ e("a")) assert_equal(0, r =~ s("a")) @@ -169,7 +183,6 @@ class TestM17N < Test::Unit::TestCase def test_regexp_sjis assert_regexp_fixed_sjis(/a/s) - assert_regexp_fixed_sjis(Regexp.new(s("a"))) assert_regexp_fixed_sjis(/\xc0\xa1/s) assert_regexp_fixed_sjis(eval(s(%{/\xc0\xa1/}))) assert_regexp_fixed_sjis(eval(s(%q{/\xc0\xa1/}))) diff --git a/test/ruby/test_unicode_escape.rb b/test/ruby/test_unicode_escape.rb index 46413cdcdb..a1800c66e6 100644 --- a/test/ruby/test_unicode_escape.rb +++ b/test/ruby/test_unicode_escape.rb @@ -68,47 +68,74 @@ EOS def test_regexp # Compare regexps to regexps - assert_equal(/Yukihiro Matsumoto - 松本行弘/, + assert_not_equal(/Yukihiro Matsumoto - 松本行弘/, /Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18/) - assert_equal(/Yukihiro Matsumoto - 松本行弘/, - /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/) - assert_equal(/Matz - まつもと ゆきひろ/, + assert_not_equal(/Yukihiro Matsumoto - 松本行弘/, + /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/) + assert_not_equal(/Matz - まつもと ゆきひろ/, /Matz - \u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D/) - assert_equal(/Aoyama Gakuin University - 青山学院大学/, + assert_not_equal(/Aoyama Gakuin University - 青山学院大学/, /Aoyama Gakuin University - \u9752\u5C71\u5B66\u9662\u5927\u5B66/) - assert_equal(/青山学院大学/, /\u9752\u5C71\u5B66\u9662\u5927\u5B66/) - assert_equal(/Martin Dürst/, /Martin D\u00FCrst/) - assert_equal(/ü/, /\u00FC/) - assert_equal(/Martin Dürst/, /Martin D\u{FC}rst/) - assert_equal(/ü/, /\u{FC}/) - assert_equal(/ü/, %r{\u{FC}}) - assert_equal(/ü/i, %r{\u00FC}i) + assert_not_equal(/青山学院大学/, /\u9752\u5C71\u5B66\u9662\u5927\u5B66/) + assert_not_equal(/Martin Dürst/, /Martin D\u00FCrst/) + assert_not_equal(/ü/, /\u00FC/) + assert_not_equal(/Martin Dürst/, /Martin D\u{FC}rst/) + assert_not_equal(/ü/, /\u{FC}/) + assert_not_equal(/ü/, %r{\u{FC}}) + assert_not_equal(/ü/i, %r{\u00FC}i) + + assert_equal('Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18', + /Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18/.source) + assert_equal('Yukihiro Matsumoto - \u{677E 672C 884C 5F18}', + /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/.source) + assert_equal('Matz - \u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D', + /Matz - \u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D/.source) + assert_equal('Aoyama Gakuin University - \u9752\u5C71\u5B66\u9662\u5927\u5B66', + /Aoyama Gakuin University - \u9752\u5C71\u5B66\u9662\u5927\u5B66/.source) + assert_equal('\u9752\u5C71\u5B66\u9662\u5927\u5B66', + /\u9752\u5C71\u5B66\u9662\u5927\u5B66/.source) + assert_equal('Martin D\u00FCrst', /Martin D\u00FCrst/.source) + assert_equal('\u00FC', /\u00FC/.source) + assert_equal('Martin D\u{FC}rst', /Martin D\u{FC}rst/.source) + assert_equal('\u{FC}', /\u{FC}/.source) + assert_equal('\u{FC}', %r{\u{FC}}.source) + assert_equal('\u00FC', %r{\u00FC}i.source) # match strings to regexps - assert_equal("Yukihiro Matsumoto - 松本行弘" =~ /Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18/, 0) - assert_equal("Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18" =~ /Yukihiro Matsumoto - \u677E\u672C\u884C/, 0) - assert_equal("Yukihiro Matsumoto - 松本行弘" =~ /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/, 0) - assert_equal(%Q{Yukihiro Matsumoto - \u{677E 672C 884C 5F18}} =~ /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/, 0) - assert_equal("Matz - まつもと ゆきひろ" =~ /Matz - \u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D/, 0) - assert_equal("Aoyama Gakuin University - 青山学院大学" =~ /Aoyama Gakuin University - \u9752\u5C71\u5B66\u9662\u5927\u5B66/, 0) - assert_equal("青山学院大学" =~ /\u9752\u5C71\u5B66\u9662\u5927\u5B66/, 0) - assert_equal("Martin Dürst" =~ /Martin D\u00FCrst/, 0) - assert_equal("ü" =~ /\u00FC/, 0) - assert_equal("Martin Dürst" =~ /Martin D\u{FC}rst/, 0) - assert_equal("ü" =~ %r{\u{FC}}, 0) - assert_equal("ü" =~ %r{\u00FC}i, 0) + assert_equal(0, "Yukihiro Matsumoto - 松本行弘" =~ /Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18/) + assert_equal(0, "Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18" =~ /Yukihiro Matsumoto - \u677E\u672C\u884C/) + assert_equal(0, "Yukihiro Matsumoto - 松本行弘" =~ /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/) + assert_equal(0, %Q{Yukihiro Matsumoto - \u{677E 672C 884C 5F18}} =~ /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/) + assert_equal(0, "Matz - まつもと ゆきひろ" =~ /Matz - \u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D/) + assert_equal(0, "Aoyama Gakuin University - 青山学院大学" =~ /Aoyama Gakuin University - \u9752\u5C71\u5B66\u9662\u5927\u5B66/) + assert_equal(0, "青山学院大学" =~ /\u9752\u5C71\u5B66\u9662\u5927\u5B66/) + assert_equal(0, "Martin Dürst" =~ /Martin D\u00FCrst/) + assert_equal(0, "ü" =~ /\u00FC/) + assert_equal(0, "Martin Dürst" =~ /Martin D\u{FC}rst/) + assert_equal(0, "ü" =~ %r{\u{FC}}) + assert_equal(0, "ü" =~ %r{\u00FC}i) # Flip order of the two operands - assert_equal(/Martin D\u00FCrst/ =~ "Martin Dürst", 0) - assert_equal(/\u00FC/ =~ "testü", 4) - assert_equal(/Martin D\u{FC}rst/ =~ "fooMartin Dürstbar", 3) - assert_equal(%r{\u{FC}} =~ "fooübar", 3) + assert_equal(0, /Martin D\u00FCrst/ =~ "Martin Dürst") + assert_equal(4, /\u00FC/ =~ "testü") + assert_equal(3, /Martin D\u{FC}rst/ =~ "fooMartin Dürstbar") + assert_equal(3, %r{\u{FC}} =~ "fooübar") # Put \u in strings, literal character in regexp - assert_equal("Martin D\u00FCrst" =~ /Martin Dürst/, 0) - assert_equal("test\u00FC" =~ /ü/, 4) - assert_equal("fooMartin D\u{FC}rstbar" =~ /Martin Dürst/, 3) - assert_equal(%Q{foo\u{FC}bar} =~ %r<ü>, 3) + assert_equal(0, "Martin D\u00FCrst" =~ /Martin Dürst/) + assert_equal(4, "test\u00FC" =~ /ü/) + assert_equal(3, "fooMartin D\u{FC}rstbar" =~ /Martin Dürst/) + assert_equal(3, %Q{foo\u{FC}bar} =~ %r<ü>) + + assert_match(eval('/\u{2a}/'), "*") + assert_raise(SyntaxError) { eval('/\u{6666}/n') } + assert_raise(SyntaxError) { eval('/\u{6666}/e') } + assert_raise(SyntaxError) { eval('/\u{6666}/s') } + assert_nothing_raised { eval('/\u{6666}/u') } + end + + def test_dynamic_regexp + assert_match(Regexp.new("Martin D\\u{FC}rst"), "Martin Dürst") end def test_syntax_variants -- cgit v1.2.3