summary | refs | log | tree | commit | diff
path: root/test/ruby/test_unicode_escape.rb
diff options
context:
space:
mode:
author: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-01 16:56:19 +0000
committer: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-01 16:56:19 +0000
commit: 7ff702406a324ed12c69bc23a7cfaf066e401547 (patch)
tree: af1c1a9ee2d96bcc009c639271ef5031fd6a05dd /test/ruby/test_unicode_escape.rb
parent: d92b461dd9f5ff0eae7d15435aab1507dfb53496 (diff)
* include/ruby/intern.h (rb_uv_to_utf8): declared.
* re.c (rb_reg_preprocess): new function for dynamic regexp with \u{} such as Regexp.new("\\u{6666}").
  (rb_reg_prepare_re): preprocess regexp for recompiling.
  (read_escaped_byte): new function.
  (unescape_escaped_nonascii): new function.
  (append_utf8): new function.
  (unescape_unicode_list): new function.
  (unescape_unicode_bmp): new function.
  (unescape_nonascii): new function.
  (rb_reg_initialize): preprocess regexp.
* pack.c (rb_uv_to_utf8): renamed from uv_to_utf8.
* parse.y (STR_NEW3): take func instead of has8 and hasmb.
  (parser_str_new): use default coderange mechanism except for regexp.
  (parser_tokadd_utf8): copy regexp source as-is.
  (parser_read_escape): UTF-8 stuff removed.
  (parser_tokadd_escape): has8bit and hasmb removed.
  (parser_tokadd_string): fix 8-bit single byte character with \u.
  (parser_parse_string): has8bit and hasmb removed.
  (parser_here_document): has8bit and hasmb removed.
  (parser_yylex): call parser_tokadd_utf8 instead of read_escape for UTF-8 character.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14072 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'test/ruby/test_unicode_escape.rb')
-rw-r--r-- test/ruby/test_unicode_escape.rb 91
1 files changed, 59 insertions, 32 deletions
diff --git a/test/ruby/test_unicode_escape.rb b/test/ruby/test_unicode_escape.rb
index 46413cdcdb..a1800c66e6 100644
--- a/test/ruby/test_unicode_escape.rb
+++ b/test/ruby/test_unicode_escape.rb
@@ -68,47 +68,74 @@ EOS
def test_regexp
# Compare regexps to regexps
- assert_equal(/Yukihiro Matsumoto - 松本行弘/,
+ assert_not_equal(/Yukihiro Matsumoto - 松本行弘/,
/Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18/)
- assert_equal(/Yukihiro Matsumoto - 松本行弘/,
- /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/)
- assert_equal(/Matz - まつもと ゆきひろ/,
+ assert_not_equal(/Yukihiro Matsumoto - 松本行弘/,
+ /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/)
+ assert_not_equal(/Matz - まつもと ゆきひろ/,
/Matz - \u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D/)
- assert_equal(/Aoyama Gakuin University - 青山学院大学/,
+ assert_not_equal(/Aoyama Gakuin University - 青山学院大学/,
/Aoyama Gakuin University - \u9752\u5C71\u5B66\u9662\u5927\u5B66/)
- assert_equal(/青山学院大学/, /\u9752\u5C71\u5B66\u9662\u5927\u5B66/)
- assert_equal(/Martin Dürst/, /Martin D\u00FCrst/)
- assert_equal(/ü/, /\u00FC/)
- assert_equal(/Martin Dürst/, /Martin D\u{FC}rst/)
- assert_equal(/ü/, /\u{FC}/)
- assert_equal(/ü/, %r{\u{FC}})
- assert_equal(/ü/i, %r{\u00FC}i)
+ assert_not_equal(/青山学院大学/, /\u9752\u5C71\u5B66\u9662\u5927\u5B66/)
+ assert_not_equal(/Martin Dürst/, /Martin D\u00FCrst/)
+ assert_not_equal(/ü/, /\u00FC/)
+ assert_not_equal(/Martin Dürst/, /Martin D\u{FC}rst/)
+ assert_not_equal(/ü/, /\u{FC}/)
+ assert_not_equal(/ü/, %r{\u{FC}})
+ assert_not_equal(/ü/i, %r{\u00FC}i)
+
+ assert_equal('Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18',
+ /Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18/.source)
+ assert_equal('Yukihiro Matsumoto - \u{677E 672C 884C 5F18}',
+ /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/.source)
+ assert_equal('Matz - \u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D',
+ /Matz - \u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D/.source)
+ assert_equal('Aoyama Gakuin University - \u9752\u5C71\u5B66\u9662\u5927\u5B66',
+ /Aoyama Gakuin University - \u9752\u5C71\u5B66\u9662\u5927\u5B66/.source)
+ assert_equal('\u9752\u5C71\u5B66\u9662\u5927\u5B66',
+ /\u9752\u5C71\u5B66\u9662\u5927\u5B66/.source)
+ assert_equal('Martin D\u00FCrst', /Martin D\u00FCrst/.source)
+ assert_equal('\u00FC', /\u00FC/.source)
+ assert_equal('Martin D\u{FC}rst', /Martin D\u{FC}rst/.source)
+ assert_equal('\u{FC}', /\u{FC}/.source)
+ assert_equal('\u{FC}', %r{\u{FC}}.source)
+ assert_equal('\u00FC', %r{\u00FC}i.source)
# match strings to regexps
- assert_equal("Yukihiro Matsumoto - 松本行弘" =~ /Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18/, 0)
- assert_equal("Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18" =~ /Yukihiro Matsumoto - \u677E\u672C\u884C/, 0)
- assert_equal("Yukihiro Matsumoto - 松本行弘" =~ /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/, 0)
- assert_equal(%Q{Yukihiro Matsumoto - \u{677E 672C 884C 5F18}} =~ /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/, 0)
- assert_equal("Matz - まつもと ゆきひろ" =~ /Matz - \u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D/, 0)
- assert_equal("Aoyama Gakuin University - 青山学院大学" =~ /Aoyama Gakuin University - \u9752\u5C71\u5B66\u9662\u5927\u5B66/, 0)
- assert_equal("青山学院大学" =~ /\u9752\u5C71\u5B66\u9662\u5927\u5B66/, 0)
- assert_equal("Martin Dürst" =~ /Martin D\u00FCrst/, 0)
- assert_equal("ü" =~ /\u00FC/, 0)
- assert_equal("Martin Dürst" =~ /Martin D\u{FC}rst/, 0)
- assert_equal("ü" =~ %r{\u{FC}}, 0)
- assert_equal("ü" =~ %r{\u00FC}i, 0)
+ assert_equal(0, "Yukihiro Matsumoto - 松本行弘" =~ /Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18/)
+ assert_equal(0, "Yukihiro Matsumoto - \u677E\u672C\u884C\u5F18" =~ /Yukihiro Matsumoto - \u677E\u672C\u884C/)
+ assert_equal(0, "Yukihiro Matsumoto - 松本行弘" =~ /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/)
+ assert_equal(0, %Q{Yukihiro Matsumoto - \u{677E 672C 884C 5F18}} =~ /Yukihiro Matsumoto - \u{677E 672C 884C 5F18}/)
+ assert_equal(0, "Matz - まつもと ゆきひろ" =~ /Matz - \u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D/)
+ assert_equal(0, "Aoyama Gakuin University - 青山学院大学" =~ /Aoyama Gakuin University - \u9752\u5C71\u5B66\u9662\u5927\u5B66/)
+ assert_equal(0, "青山学院大学" =~ /\u9752\u5C71\u5B66\u9662\u5927\u5B66/)
+ assert_equal(0, "Martin Dürst" =~ /Martin D\u00FCrst/)
+ assert_equal(0, "ü" =~ /\u00FC/)
+ assert_equal(0, "Martin Dürst" =~ /Martin D\u{FC}rst/)
+ assert_equal(0, "ü" =~ %r{\u{FC}})
+ assert_equal(0, "ü" =~ %r{\u00FC}i)
# Flip order of the two operands
- assert_equal(/Martin D\u00FCrst/ =~ "Martin Dürst", 0)
- assert_equal(/\u00FC/ =~ "testü", 4)
- assert_equal(/Martin D\u{FC}rst/ =~ "fooMartin Dürstbar", 3)
- assert_equal(%r{\u{FC}} =~ "fooübar", 3)
+ assert_equal(0, /Martin D\u00FCrst/ =~ "Martin Dürst")
+ assert_equal(4, /\u00FC/ =~ "testü")
+ assert_equal(3, /Martin D\u{FC}rst/ =~ "fooMartin Dürstbar")
+ assert_equal(3, %r{\u{FC}} =~ "fooübar")
# Put \u in strings, literal character in regexp
- assert_equal("Martin D\u00FCrst" =~ /Martin Dürst/, 0)
- assert_equal("test\u00FC" =~ /ü/, 4)
- assert_equal("fooMartin D\u{FC}rstbar" =~ /Martin Dürst/, 3)
- assert_equal(%Q{foo\u{FC}bar} =~ %r<ü>, 3)
+ assert_equal(0, "Martin D\u00FCrst" =~ /Martin Dürst/)
+ assert_equal(4, "test\u00FC" =~ /ü/)
+ assert_equal(3, "fooMartin D\u{FC}rstbar" =~ /Martin Dürst/)
+ assert_equal(3, %Q{foo\u{FC}bar} =~ %r<ü>)
+
+ assert_match(eval('/\u{2a}/'), "*")
+ assert_raise(SyntaxError) { eval('/\u{6666}/n') }
+ assert_raise(SyntaxError) { eval('/\u{6666}/e') }
+ assert_raise(SyntaxError) { eval('/\u{6666}/s') }
+ assert_nothing_raised { eval('/\u{6666}/u') }
+ end
+
+ def test_dynamic_regexp
+ assert_match(Regexp.new("Martin D\\u{FC}rst"), "Martin Dürst")
end
def test_syntax_variants