From 2f6cc15cdb3d64135b29cfd5ee376a5a03ebbee7 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 29 Jun 2019 10:10:17 +0900 Subject: Fixed String#grapheme_clusters with wide encodings * string.c (get_reg_grapheme_cluster): make regexp from properly encoded sources for wide-char encodings. [Bug #15965] * regparse.c (node_extended_grapheme_cluster): suppress false duplicated range warning for the time being. --- test/ruby/test_string.rb | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'test/ruby/test_string.rb') diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index 8eb806c032..507e067a0d 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -1036,13 +1036,20 @@ CODE "\u{1F468 200D 1F393}", "\u{1F46F 200D 2642 FE0F}", "\u{1f469 200d 2764 fe0f 200d 1f469}", - ].each do |g| + ].product([Encoding::UTF_8, *WIDE_ENCODINGS]) do |g, enc| + g = g.encode(enc) assert_equal [g], g.grapheme_clusters - assert_predicate g.dup.taint.grapheme_clusters[0], :tainted? + assert_predicate g.taint.grapheme_clusters[0], :tainted? end - assert_equal ["\u000A", "\u0324"], "\u{a 324}".grapheme_clusters - assert_equal ["\u000D", "\u0324"], "\u{d 324}".grapheme_clusters + [ + "\u{a 324}", + "\u{d 324}", + "abc", + ].product([Encoding::UTF_8, *WIDE_ENCODINGS]) do |g, enc| + g = g.encode(enc) + assert_equal g.chars, g.grapheme_clusters + end assert_equal ["a", "b", "c"], "abc".b.grapheme_clusters if ENUMERATOR_WANTARRAY -- cgit v1.2.3