diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-11-24 11:53:19 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-11-24 11:53:19 +0000 |
commit | 7850586af435f44ff97c93decc97995bbdf6bad4 (patch) | |
tree | 12c290c32f7acee2a3698e906ba40c24dcb72816 | |
parent | f33adbc11e0fa0a2bd73b96ee3a3529481eb111d (diff) |
Don't use single byte optimization on grapheme clusters
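Unicode Text Segmentation (UAX #29) treats CRLF as a single grapheme cluster, so the single-byte fast path, which counts and enumerates per character, gives the wrong result for strings containing "\r\n". [Bug #15337]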
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65954 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | string.c | 4 | ||||
-rw-r--r-- | test/ruby/test_string.rb | 1 |
2 files changed, 3 insertions, 2 deletions
@@ -8459,7 +8459,7 @@ rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj) rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str)); const char *ptr, *end; - if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) { + if (!rb_enc_unicode_p(enc)) { return rb_str_length(str); } @@ -8487,7 +8487,7 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary) rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str)); const char *ptr, *end; - if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) { + if (!rb_enc_unicode_p(enc)) { return rb_str_enumerate_chars(str, ary); } diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index 1a6d87f11f..014eb5ec15 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -973,6 +973,7 @@ CODE def test_each_grapheme_cluster [ + "\u{0D 0A}", "\u{20 200d}", "\u{600 600}", "\u{600 20}", |