diff options
author | nagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-11-28 13:36:08 +0000 |
---|---|---|
committer | nagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-11-28 13:36:08 +0000 |
commit | 29eae8b1e9a5142b30250d0a9cc738b4ce94eadc (patch) | |
tree | a4957d36f174339bcc73d2bf573bda40af4213ca /string.c | |
parent | b1944e41f5f5711e3b79bf08f2b54da1d7d6890b (diff) |
merge revision(s) 65954,65955,65958: [Backport #15337]
Don't use single byte optimization on grapheme clusters
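Unicode Text Segmentation (UAX #29) treats CRLF as a single extended grapheme cluster, so even a single-byte-optimizable (e.g. ASCII-only) string containing "\r\n" has fewer grapheme clusters than characters. [Bug #15337]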
add tests using Unicode test data for grapheme clusters
Add file test/ruby/enc/test_grapheme_breaks.rb to test String#each_grapheme_cluster
and \X extended grapheme cluster matcher in regular expressions against test data
provided by Unicode (ucd/auxiliary/GraphemeBreakTest.txt).
Some lines in the data file are ignored, as follows:
- Lines with a surrogate, because Ruby doesn't handle these
- The case of "\r\n", because there is a bug (#15337) in the implementation
Remove the test guard that skipped the "\r\n" case (bug #15337), because the bug is now fixed.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_5@66073 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 4 |
1 files changed, 2 insertions, 2 deletions
@@ -8342,7 +8342,7 @@ rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj) rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str)); const char *ptr, *end; - if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) { + if (!rb_enc_unicode_p(enc)) { return rb_str_length(str); } @@ -8370,7 +8370,7 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary) rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str)); const char *ptr, *end; - if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) { + if (!rb_enc_unicode_p(enc)) { return rb_str_enumerate_chars(str, ary); } |