summary | refs | log | tree | commit | diff
path: root/string.c
diff options
context:
space:
mode:
author: nagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2018-11-28 13:36:08 +0000
committer: nagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2018-11-28 13:36:08 +0000
commit: 29eae8b1e9a5142b30250d0a9cc738b4ce94eadc (patch)
tree: a4957d36f174339bcc73d2bf573bda40af4213ca /string.c
parent: b1944e41f5f5711e3b79bf08f2b54da1d7d6890b (diff)
merge revision(s) 65954,65955,65958: [Backport #15337]
Don't use single byte optimization on grapheme clusters

Unicode Text Segmentation considers CRLF as a character.
[Bug #15337]

add tests using Unicode test data for grapheme clusters

Add file test/ruby/enc/test_grapheme_breaks.rb to test
String#each_grapheme_cluster and \X extended grapheme cluster matcher
in regular expressions against test data provided by Unicode
(ucd/auxiliary/GraphemeBreakTest.txt).

Some lines in the data file are ignored, as follows:
- Lines with a surrogate, because Ruby doesn't handle these
- The case of "\r\n", because there is a bug (#15337) in the implementation

remove guard against bug #15337, because it is fixed

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_5@66073 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- string.c | 4
1 files changed, 2 insertions, 2 deletions
diff --git a/string.c b/string.c
index 80749c22a0..56db697e3c 100644
--- a/string.c
+++ b/string.c
@@ -8342,7 +8342,7 @@ rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str));
const char *ptr, *end;
- if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) {
+ if (!rb_enc_unicode_p(enc)) {
return rb_str_length(str);
}
@@ -8370,7 +8370,7 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str));
const char *ptr, *end;
- if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) {
+ if (!rb_enc_unicode_p(enc)) {
return rb_str_enumerate_chars(str, ary);
}