summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2018-03-22 07:58:38 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2018-03-22 07:58:38 +0000
commit 41b2ef468597a120d52f3f73cda47cd284ab1f99 (patch)
tree a5293314b6ab1afc29fd110f73a0cca833988c1b /string.c
parent 6e0f5b8407b625a3039d93f48b56aac5695aa48e (diff)
fix each_grapheme_cluster's size [Bug #14363]
From: Hugo Peixoto <hugo.peixoto@gmail.com>
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62892 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r--string.c52
1 files changed, 51 insertions, 1 deletions
diff --git a/string.c b/string.c
index 0ab2012c89..bd7079f05d 100644
--- a/string.c
+++ b/string.c
@@ -8356,6 +8356,56 @@ rb_str_codepoints(VALUE str)
}
static VALUE
+rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
+{
+ long grapheme_cluster_count = 0;
+ regex_t *reg_grapheme_cluster = NULL;
+ static regex_t *reg_grapheme_cluster_utf8 = NULL;
+ int encidx = ENCODING_GET(str);
+ rb_encoding *enc = rb_enc_from_index(encidx);
+ int unicode_p = rb_enc_unicode_p(enc);
+ const char *ptr, *end;
+
+ if (!unicode_p || single_byte_optimizable(str)) {
+ return rb_str_length(str);
+ }
+
+ /* synchronize */
+ if (encidx == rb_utf8_encindex() && reg_grapheme_cluster_utf8) {
+ reg_grapheme_cluster = reg_grapheme_cluster_utf8;
+ }
+ if (!reg_grapheme_cluster) {
+ const OnigUChar source[] = "\\X";
+ int r = onig_new(&reg_grapheme_cluster, source, source + sizeof(source) - 1,
+ ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, NULL);
+ if (r) {
+ rb_bug("cannot compile grapheme cluster regexp");
+ }
+ if (encidx == rb_utf8_encindex()) {
+ reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
+ }
+ }
+
+ ptr = RSTRING_PTR(str);
+ end = RSTRING_END(str);
+
+ while (ptr < end) {
+ OnigPosition len = onig_match(reg_grapheme_cluster,
+ (const OnigUChar *)ptr, (const OnigUChar *)end,
+ (const OnigUChar *)ptr, NULL, 0);
+ if (len == 0) break;
+ if (len < 0) {
+ break;
+ }
+ grapheme_cluster_count++;
+ ptr += len;
+ }
+ RB_GC_GUARD(str);
+
+ return LONG2NUM(grapheme_cluster_count);
+}
+
+static VALUE
rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
{
VALUE orig = str;
@@ -8426,7 +8476,7 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
static VALUE
rb_str_each_grapheme_cluster(VALUE str)
{
- RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
+ RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_grapheme_cluster_size); /* size callback: the grapheme-cluster counter replaces rb_str_each_char_size [Bug #14363] */
return rb_str_enumerate_grapheme_clusters(str, 0);
}