summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
authornagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2019-08-11 12:17:58 +0000
committernagachika <nagachika@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2019-08-11 12:17:58 +0000
commitad6ffac7d6121c193b95c15bc011c9969967f554 (patch)
tree6ddd90a00ed0921a1201ca35767a1554c8eafe25 /string.c
parente68627da985d118be573cd9f7e30617cb2bd76fc (diff)
merge revision(s) 8aecc90974ab1ac87056f77e2cb3406c5c041504,2f6cc15cdb3d64135b29cfd5ee376a5a03ebbee7: [Backport #15965]
Hoisted out WIDE_ENCODINGS Fixed String#grapheme_clusters with wide encodings * string.c (get_reg_grapheme_cluster): make regexp from properly encoded sources fro wide-char encodings. [Bug #15965] * regparse.c (node_extended_grapheme_cluster): suppress false duplicated range warning for the time being. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_6@67741 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r--string.c25
1 files changed, 23 insertions, 2 deletions
diff --git a/string.c b/string.c
index f7e9c3d46c..1d1cd2fb06 100644
--- a/string.c
+++ b/string.c
@@ -8474,9 +8474,30 @@ get_reg_grapheme_cluster(rb_encoding *enc)
reg_grapheme_cluster = reg_grapheme_cluster_utf8;
}
if (!reg_grapheme_cluster) {
- const OnigUChar source[] = "\\X";
+ const OnigUChar source_ascii[] = "\\X";
OnigErrorInfo einfo;
- int r = onig_new(&reg_grapheme_cluster, source, source + sizeof(source) - 1,
+ const OnigUChar *source = source_ascii;
+ size_t source_len = sizeof(source_ascii) - 1;
+ switch (encidx) {
+#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
+#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
+#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
+#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
+#define CASE_UTF(e) \
+ case ENCINDEX_UTF_##e: { \
+ static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
+ source = source_UTF_##e; \
+ source_len = sizeof(source_UTF_##e); \
+ break; \
+ }
+ CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
+#undef CASE_UTF
+#undef CHARS_16BE
+#undef CHARS_16LE
+#undef CHARS_32BE
+#undef CHARS_32LE
+ }
+ int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
if (r) {
UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];