summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--regparse.c6
-rw-r--r--string.c25
-rw-r--r--test/ruby/test_string.rb31
-rw-r--r--version.h4
4 files changed, 50 insertions, 16 deletions
diff --git a/regparse.c b/regparse.c
index 5e51e39502..574a07e05d 100644
--- a/regparse.c
+++ b/regparse.c
@@ -5961,6 +5961,10 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
if (ONIGENC_MBC_MINLEN(env->enc) > 1) { /* UTF-16/UTF-32 */
BBuf *inverted_buf = NULL;
+ /* TODO: fix false warning */
+ const int dup_not_warned = env->warnings_flag | ~ONIG_SYN_WARN_CC_DUP;
+ env->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
+
/* Start with a positive buffer and invert at the end.
* Otherwise, adding single-character ranges work the wrong way. */
R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 0, env));
@@ -5968,6 +5972,8 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
R_ERR(add_code_range(&(cc->mbuf), env, 0x000D, 0x000D)); /* LF */
R_ERR(not_code_range_buf(env->enc, cc->mbuf, &inverted_buf, env));
cc->mbuf = inverted_buf; /* TODO: check what to do with buffer before inversion */
+
+ env->warnings_flag &= dup_not_warned; /* TODO: fix false warning */
}
else {
R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 1, env));
diff --git a/string.c b/string.c
index f7e9c3d46c..1d1cd2fb06 100644
--- a/string.c
+++ b/string.c
@@ -8474,9 +8474,30 @@ get_reg_grapheme_cluster(rb_encoding *enc)
reg_grapheme_cluster = reg_grapheme_cluster_utf8;
}
if (!reg_grapheme_cluster) {
- const OnigUChar source[] = "\\X";
+ const OnigUChar source_ascii[] = "\\X";
OnigErrorInfo einfo;
- int r = onig_new(&reg_grapheme_cluster, source, source + sizeof(source) - 1,
+ const OnigUChar *source = source_ascii;
+ size_t source_len = sizeof(source_ascii) - 1;
+ switch (encidx) {
+#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
+#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
+#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
+#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
+#define CASE_UTF(e) \
+ case ENCINDEX_UTF_##e: { \
+ static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
+ source = source_UTF_##e; \
+ source_len = sizeof(source_UTF_##e); \
+ break; \
+ }
+ CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
+#undef CASE_UTF
+#undef CHARS_16BE
+#undef CHARS_16LE
+#undef CHARS_32BE
+#undef CHARS_32LE
+ }
+ int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
if (r) {
UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 55dcfb5f35..aad4277d5f 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -4,6 +4,11 @@ require 'test/unit'
class TestString < Test::Unit::TestCase
ENUMERATOR_WANTARRAY = RUBY_VERSION >= "3.0.0"
+ WIDE_ENCODINGS = [
+ Encoding::UTF_16BE, Encoding::UTF_16LE,
+ Encoding::UTF_32BE, Encoding::UTF_32LE,
+ ]
+
def initialize(*args)
@cls = String
@aref_re_nth = true
@@ -667,8 +672,7 @@ CODE
assert_raise(ArgumentError) {S("mypassword").crypt(S("\0a"))}
assert_raise(ArgumentError) {S("mypassword").crypt(S("a\0"))}
assert_raise(ArgumentError) {S("poison\u0000null").crypt(S("aa"))}
- [Encoding::UTF_16BE, Encoding::UTF_16LE,
- Encoding::UTF_32BE, Encoding::UTF_32LE].each do |enc|
+ WIDE_ENCODINGS.each do |enc|
assert_raise(ArgumentError) {S("mypassword").crypt(S("aa".encode(enc)))}
assert_raise(ArgumentError) {S("mypassword".encode(enc)).crypt(S("aa"))}
end
@@ -1032,13 +1036,20 @@ CODE
"\u{1F468 200D 1F393}",
"\u{1F46F 200D 2642 FE0F}",
"\u{1f469 200d 2764 fe0f 200d 1f469}",
- ].each do |g|
+ ].product([Encoding::UTF_8, *WIDE_ENCODINGS]) do |g, enc|
+ g = g.encode(enc)
assert_equal [g], g.grapheme_clusters
- assert_predicate g.dup.taint.grapheme_clusters[0], :tainted?
+ assert_predicate g.taint.grapheme_clusters[0], :tainted?
end
- assert_equal ["\u000A", "\u0324"], "\u{a 324}".grapheme_clusters
- assert_equal ["\u000D", "\u0324"], "\u{d 324}".grapheme_clusters
+ [
+ "\u{a 324}",
+ "\u{d 324}",
+ "abc",
+ ].product([Encoding::UTF_8, *WIDE_ENCODINGS]) do |g, enc|
+ g = g.encode(enc)
+ assert_equal g.chars, g.grapheme_clusters
+ end
assert_equal ["a", "b", "c"], "abc".b.grapheme_clusters
if ENUMERATOR_WANTARRAY
@@ -1805,10 +1816,7 @@ CODE
def test_split_wchar
bug8642 = '[ruby-core:56036] [Bug #8642]'
- [
- Encoding::UTF_16BE, Encoding::UTF_16LE,
- Encoding::UTF_32BE, Encoding::UTF_32LE,
- ].each do |enc|
+ WIDE_ENCODINGS.each do |enc|
s = S("abc,def".encode(enc))
assert_equal(["abc", "def"].map {|c| c.encode(enc)},
s.split(",".encode(enc)),
@@ -3018,8 +3026,7 @@ CODE
def test_ascii_incomat_inspect
bug4081 = '[ruby-core:33283]'
- [Encoding::UTF_16LE, Encoding::UTF_16BE,
- Encoding::UTF_32LE, Encoding::UTF_32BE].each do |e|
+ WIDE_ENCODINGS.each do |e|
assert_equal('"abc"', "abc".encode(e).inspect)
assert_equal('"\\u3042\\u3044\\u3046"', "\u3042\u3044\u3046".encode(e).inspect)
assert_equal('"ab\\"c"', "ab\"c".encode(e).inspect, bug4081)
diff --git a/version.h b/version.h
index f043008bce..7ff4100f7b 100644
--- a/version.h
+++ b/version.h
@@ -1,10 +1,10 @@
#define RUBY_VERSION "2.6.3"
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
-#define RUBY_PATCHLEVEL 91
+#define RUBY_PATCHLEVEL 92
#define RUBY_RELEASE_YEAR 2019
#define RUBY_RELEASE_MONTH 8
-#define RUBY_RELEASE_DAY 9
+#define RUBY_RELEASE_DAY 11
#include "ruby/version.h"