summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- enc/unicode.c 6
-rw-r--r-- string.c 2
-rw-r--r-- test/ruby/enc/test_case_comprehensive.rb 6
-rw-r--r-- test/ruby/enc/test_case_mapping.rb 17
4 files changed, 29 insertions, 2 deletions
diff --git a/enc/unicode.c b/enc/unicode.c
index b3dbd55..6e8c3d8 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -719,7 +719,11 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
}
}
else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */
- if ((flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, */
+ if ((flags & ONIGENC_CASE_TITLECASE) && code>=0x1C90 && code<=0x1CBF) { /* Georgian MTAVRULI */
+ MODIFIED;
+ code += 0x10D0 - 0x1C90;
+ }
+ else if ((flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, */
&& (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */
/* already Titlecase, no changes needed */
}
diff --git a/string.c b/string.c
index 55a3043..4fb2c04 100644
--- a/string.c
+++ b/string.c
@@ -6727,6 +6727,8 @@ rb_str_downcase(int argc, VALUE *argv, VALUE str)
*
* Modifies <i>str</i> by converting the first character to uppercase and the
* remainder to lowercase. Returns <code>nil</code> if no changes are made.
+ * There is an exception for modern Georgian (mkhedruli/MTAVRULI), where
+ * the result is the same as for String#downcase, to avoid mixed case.
*
* See String#downcase for meaning of +options+ and use with different encodings.
*
diff --git a/test/ruby/enc/test_case_comprehensive.rb b/test/ruby/enc/test_case_comprehensive.rb
index cd6447e..bde4701 100644
--- a/test/ruby/enc/test_case_comprehensive.rb
+++ b/test/ruby/enc/test_case_comprehensive.rb
@@ -73,7 +73,11 @@ TestComprehensiveCaseMapping.data_files_available? and class TestComprehensiveC
@@codepoints << code
upcase[code] = hex2utf8 data[12] unless data[12].empty?
downcase[code] = hex2utf8 data[13] unless data[13].empty?
- titlecase[code] = hex2utf8 data[14] unless data[14].empty?
+ if code>="\u1C90" and code<="\u1CBF" # exception for Georgian: use lowercase for titlecase
+ titlecase[code] = hex2utf8(data[13]) unless data[13].empty?
+ else
+ titlecase[code] = hex2utf8 data[14] unless data[14].empty?
+ end
end
read_data_file('CaseFolding') do |code, data|
casefold[code] = hex2utf8(data[2]) if data[1] =~ /^[CF]$/
diff --git a/test/ruby/enc/test_case_mapping.rb b/test/ruby/enc/test_case_mapping.rb
index d095cd5..984fd5d 100644
--- a/test/ruby/enc/test_case_mapping.rb
+++ b/test/ruby/enc/test_case_mapping.rb
@@ -187,6 +187,23 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
assert_equal 0, "\ua64A" =~ /\uA64B/i
end
+ def test_georgian_canary
+ message = "Reexamine implementation of Georgian in String#capitalize"
+ assert_equal false, "\u1CBB".match?(/\p{assigned}/), message
+ assert_equal false, "\u1CBC".match?(/\p{assigned}/), message
+ end
+
+ def test_georgian_capitalize
+ assert_equal "\u10D0\u10D1\u10D2", "\u1C90\u1C91\u1C92".capitalize
+ assert_equal "\u10D0\u10D1\u10D2", "\u1C90\u1C91\u10D2".capitalize
+ assert_equal "\u10D0\u10D1\u10D2", "\u1C90\u10D1\u1C92".capitalize
+ assert_equal "\u10D0\u10D1\u10D2", "\u1C90\u10D1\u10D2".capitalize
+ assert_equal "\u10D0\u10D1\u10D2", "\u10D0\u1C91\u1C92".capitalize
+ assert_equal "\u10D0\u10D1\u10D2", "\u10D0\u1C91\u10D2".capitalize
+ assert_equal "\u10D0\u10D1\u10D2", "\u10D0\u10D1\u1C92".capitalize
+ assert_equal "\u10D0\u10D1\u10D2", "\u10D0\u10D1\u10D2".capitalize
+ end
+
def no_longer_a_test_buffer_allocations
assert_equal 'TURKISH*ı'*10, ('I'*10).downcase(:turkic)
assert_equal 'TURKISH*ı'*100, ('I'*100).downcase(:turkic)