diff options
author | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-12-09 23:14:29 +0000 |
---|---|---|
committer | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-12-09 23:14:29 +0000 |
commit | 3628eae2e754a7489feebc6f41371d42d2efcf3c (patch) | |
tree | af9f1aea84a5c0d0a6d8cc48b67409324220676d /test | |
parent | 596d7bdec4c389090afcddbd448282ec4bcfdf86 (diff) |
implement special behavior for Georgian for String#capitalize
The modern Georgian script is special in that it has an 'uppercase'
variant called MTAVRULI which can be used for emphasis of whole words,
for screamy headlines, and so on. However, in contrast to all other
bicameral scripts, there is no usage of capitalizing the first letter
in a word or a sentence. Words with mixed capitalization are not used
at all.
We therefore implement special behavior for String#capitalize. Formally,
we define String#capitalize as first applying String#downcase for the
whole string, then using titlecase on the first letter. Because Georgian
defines titlecase as the identity function both for MTAVRULI ('uppercase')
and Mkhedruli (lowercase), this results in String#capitalize being
equivalent to String#downcase for Georgian. This avoids undesirable
mixed case.
* enc/unicode.c: Actual implementation
* string.c: Add mention of this special case for documentation
* test/ruby/enc/test_case_mapping.rb: Add two tests, a general one
that uses String#capitalize on some (including nonsensical)
combinations of MTAVRULI and Mkhedruli, and a canary test to
detect the potential assignment of characters to the currently
open slots (holes) at U+1CBB and U+1CBC.
* test/ruby/enc/test_case_comprehensive.rb: Tweak generation of
expectation data.
Together with r65933, this closes issue #14839.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@66300 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'test')
-rw-r--r-- | test/ruby/enc/test_case_comprehensive.rb | 6 | ||||
-rw-r--r-- | test/ruby/enc/test_case_mapping.rb | 17 |
2 files changed, 22 insertions, 1 deletions
diff --git a/test/ruby/enc/test_case_comprehensive.rb b/test/ruby/enc/test_case_comprehensive.rb index cd6447e928..bde47017a2 100644 --- a/test/ruby/enc/test_case_comprehensive.rb +++ b/test/ruby/enc/test_case_comprehensive.rb @@ -73,7 +73,11 @@ TestComprehensiveCaseMapping.data_files_available? and class TestComprehensiveC @@codepoints << code upcase[code] = hex2utf8 data[12] unless data[12].empty? downcase[code] = hex2utf8 data[13] unless data[13].empty? - titlecase[code] = hex2utf8 data[14] unless data[14].empty? + if code>="\u1C90" and code<="\u1CBF" # exception for Georgian: use lowercase for titlecase + titlecase[code] = hex2utf8(data[13]) unless data[13].empty? + else + titlecase[code] = hex2utf8 data[14] unless data[14].empty? + end end read_data_file('CaseFolding') do |code, data| casefold[code] = hex2utf8(data[2]) if data[1] =~ /^[CF]$/ diff --git a/test/ruby/enc/test_case_mapping.rb b/test/ruby/enc/test_case_mapping.rb index d095cd569c..984fd5d479 100644 --- a/test/ruby/enc/test_case_mapping.rb +++ b/test/ruby/enc/test_case_mapping.rb @@ -187,6 +187,23 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase assert_equal 0, "\ua64A" =~ /\uA64B/i end + def test_georgian_canary + message = "Reexamine implementation of Georgian in String#capitalize" + assert_equal false, "\u1CBB".match?(/\p{assigned}/), message + assert_equal false, "\u1CBC".match?(/\p{assigned}/), message + end + + def test_georgian_capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u1C90\u1C91\u1C92".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u1C90\u1C91\u10D2".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u1C90\u10D1\u1C92".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u1C90\u10D1\u10D2".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u10D0\u1C91\u1C92".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u10D0\u1C91\u10D2".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u10D0\u10D1\u1C92".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u10D0\u10D1\u10D2".capitalize + end + def no_longer_a_test_buffer_allocations assert_equal 'TURKISH*ı'*10, ('I'*10).downcase(:turkic) assert_equal 'TURKISH*ı'*100, ('I'*100).downcase(:turkic) |