summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-03-29 07:53:43 +0000
committer: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-03-29 07:53:43 +0000
commit: 78f540019a394421e1875cacaf956e8c23b18cc0 (patch)
tree: 9b1e1d75453aa54f7cde6ec5a31b51c4eec56460
parent: 49f25a1299e04d3423351ab9bc212b8cdd6547a3 (diff)
* enc/unicode/case-folding.rb, casefold.h: Tweaked handling of 6
  special cases in CaseUnfold_11_Table.

* enc/unicode.c: Adjustments for above.

* test/ruby/enc/test_case_mapping.rb: Tests for the above: Some tests in
  test_titlecase activated; test_greek added. A test in test_cherokee fixed.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54383 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--  ChangeLog                           10
-rw-r--r--  enc/unicode.c                       15
-rwxr-xr-x  enc/unicode/case-folding.rb         73
-rw-r--r--  enc/unicode/casefold.h              18
-rw-r--r--  test/ruby/enc/test_case_mapping.rb  17
5 files changed, 85 insertions, 48 deletions
diff --git a/ChangeLog b/ChangeLog
index 9a40fc0..19e5759 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Tue Mar 29 16:53:44 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
+
+ * enc/unicode/case-folding.rb, casefold.h: Tweaked handling of 6
+ special cases in CaseUnfold_11_Table.
+
+ * enc/unicode.c: Adjustments for above.
+
+ * test/ruby/enc/test_case_mapping.rb: Tests for the above: Some tests in
+ test_titlecase activated; test_greek added. A test in test_cherokee fixed.
+
Tue Mar 29 13:31:00 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
* enc/unicode.c: Cleaned up some comments.
diff --git a/enc/unicode.c b/enc/unicode.c
index 87ebb0d..eebf060 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -750,12 +750,17 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
}
}
else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0) { /* data about character found in CaseUnfold_11_Table */
- if (flags&OnigCaseFoldFlags(folded->n)) {
- int count = OnigCodePointCount(folded->n);
- const OnigCodePoint *next = folded->code;
+ if (flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
MODIFIED;
- if (count==1)
- code = *next;
+ if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE)
+ code = folded->code[1];
+ else
+ code = folded->code[0];
+ }
+ else if ((flags&(ONIGENC_CASE_UPCASE))
+ && (code==0x03B9||code==0x03BC)) { /* GREEK SMALL LETTERs IOTA/MU */
+ MODIFIED;
+ code = folded->code[1];
}
}
}
diff --git a/enc/unicode/case-folding.rb b/enc/unicode/case-folding.rb
index d3738be..2df4301 100755
--- a/enc/unicode/case-folding.rb
+++ b/enc/unicode/case-folding.rb
@@ -230,38 +230,61 @@ class CaseMapping
def flags(from, type, to)
# types: CaseFold_11, CaseUnfold_11, CaseUnfold_12, CaseUnfold_13
flags = ""
- flags += '|F' if type=='CaseFold_11'
from = Array(from).map {|i| "%04X" % i}.join(" ")
to = Array(to).map {|i| "%04X" % i}.join(" ")
- to = to.split(/ /).first if type=='CaseUnfold_11'
item = @mappings[from]
- if item
- flags += '|U' if to==item.upper
- flags += '|D' if to==item.lower
- specials_index = nil
- specials = []
- unless item.upper == item.title
- if item.code == item.title
- flags += '|IT'
- else
- flags += '|ST'
- specials << item.title
+ specials_index = nil
+ specials = []
+ case type
+ when 'CaseFold_11'
+ flags += '|F'
+ if item
+ flags += '|U' if to==item.upper
+ flags += '|D' if to==item.lower
+ unless item.upper == item.title
+ if item.code == item.title
+ flags += '|IT'
+ else
+ flags += '|ST'
+ specials << item.title
+ end
+ end
+ unless item.lower.nil? or item.lower==from or item.lower==to
+ specials << item.lower
+ flags += '|SL'
+ end
+ unless item.upper.nil? or item.upper==from or item.upper==to
+ specials << item.upper
+ flags += '|SU'
end
end
- unless item.lower.nil? or item.lower==from or item.lower==to
- specials << item.lower
- flags += '|SL'
- end
- unless item.upper.nil? or item.upper==from or item.upper==to
- specials << item.upper
- flags += '|SU'
- end
- if specials.first
- flags += "|I(#{@specials_length})"
- @specials_length += specials.map { |s| s.split(/ /).length }.reduce(:+)
- @specials << specials
+ when 'CaseUnfold_11'
+ to = to.split(/ /)
+ if item
+ case to.first
+ when item.upper then flags += '|U'
+ when item.lower then flags += '|D'
+ else
+ unless from=='03B9' or from=='03BC'
+ warn 'Unpredicted case 0; check data or adjust program (enc/unicode/case_folding.rb).'
+ end
+ end
+ unless item.upper == item.title
+ if item.code == item.title
+ warn 'Unpredicted case 1; check data or adjust program (enc/unicode/case_folding.rb).'
+ elsif item.title==to[1]
+ flags += '|ST'
+ else
+ warn 'Unpredicted case 2; check data or adjust program (enc/unicode/case_folding.rb).'
+ end
+ end
end
end
+ unless specials.empty?
+ flags += "|I(#{@specials_length})"
+ @specials_length += specials.map { |s| s.split(/ /).length }.reduce(:+)
+ @specials << specials
+ end
flags
end
diff --git a/enc/unicode/casefold.h b/enc/unicode/casefold.h
index c6c5d0d..27beb54 100644
--- a/enc/unicode/casefold.h
+++ b/enc/unicode/casefold.h
@@ -3298,9 +3298,9 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x01b9, {1|U, {0x01b8}}},
{0x01bd, {1|U, {0x01bc}}},
{0x01bf, {1|U, {0x01f7}}},
- {0x01c6, {2|U|ST|I(347), {0x01c4, 0x01c5}}},
- {0x01c9, {2|U|ST|I(348), {0x01c7, 0x01c8}}},
- {0x01cc, {2|U|ST|I(349), {0x01ca, 0x01cb}}},
+ {0x01c6, {2|U|ST, {0x01c4, 0x01c5}}},
+ {0x01c9, {2|U|ST, {0x01c7, 0x01c8}}},
+ {0x01cc, {2|U|ST, {0x01ca, 0x01cb}}},
{0x01ce, {1|U, {0x01cd}}},
{0x01d0, {1|U, {0x01cf}}},
{0x01d2, {1|U, {0x01d1}}},
@@ -3319,7 +3319,7 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x01eb, {1|U, {0x01ea}}},
{0x01ed, {1|U, {0x01ec}}},
{0x01ef, {1|U, {0x01ee}}},
- {0x01f3, {2|U|ST|I(350), {0x01f1, 0x01f2}}},
+ {0x01f3, {2|U|ST, {0x01f1, 0x01f2}}},
{0x01f5, {1|U, {0x01f4}}},
{0x01f9, {1|U, {0x01f8}}},
{0x01fb, {1|U, {0x01fa}}},
@@ -3412,10 +3412,10 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x03b6, {1|U, {0x0396}}},
{0x03b7, {1|U, {0x0397}}},
{0x03b8, {3|U, {0x0398, 0x03d1, 0x03f4}}},
- {0x03b9, {3|SU|I(351), {0x0345, 0x0399, 0x1fbe}}},
+ {0x03b9, {3, {0x0345, 0x0399, 0x1fbe}}},
{0x03ba, {2|U, {0x039a, 0x03f0}}},
{0x03bb, {1|U, {0x039b}}},
- {0x03bc, {2|SU|I(352), {0x00b5, 0x039c}}},
+ {0x03bc, {2, {0x00b5, 0x039c}}},
{0x03bd, {1|U, {0x039d}}},
{0x03be, {1|U, {0x039e}}},
{0x03bf, {1|U, {0x039f}}},
@@ -6371,10 +6371,4 @@ OnigCodePoint CaseMappingSpecials[] = {
L(2)|0x0544, 0x056B, L(2)|0x0544, 0x053B,
L(2)|0x054E, 0x0576, L(2)|0x054E, 0x0546,
L(2)|0x0544, 0x056D, L(2)|0x0544, 0x053D,
- L(1)|0x01C5,
- L(1)|0x01C8,
- L(1)|0x01CB,
- L(1)|0x01F2,
- L(1)|0x0399,
- L(1)|0x039C,
};
diff --git a/test/ruby/enc/test_case_mapping.rb b/test/ruby/enc/test_case_mapping.rb
index 5d028d9..b52d86b 100644
--- a/test/ruby/enc/test_case_mapping.rb
+++ b/test/ruby/enc/test_case_mapping.rb
@@ -74,7 +74,7 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
check_downcase_properties "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79", 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', :lithuanian
check_upcase_properties 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79", :lithuanian
check_capitalize_suffixes "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79", 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ'
- assert_equal 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', :fold
+ assert_equal 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ'.downcase(:fold)
assert_equal 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79".downcase(:fold)
end
@@ -82,15 +82,15 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
check_downcase_properties 'dz dž lj nj', 'Dz Dž Lj Nj', :lithuanian
check_downcase_properties 'dz dž lj nj', 'DZ DŽ LJ NJ', :lithuanian
check_upcase_properties 'DZ DŽ LJ NJ', 'Dz Dž Lj Nj', :lithuanian
- # check_upcase_properties 'DZ DŽ LJ NJ', 'dz dž lj nj', :lithuanian
+ check_upcase_properties 'DZ DŽ LJ NJ', 'dz dž lj nj', :lithuanian
check_capitalize_properties 'Dz', 'DZ', :lithuanian
check_capitalize_properties 'Dž', 'DŽ', :lithuanian
check_capitalize_properties 'Lj', 'LJ', :lithuanian
check_capitalize_properties 'Nj', 'NJ', :lithuanian
- # check_capitalize_properties 'Dz', 'dz', :lithuanian
- # check_capitalize_properties 'Dž', 'dž', :lithuanian
- # check_capitalize_properties 'Lj', 'lj', :lithuanian
- # check_capitalize_properties 'Nj', 'nj', :lithuanian
+ check_capitalize_properties 'Dz', 'dz', :lithuanian
+ check_capitalize_properties 'Dž', 'dž', :lithuanian
+ check_capitalize_properties 'Lj', 'lj', :lithuanian
+ check_capitalize_properties 'Nj', 'nj', :lithuanian
end
def test_ascii_option
@@ -116,6 +116,11 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
check_downcase_properties "yuki\u0307hi\u0307ro matsumoto (matz)", 'YUKİHİRO MATSUMOTO (MATZ)', :lithuanian
end
+ def test_greek
+ check_downcase_properties 'αβγδεζηθικλμνξοπρστυφχψω', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ', :lithuanian
+ check_upcase_properties 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ', 'αβγδεζηθικλμνξοπρστυφχψω', :lithuanian
+ end
+
def no_longer_a_test_buffer_allocations
assert_equal 'TURKISH*ı'*10, ('I'*10).downcase(:turkic, :lithuanian)
assert_equal 'TURKISH*ı'*100, ('I'*100).downcase(:turkic, :lithuanian)