summary | refs | log | tree | commit | diff
path: root/enc
diff options
context:
space:
mode:
Diffstat (limited to 'enc')
-rw-r--r-- enc/unicode.c 15
-rwxr-xr-x enc/unicode/case-folding.rb 73
-rw-r--r-- enc/unicode/casefold.h 18
3 files changed, 64 insertions, 42 deletions
diff --git a/enc/unicode.c b/enc/unicode.c
index 87ebb0d..eebf060 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -750,12 +750,17 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
}
}
else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0) { /* data about character found in CaseUnfold_11_Table */
- if (flags&OnigCaseFoldFlags(folded->n)) {
- int count = OnigCodePointCount(folded->n);
- const OnigCodePoint *next = folded->code;
+ if (flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
MODIFIED;
- if (count==1)
- code = *next;
+ if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE)
+ code = folded->code[1];
+ else
+ code = folded->code[0];
+ }
+ else if ((flags&(ONIGENC_CASE_UPCASE))
+ && (code==0x03B9||code==0x03BC)) { /* GREEK SMALL LETTERs IOTA/MU */
+ MODIFIED;
+ code = folded->code[1];
}
}
}
diff --git a/enc/unicode/case-folding.rb b/enc/unicode/case-folding.rb
index d3738be..2df4301 100755
--- a/enc/unicode/case-folding.rb
+++ b/enc/unicode/case-folding.rb
@@ -230,38 +230,61 @@ class CaseMapping
def flags(from, type, to)
# types: CaseFold_11, CaseUnfold_11, CaseUnfold_12, CaseUnfold_13
flags = ""
- flags += '|F' if type=='CaseFold_11'
from = Array(from).map {|i| "%04X" % i}.join(" ")
to = Array(to).map {|i| "%04X" % i}.join(" ")
- to = to.split(/ /).first if type=='CaseUnfold_11'
item = @mappings[from]
- if item
- flags += '|U' if to==item.upper
- flags += '|D' if to==item.lower
- specials_index = nil
- specials = []
- unless item.upper == item.title
- if item.code == item.title
- flags += '|IT'
- else
- flags += '|ST'
- specials << item.title
+ specials_index = nil
+ specials = []
+ case type
+ when 'CaseFold_11'
+ flags += '|F'
+ if item
+ flags += '|U' if to==item.upper
+ flags += '|D' if to==item.lower
+ unless item.upper == item.title
+ if item.code == item.title
+ flags += '|IT'
+ else
+ flags += '|ST'
+ specials << item.title
+ end
+ end
+ unless item.lower.nil? or item.lower==from or item.lower==to
+ specials << item.lower
+ flags += '|SL'
+ end
+ unless item.upper.nil? or item.upper==from or item.upper==to
+ specials << item.upper
+ flags += '|SU'
end
end
- unless item.lower.nil? or item.lower==from or item.lower==to
- specials << item.lower
- flags += '|SL'
- end
- unless item.upper.nil? or item.upper==from or item.upper==to
- specials << item.upper
- flags += '|SU'
- end
- if specials.first
- flags += "|I(#{@specials_length})"
- @specials_length += specials.map { |s| s.split(/ /).length }.reduce(:+)
- @specials << specials
+ when 'CaseUnfold_11'
+ to = to.split(/ /)
+ if item
+ case to.first
+ when item.upper then flags += '|U'
+ when item.lower then flags += '|D'
+ else
+ unless from=='03B9' or from=='03BC'
+ warn 'Unpredicted case 0; check data or adjust program (enc/unicode/case_folding.rb).'
+ end
+ end
+ unless item.upper == item.title
+ if item.code == item.title
+ warn 'Unpredicted case 1; check data or adjust program (enc/unicode/case_folding.rb).'
+ elsif item.title==to[1]
+ flags += '|ST'
+ else
+ warn 'Unpredicted case 2; check data or adjust program (enc/unicode/case_folding.rb).'
+ end
+ end
end
end
+ unless specials.empty?
+ flags += "|I(#{@specials_length})"
+ @specials_length += specials.map { |s| s.split(/ /).length }.reduce(:+)
+ @specials << specials
+ end
flags
end
diff --git a/enc/unicode/casefold.h b/enc/unicode/casefold.h
index c6c5d0d..27beb54 100644
--- a/enc/unicode/casefold.h
+++ b/enc/unicode/casefold.h
@@ -3298,9 +3298,9 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x01b9, {1|U, {0x01b8}}},
{0x01bd, {1|U, {0x01bc}}},
{0x01bf, {1|U, {0x01f7}}},
- {0x01c6, {2|U|ST|I(347), {0x01c4, 0x01c5}}},
- {0x01c9, {2|U|ST|I(348), {0x01c7, 0x01c8}}},
- {0x01cc, {2|U|ST|I(349), {0x01ca, 0x01cb}}},
+ {0x01c6, {2|U|ST, {0x01c4, 0x01c5}}},
+ {0x01c9, {2|U|ST, {0x01c7, 0x01c8}}},
+ {0x01cc, {2|U|ST, {0x01ca, 0x01cb}}},
{0x01ce, {1|U, {0x01cd}}},
{0x01d0, {1|U, {0x01cf}}},
{0x01d2, {1|U, {0x01d1}}},
@@ -3319,7 +3319,7 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x01eb, {1|U, {0x01ea}}},
{0x01ed, {1|U, {0x01ec}}},
{0x01ef, {1|U, {0x01ee}}},
- {0x01f3, {2|U|ST|I(350), {0x01f1, 0x01f2}}},
+ {0x01f3, {2|U|ST, {0x01f1, 0x01f2}}},
{0x01f5, {1|U, {0x01f4}}},
{0x01f9, {1|U, {0x01f8}}},
{0x01fb, {1|U, {0x01fa}}},
@@ -3412,10 +3412,10 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x03b6, {1|U, {0x0396}}},
{0x03b7, {1|U, {0x0397}}},
{0x03b8, {3|U, {0x0398, 0x03d1, 0x03f4}}},
- {0x03b9, {3|SU|I(351), {0x0345, 0x0399, 0x1fbe}}},
+ {0x03b9, {3, {0x0345, 0x0399, 0x1fbe}}},
{0x03ba, {2|U, {0x039a, 0x03f0}}},
{0x03bb, {1|U, {0x039b}}},
- {0x03bc, {2|SU|I(352), {0x00b5, 0x039c}}},
+ {0x03bc, {2, {0x00b5, 0x039c}}},
{0x03bd, {1|U, {0x039d}}},
{0x03be, {1|U, {0x039e}}},
{0x03bf, {1|U, {0x039f}}},
@@ -6371,10 +6371,4 @@ OnigCodePoint CaseMappingSpecials[] = {
L(2)|0x0544, 0x056B, L(2)|0x0544, 0x053B,
L(2)|0x054E, 0x0576, L(2)|0x054E, 0x0546,
L(2)|0x0544, 0x056D, L(2)|0x0544, 0x053D,
- L(1)|0x01C5,
- L(1)|0x01C8,
- L(1)|0x01CB,
- L(1)|0x01F2,
- L(1)|0x0399,
- L(1)|0x039C,
};