summaryrefslogtreecommitdiff
path: root/enc/unicode/case-folding.rb
diff options
context:
space:
mode:
authorduerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2016-03-11 07:11:27 +0000
committerduerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2016-03-11 07:11:27 +0000
commit59766643db17f8dbfe518cafa20f6ba36a9b8d9b (patch)
tree07f615f30e64f8db066d53cdf7b03a18949bdb1d /enc/unicode/case-folding.rb
parenta2b88f53257500c6a7d08094b821e8e4fb19369a (diff)
* enc/unicode/case-folding.rb, casefold.h: Streamlining approach to
case mapping data not available from case folding by unifying all three cases (special title, special upper, special lower). * enc/unicode.c: Adjust macro names for above (macros are currently inactive). (with Kimihito Matsui) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54085 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc/unicode/case-folding.rb')
-rwxr-xr-xenc/unicode/case-folding.rb41
1 files changed, 28 insertions, 13 deletions
diff --git a/enc/unicode/case-folding.rb b/enc/unicode/case-folding.rb
index cd14b6e6f1..d309dd6d15 100755
--- a/enc/unicode/case-folding.rb
+++ b/enc/unicode/case-folding.rb
@@ -177,7 +177,7 @@ class CaseFolding
dest.print lookup_hash(name, "CodePointList2", data)
# TitleCase
- dest.print mapping_data.titlecase_output
+ dest.print mapping_data.specials_output
end
def debug!
@@ -203,7 +203,8 @@ end
class CaseMapping
def initialize (mapping_directory)
@mappings = {}
- @titlecase = []
+ @specials = []
+ @specials_length = 0
IO.readlines(File.expand_path('UnicodeData.txt', mapping_directory), encoding: Encoding::ASCII_8BIT).each do |line|
next if line =~ /^</
code, _1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11, upper, lower, title = line.chomp.split ';'
@@ -237,12 +238,24 @@ class CaseMapping
if item
flags += '|U' if to==item.upper
flags += '|D' if to==item.lower
+ specials_index = nil
+ specials = []
unless item.upper == item.title
- unless title_index = @titlecase.find_index { |i| i.title==item.title }
- title_index = @titlecase.length
- @titlecase << item
- end
- flags += "|T(#{title_index})"
+ specials << item.title
+ flags += "|ST"
+ end
+ unless item.lower.nil? or item.lower==from or item.lower==to
+ specials << item.lower
+ flags += "|SL"
+ end
+ unless item.upper.nil? or item.upper==from or item.upper==to
+ specials << item.upper
+ flags += "|SU"
+ end
+ if specials.first
+ flags += "|I(#{@specials_length})"
+ @specials_length += specials.map { |s| s.split(/ /).length }.reduce(:+)
+ @specials << specials
end
end
flags
@@ -252,12 +265,14 @@ class CaseMapping
@debug = true
end
- def titlecase_output
- "CodePointList3 TitleCase[] = {\n" +
- @titlecase.map do |item|
- chars = item.title.split(/ /)
- ct = ' /* ' + Array(chars).map{|c|[c.to_i(16)].pack("U*")}.join(", ") + ' */' if @debug
- " {#{chars.length}, {#{chars.map {|c| "0x"+c }.join(', ')}#{ct}}},\n"
+ def specials_output
+ "OnigCodePoint CaseMappingSpecials[] = {\n" +
+ @specials.map do |sps|
+ ' ' + sps.map do |sp|
+ chars = sp.split(/ /)
+ ct = ' /* ' + Array(chars).map{|c|[c.to_i(16)].pack("U*")}.join(", ") + ' */' if @debug
+ " L(#{chars.length})|#{chars.map {|c| "0x"+c }.join(', ')}#{ct},"
+ end.join + "\n"
end.join + "};\n"
end