diff options
author | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-03-11 07:11:27 +0000 |
---|---|---|
committer | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-03-11 07:11:27 +0000 |
commit | 59766643db17f8dbfe518cafa20f6ba36a9b8d9b (patch) | |
tree | 07f615f30e64f8db066d53cdf7b03a18949bdb1d /enc/unicode/case-folding.rb | |
parent | a2b88f53257500c6a7d08094b821e8e4fb19369a (diff) |
* enc/unicode/case-folding.rb, casefold.h: Streamlining approach to
case mapping data not available from case folding by unifying all
three cases (special title, special upper, special lower).
* enc/unicode.c: Adjust macro names for above (macros are currently inactive).
(with Kimihito Matsui)
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54085 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc/unicode/case-folding.rb')
-rwxr-xr-x | enc/unicode/case-folding.rb | 41 |
1 files changed, 28 insertions, 13 deletions
```diff
diff --git a/enc/unicode/case-folding.rb b/enc/unicode/case-folding.rb
index cd14b6e6f1..d309dd6d15 100755
--- a/enc/unicode/case-folding.rb
+++ b/enc/unicode/case-folding.rb
@@ -177,7 +177,7 @@ class CaseFolding
     dest.print lookup_hash(name, "CodePointList2", data)
 
     # TitleCase
-    dest.print mapping_data.titlecase_output
+    dest.print mapping_data.specials_output
   end
 
   def debug!
@@ -203,7 +203,8 @@ end
 class CaseMapping
   def initialize (mapping_directory)
     @mappings = {}
-    @titlecase = []
+    @specials = []
+    @specials_length = 0
     IO.readlines(File.expand_path('UnicodeData.txt', mapping_directory), encoding: Encoding::ASCII_8BIT).each do |line|
       next if line =~ /^</
       code, _1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11, upper, lower, title = line.chomp.split ';'
@@ -237,12 +238,24 @@ class CaseMapping
     if item
       flags += '|U' if to==item.upper
       flags += '|D' if to==item.lower
+      specials_index = nil
+      specials = []
       unless item.upper == item.title
-        unless title_index = @titlecase.find_index { |i| i.title==item.title }
-          title_index = @titlecase.length
-          @titlecase << item
-        end
-        flags += "|T(#{title_index})"
+        specials << item.title
+        flags += "|ST"
+      end
+      unless item.lower.nil? or item.lower==from or item.lower==to
+        specials << item.lower
+        flags += "|SL"
+      end
+      unless item.upper.nil? or item.upper==from or item.upper==to
+        specials << item.upper
+        flags += "|SU"
+      end
+      if specials.first
+        flags += "|I(#{@specials_length})"
+        @specials_length += specials.map { |s| s.split(/ /).length }.reduce(:+)
+        @specials << specials
       end
     end
     flags
@@ -252,12 +265,14 @@ class CaseMapping
     @debug = true
   end
 
-  def titlecase_output
-    "CodePointList3 TitleCase[] = {\n" +
-    @titlecase.map do |item|
-      chars = item.title.split(/ /)
-      ct = ' /* ' + Array(chars).map{|c|[c.to_i(16)].pack("U*")}.join(", ") + ' */' if @debug
-      " {#{chars.length}, {#{chars.map {|c| "0x"+c }.join(', ')}#{ct}}},\n"
+  def specials_output
+    "OnigCodePoint CaseMappingSpecials[] = {\n" +
+    @specials.map do |sps|
+      ' ' + sps.map do |sp|
+        chars = sp.split(/ /)
+        ct = ' /* ' + Array(chars).map{|c|[c.to_i(16)].pack("U*")}.join(", ") + ' */' if @debug
+        " L(#{chars.length})|#{chars.map {|c| "0x"+c }.join(', ')}#{ct},"
+      end.join + "\n"
     end.join + "};\n"
   end
 
```