summary | refs | log | tree | commit | diff
path: root/enc/unicode
diff options
context:
space:
mode:
author: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-02-07 13:10:20 (GMT)
committer: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-02-07 13:10:20 (GMT)
commit: 49ca434bcf64f115bf1915f5287fbb61598ed773 (patch)
tree: 85636539b1cd2efbe18f905b9602ce5e9416e15b /enc/unicode
parent: 2c9274eff2e2c0451145116f68ac3be371ebd66e (diff)
* common.mk: Added two more precondition files for enc/unicode/casefold.h
* enc/unicode.c: Added shortening macros for enc/unicode/casefold.h
* enc/unicode/case-folding.rb: Fixed file encoding for CaseFolding.txt to ASCII-8BIT (should fix some ci errors). Clarified usage. Created class MapItem. Partially implemented class CaseMapping. (with Kimihito Matsui)

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53767 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc/unicode')
-rwxr-xr-x enc/unicode/case-folding.rb 35
1 files changed, 31 insertions, 4 deletions
diff --git a/enc/unicode/case-folding.rb b/enc/unicode/case-folding.rb
index 33cbee5..e1293a7 100755
--- a/enc/unicode/case-folding.rb
+++ b/enc/unicode/case-folding.rb
@@ -1,10 +1,12 @@
#!/usr/bin/ruby
-# Usage:
+# Usage (for case folding only):
# $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
# $ ruby case-folding.rb CaseFolding.txt -o casefold.h
-# or:
+# or (for case folding and case mapping):
# $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
+# $ wget http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
+# $ wget http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt
# $ ruby case-folding.rb -m . -o casefold.h
class CaseFolding
@@ -175,12 +177,37 @@ class CaseFolding
end
end
+class MapItem
+ def initialize(code, upper, lower, title)
+ @code = code
+ @upper = upper unless upper == ''
+ @lower = lower unless lower == ''
+ @title = title unless title == ''
+ end
+
+ def flags
+ "" # preliminary implementation
+ end
+end
+
class CaseMapping
def initialize (mapping_directory)
+ @mappings = {}
+ IO.readlines(File.expand_path('UnicodeData.txt', mapping_directory), encoding: Encoding::ASCII_8BIT).each do |line|
+ next if line =~ /</
+ code, _1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11, upper, lower, title = line.chomp.split ';'
+ unless upper and lower and title and (upper+lower+title)==''
+ @mappings[code] = MapItem.new(code, upper, lower, title)
+ end
+
+ end
+
+ # IO.readlines(File.expand_path('SpecialCasing.txt', mapping_directory))
end
def flags(from)
- "" # preliminary implementation
+ to = @mappings[from]
+ to ? to.flags : ""
end
def self.load(*args)
@@ -216,7 +243,7 @@ if $0 == __FILE__
warn "Either specify directory or individual file, but not both."
exit
end
- filename = File.expand_path("CaseFolding.txt", mapping_directory)
+ filename = File.expand_path('CaseFolding.txt', mapping_directory)
mapping_data = CaseMapping.load(mapping_directory)
end
filename ||= ARGV[0] || 'CaseFolding.txt'