diff options
Diffstat (limited to 'lib/unicode_normalize')
| -rw-r--r-- | lib/unicode_normalize/normalize.rb | 32 | ||||
| -rw-r--r-- | lib/unicode_normalize/tables.rb | 408 |
2 files changed, 426 insertions, 14 deletions
diff --git a/lib/unicode_normalize/normalize.rb b/lib/unicode_normalize/normalize.rb index b27cdadaaa..0447df8de7 100644 --- a/lib/unicode_normalize/normalize.rb +++ b/lib/unicode_normalize/normalize.rb @@ -20,7 +20,7 @@ require_relative 'tables' - +# :stopdoc: module UnicodeNormalize # :nodoc: ## Constant for max hash capacity to avoid DoS attack MAX_HASH_LENGTH = 18000 # enough for all test cases, otherwise tests get slow @@ -82,16 +82,22 @@ module UnicodeNormalize # :nodoc: ## Canonical Ordering def self.canonical_ordering_one(string) - sorting = string.each_char.collect { |c| [c, CLASS_TABLE[c]] } - (sorting.length-2).downto(0) do |i| # almost, but not exactly bubble sort - (0..i).each do |j| - later_class = sorting[j+1].last - if 0<later_class and later_class<sorting[j].last - sorting[j], sorting[j+1] = sorting[j+1], sorting[j] - end + result = '' + unordered = [] + chars = string.chars + n = chars.size + chars.each_with_index do |char, i| + ccc = CLASS_TABLE[char] + if ccc == 0 + unordered.sort!.each { result << chars[it % n] } + unordered.clear + result << char + else + unordered << ccc * n + i end end - return sorting.collect(&:first).join('') + unordered.sort!.each { result << chars[it % n] } + result end ## Normalization Forms for Patterns (not whole Strings) @@ -105,16 +111,22 @@ module UnicodeNormalize # :nodoc: start = nfd_string[0] last_class = CLASS_TABLE[start]-1 accents = '' + result = '' nfd_string[1..-1].each_char do |accent| accent_class = CLASS_TABLE[accent] if last_class<accent_class and composite = COMPOSITION_TABLE[start+accent] start = composite + elsif accent_class == 0 + result << start << accents + start = accent + accents = '' + last_class = -1 else accents << accent last_class = accent_class end end - hangul_comp_one(start+accents) + hangul_comp_one(result+start+accents) end def self.normalize(string, form = :nfc) diff --git a/lib/unicode_normalize/tables.rb b/lib/unicode_normalize/tables.rb index a36daa84e6..dd5d3499b8 100644 --- a/lib/unicode_normalize/tables.rb +++ b/lib/unicode_normalize/tables.rb @@ -1,6 +1,9 @@ # coding: us-ascii # frozen_string_literal: true +Encoding::UNICODE_VERSION == "17.0.0" or + raise "Unicode version mismatch: 17.0.0 expected but #{Encoding::UNICODE_VERSION}" + # automatically generated by template/unicode_norm_gen.tmpl module UnicodeNormalize # :nodoc: @@ -29,7 +32,8 @@ module UnicodeNormalize # :nodoc: "\u0825-\u0827" \ "\u0829-\u082D" \ "\u0859-\u085B" \ - "\u08D3-\u08E1" \ + "\u0897-\u089F" \ + "\u08CA-\u08E1" \ "\u08E3-\u08FF" \ "\u093C" \ "\u094D" \ @@ -50,6 +54,7 @@ module UnicodeNormalize # :nodoc: "\u0BBE" \ "\u0BCD" \ "\u0BD7" \ + "\u0C3C" \ "\u0C4D" \ "\u0C55\u0C56" \ "\u0CBC" \ @@ -83,7 +88,7 @@ module UnicodeNormalize # :nodoc: "\u1039\u103A" \ "\u108D" \ "\u135D-\u135F" \ - "\u1714" \ + "\u1714\u1715" \ "\u1734" \ "\u17D2" \ "\u17DD" \ @@ -94,6 +99,8 @@ module UnicodeNormalize # :nodoc: "\u1A75-\u1A7C" \ "\u1A7F" \ "\u1AB0-\u1ABD" \ + "\u1ABF-\u1ADD" \ + "\u1AE0-\u1AEB" \ "\u1B34\u1B35" \ "\u1B44" \ "\u1B6B-\u1B73" \ @@ -107,8 +114,7 @@ module UnicodeNormalize # :nodoc: "\u1CED" \ "\u1CF4" \ "\u1CF8\u1CF9" \ - "\u1DC0-\u1DF9" \ - "\u1DFB-\u1DFF" \ + "\u1DC0-\u1DFF" \ "\u20D0-\u20DC" \ "\u20E1" \ "\u20E5-\u20F0" \ @@ -122,6 +128,7 @@ module UnicodeNormalize # :nodoc: "\uA69E\uA69F" \ "\uA6F0\uA6F1" \ "\uA806" \ + "\uA82C" \ "\uA8C4" \ "\uA8E0-\uA8F1" \ "\uA92B-\uA92D" \ @@ -146,8 +153,14 @@ module UnicodeNormalize # :nodoc: "\u{10A3F}" \ "\u{10AE5}\u{10AE6}" \ "\u{10D24}-\u{10D27}" \ + "\u{10D69}-\u{10D6D}" \ + "\u{10EAB}\u{10EAC}" \ + "\u{10EFA}\u{10EFB}" \ + "\u{10EFD}-\u{10EFF}" \ "\u{10F46}-\u{10F50}" \ + "\u{10F82}-\u{10F85}" \ "\u{11046}" \ + "\u{11070}" \ "\u{1107F}" \ "\u{110B9}\u{110BA}" \ "\u{11100}-\u{11102}" \ @@ -164,6 +177,12 @@ module UnicodeNormalize # :nodoc: "\u{11357}" \ "\u{11366}-\u{1136C}" \ "\u{11370}-\u{11374}" \ + "\u{113B8}" \ + "\u{113BB}" \ + "\u{113C2}" \ + "\u{113C5}" \ + "\u{113C7}-\u{113C9}" \ + "\u{113CE}-\u{113D0}" \ "\u{11442}" \ "\u{11446}" \ "\u{1145E}" \ @@ -177,6 +196,9 @@ module UnicodeNormalize # :nodoc: "\u{116B6}\u{116B7}" \ "\u{1172B}" \ "\u{11839}\u{1183A}" \ + "\u{11930}" \ + "\u{1193D}\u{1193E}" \ + "\u{11943}" \ "\u{119E0}" \ "\u{11A34}" \ "\u{11A47}" \ @@ -185,8 +207,13 @@ module UnicodeNormalize # :nodoc: "\u{11D42}" \ "\u{11D44}\u{11D45}" \ "\u{11D97}" \ + "\u{11F41}\u{11F42}" \ + "\u{1611E}-\u{16129}" \ + "\u{1612F}" \ "\u{16AF0}-\u{16AF4}" \ "\u{16B30}-\u{16B36}" \ + "\u{16D67}\u{16D68}" \ + "\u{16FF0}\u{16FF1}" \ "\u{1BC9E}" \ "\u{1D165}-\u{1D169}" \ "\u{1D16D}-\u{1D172}" \ @@ -199,8 +226,16 @@ module UnicodeNormalize # :nodoc: "\u{1E01B}-\u{1E021}" \ "\u{1E023}\u{1E024}" \ "\u{1E026}-\u{1E02A}" \ + "\u{1E08F}" \ "\u{1E130}-\u{1E136}" \ + "\u{1E2AE}" \ "\u{1E2EC}-\u{1E2EF}" \ + "\u{1E4EC}-\u{1E4EF}" \ + "\u{1E5EE}\u{1E5EF}" \ + "\u{1E6E3}" \ + "\u{1E6E6}" \ + "\u{1E6EE}\u{1E6EF}" \ + "\u{1E6F5}" \ "\u{1E8D0}-\u{1E8D6}" \ "\u{1E944}-\u{1E94A}" \ "]" @@ -426,14 +461,25 @@ module UnicodeNormalize # :nodoc: "\uFB40\uFB41" \ "\uFB43\uFB44" \ "\uFB46-\uFB4E" \ + "\u{105C9}" \ + "\u{105E4}" \ "\u{1109A}" \ "\u{1109C}" \ "\u{110AB}" \ "\u{1112E}\u{1112F}" \ "\u{1134B}\u{1134C}" \ + "\u{11383}" \ + "\u{11385}" \ + "\u{1138E}" \ + "\u{11391}" \ + "\u{113C5}" \ + "\u{113C7}\u{113C8}" \ "\u{114BB}\u{114BC}" \ "\u{114BE}" \ "\u{115BA}\u{115BB}" \ + "\u{11938}" \ + "\u{16121}-\u{16128}" \ + "\u{16D68}-\u{16D6A}" \ "\u{1D15E}-\u{1D164}" \ "\u{1D1BB}-\u{1D1C0}" \ "\u{2F800}-\u{2FA1D}" \ @@ -597,13 +643,25 @@ module UnicodeNormalize # :nodoc: "\u30DB" \ "\u30EF-\u30F2" \ "\u30FD" \ + "\u{105D2}" \ + "\u{105DA}" \ "\u{11099}" \ "\u{1109B}" \ "\u{110A5}" \ "\u{11131}\u{11132}" \ "\u{11347}" \ + "\u{11382}" \ + "\u{11384}" \ + "\u{1138B}" \ + "\u{11390}" \ + "\u{113C2}" \ "\u{114B9}" \ "\u{115B8}\u{115B9}" \ + "\u{11935}" \ + "\u{1611E}" \ + "\u{16129}" \ + "\u{16D63}" \ + "\u{16D67}" \ "]?#{accents}+" \ "|#{'' # precomposed Hangul syllables }" \ @@ -874,6 +932,10 @@ module UnicodeNormalize # :nodoc: "\u30F4" \ "\u30F7-\u30FA" \ "\u30FD\u30FE" \ + "\u{105C9}" \ + "\u{105D2}" \ + "\u{105DA}" \ + "\u{105E4}" \ "\u{11099}-\u{1109C}" \ "\u{110A5}" \ "\u{110AB}" \ @@ -881,10 +943,23 @@ module UnicodeNormalize # :nodoc: "\u{11131}\u{11132}" \ "\u{11347}" \ "\u{1134B}\u{1134C}" \ + "\u{11382}-\u{11385}" \ + "\u{1138B}" \ + "\u{1138E}" \ + "\u{11390}\u{11391}" \ + "\u{113C2}" \ + "\u{113C5}" \ + "\u{113C7}\u{113C8}" \ "\u{114B9}" \ "\u{114BB}\u{114BC}" \ "\u{114BE}" \ "\u{115B8}-\u{115BB}" \ + "\u{11935}" \ + "\u{11938}" \ + "\u{1611E}" \ + "\u{16121}-\u{16129}" \ + "\u{16D63}" \ + "\u{16D67}-\u{16D6A}" \ "]?#{accents}+" \ "|#{'' # Hangul syllables with separate trailer }" \ @@ -1391,8 +1466,10 @@ module UnicodeNormalize # :nodoc: "\u3280-\u33FF" \ "\uA69C\uA69D" \ "\uA770" \ + "\uA7F1-\uA7F4" \ "\uA7F8\uA7F9" \ "\uAB5C-\uAB5F" \ + "\uAB69" \ "\uFB00-\uFB06" \ "\uFB13-\uFB17" \ "\uFB20-\uFB29" \ @@ -1416,6 +1493,10 @@ module UnicodeNormalize # :nodoc: "\uFFDA-\uFFDC" \ "\uFFE0-\uFFE6" \ "\uFFE8-\uFFEE" \ + "\u{10781}-\u{10785}" \ + "\u{10787}-\u{107B0}" \ + "\u{107B2}-\u{107BA}" \ + "\u{1CCD6}-\u{1CCF9}" \ "\u{1D400}-\u{1D454}" \ "\u{1D456}-\u{1D49C}" \ "\u{1D49E}\u{1D49F}" \ @@ -1437,6 +1518,7 @@ module UnicodeNormalize # :nodoc: "\u{1D552}-\u{1D6A5}" \ "\u{1D6A8}-\u{1D7CB}" \ "\u{1D7CE}-\u{1D7FF}" \ + "\u{1E030}-\u{1E06D}" \ "\u{1EE00}-\u{1EE03}" \ "\u{1EE05}-\u{1EE1F}" \ "\u{1EE21}\u{1EE22}" \ @@ -1479,6 +1561,7 @@ module UnicodeNormalize # :nodoc: "\u{1F210}-\u{1F23B}" \ "\u{1F240}-\u{1F248}" \ "\u{1F250}\u{1F251}" \ + "\u{1FBF0}-\u{1FBF9}" \ "]" class_table = { @@ -1763,6 +1846,24 @@ module UnicodeNormalize # :nodoc: "\u0859"=>220, "\u085A"=>220, "\u085B"=>220, + "\u0897"=>230, + "\u0898"=>230, + "\u0899"=>220, + "\u089A"=>220, + "\u089B"=>220, + "\u089C"=>230, + "\u089D"=>230, + "\u089E"=>230, + "\u089F"=>230, + "\u08CA"=>230, + "\u08CB"=>230, + "\u08CC"=>230, + "\u08CD"=>230, + "\u08CE"=>230, + "\u08CF"=>220, + "\u08D0"=>220, + "\u08D1"=>220, + "\u08D2"=>220, "\u08D3"=>220, "\u08D4"=>230, "\u08D5"=>230, @@ -1823,6 +1924,7 @@ module UnicodeNormalize # :nodoc: "\u0B3C"=>7, "\u0B4D"=>9, "\u0BCD"=>9, + "\u0C3C"=>7, "\u0C4D"=>9, "\u0C55"=>84, "\u0C56"=>91, @@ -1873,6 +1975,7 @@ module UnicodeNormalize # :nodoc: "\u135E"=>230, "\u135F"=>230, "\u1714"=>9, + "\u1715"=>9, "\u1734"=>9, "\u17D2"=>9, "\u17DD"=>230, @@ -1906,6 +2009,49 @@ module UnicodeNormalize # :nodoc: "\u1ABB"=>230, "\u1ABC"=>230, "\u1ABD"=>220, + "\u1ABF"=>220, + "\u1AC0"=>220, + "\u1AC1"=>230, + "\u1AC2"=>230, + "\u1AC3"=>220, + "\u1AC4"=>220, + "\u1AC5"=>230, + "\u1AC6"=>230, + "\u1AC7"=>230, + "\u1AC8"=>230, + "\u1AC9"=>230, + "\u1ACA"=>220, + "\u1ACB"=>230, + "\u1ACC"=>230, + "\u1ACD"=>230, + "\u1ACE"=>230, + "\u1ACF"=>230, + "\u1AD0"=>230, + "\u1AD1"=>230, + "\u1AD2"=>230, + "\u1AD3"=>230, + "\u1AD4"=>230, + "\u1AD5"=>230, + "\u1AD6"=>230, + "\u1AD7"=>230, + "\u1AD8"=>230, + "\u1AD9"=>230, + "\u1ADA"=>230, + "\u1ADB"=>230, + "\u1ADC"=>230, + "\u1ADD"=>220, + "\u1AE0"=>230, + "\u1AE1"=>230, + "\u1AE2"=>230, + "\u1AE3"=>230, + "\u1AE4"=>230, + "\u1AE5"=>230, + "\u1AE6"=>220, + "\u1AE7"=>230, + "\u1AE8"=>230, + "\u1AE9"=>230, + "\u1AEA"=>230, + "\u1AEB"=>234, "\u1B34"=>7, "\u1B44"=>9, "\u1B6B"=>230, @@ -2008,6 +2154,7 @@ module UnicodeNormalize # :nodoc: "\u1DF7"=>228, "\u1DF8"=>228, "\u1DF9"=>220, + "\u1DFA"=>218, "\u1DFB"=>230, "\u1DFC"=>233, "\u1DFD"=>220, @@ -2099,6 +2246,7 @@ module UnicodeNormalize # :nodoc: "\uA6F0"=>230, "\uA6F1"=>230, "\uA806"=>9, + "\uA82C"=>9, "\uA8C4"=>9, "\uA8E0"=>230, "\uA8E1"=>230, @@ -2171,6 +2319,18 @@ module UnicodeNormalize # :nodoc: "\u{10D25}"=>230, "\u{10D26}"=>230, "\u{10D27}"=>230, + "\u{10D69}"=>230, + "\u{10D6A}"=>230, + "\u{10D6B}"=>230, + "\u{10D6C}"=>230, + "\u{10D6D}"=>230, + "\u{10EAB}"=>230, + "\u{10EAC}"=>230, + "\u{10EFA}"=>220, + "\u{10EFB}"=>220, + "\u{10EFD}"=>220, + "\u{10EFE}"=>220, + "\u{10EFF}"=>220, "\u{10F46}"=>220, "\u{10F47}"=>220, "\u{10F48}"=>230, @@ -2182,7 +2342,12 @@ module UnicodeNormalize # :nodoc: "\u{10F4E}"=>220, "\u{10F4F}"=>220, "\u{10F50}"=>220, + "\u{10F82}"=>230, + "\u{10F83}"=>220, + "\u{10F84}"=>230, + "\u{10F85}"=>220, "\u{11046}"=>9, + "\u{11070}"=>9, "\u{1107F}"=>9, "\u{110B9}"=>9, "\u{110BA}"=>7, @@ -2213,6 +2378,9 @@ module UnicodeNormalize # :nodoc: "\u{11372}"=>230, "\u{11373}"=>230, "\u{11374}"=>230, + "\u{113CE}"=>9, + "\u{113CF}"=>9, + "\u{113D0}"=>9, "\u{11442}"=>9, "\u{11446}"=>7, "\u{1145E}"=>230, @@ -2226,6 +2394,9 @@ module UnicodeNormalize # :nodoc: "\u{1172B}"=>9, "\u{11839}"=>9, "\u{1183A}"=>7, + "\u{1193D}"=>9, + "\u{1193E}"=>9, + "\u{11943}"=>7, "\u{119E0}"=>9, "\u{11A34}"=>9, "\u{11A47}"=>9, @@ -2235,6 +2406,9 @@ module UnicodeNormalize # :nodoc: "\u{11D44}"=>9, "\u{11D45}"=>9, "\u{11D97}"=>9, + "\u{11F41}"=>9, + "\u{11F42}"=>9, + "\u{1612F}"=>9, "\u{16AF0}"=>1, "\u{16AF1}"=>1, "\u{16AF2}"=>1, @@ -2247,6 +2421,8 @@ module UnicodeNormalize # :nodoc: "\u{16B34}"=>230, "\u{16B35}"=>230, "\u{16B36}"=>230, + "\u{16FF0}"=>6, + "\u{16FF1}"=>6, "\u{1BC9E}"=>1, "\u{1D165}"=>216, "\u{1D166}"=>216, @@ -2319,6 +2495,7 @@ module UnicodeNormalize # :nodoc: "\u{1E028}"=>230, "\u{1E029}"=>230, "\u{1E02A}"=>230, + "\u{1E08F}"=>230, "\u{1E130}"=>230, "\u{1E131}"=>230, "\u{1E132}"=>230, @@ -2326,10 +2503,22 @@ module UnicodeNormalize # :nodoc: "\u{1E134}"=>230, "\u{1E135}"=>230, "\u{1E136}"=>230, + "\u{1E2AE}"=>230, "\u{1E2EC}"=>230, "\u{1E2ED}"=>230, "\u{1E2EE}"=>230, "\u{1E2EF}"=>230, + "\u{1E4EC}"=>232, + "\u{1E4ED}"=>232, + "\u{1E4EE}"=>220, + "\u{1E4EF}"=>230, + "\u{1E5EE}"=>230, + "\u{1E5EF}"=>220, + "\u{1E6E3}"=>230, + "\u{1E6E6}"=>230, + "\u{1E6EE}"=>230, + "\u{1E6EF}"=>230, + "\u{1E6F5}"=>230, "\u{1E8D0}"=>220, "\u{1E8D1}"=>220, "\u{1E8D2}"=>220, @@ -3842,6 +4031,8 @@ module UnicodeNormalize # :nodoc: "\uFB4C"=>"\u05D1\u05BF", "\uFB4D"=>"\u05DB\u05BF", "\uFB4E"=>"\u05E4\u05BF", + "\u{105C9}"=>"\u{105D2}\u0307", + "\u{105E4}"=>"\u{105DA}\u0307", "\u{1109A}"=>"\u{11099}\u{110BA}", "\u{1109C}"=>"\u{1109B}\u{110BA}", "\u{110AB}"=>"\u{110A5}\u{110BA}", @@ -3849,11 +4040,30 @@ module UnicodeNormalize # :nodoc: "\u{1112F}"=>"\u{11132}\u{11127}", "\u{1134B}"=>"\u{11347}\u{1133E}", "\u{1134C}"=>"\u{11347}\u{11357}", + "\u{11383}"=>"\u{11382}\u{113C9}", + "\u{11385}"=>"\u{11384}\u{113BB}", + "\u{1138E}"=>"\u{1138B}\u{113C2}", + "\u{11391}"=>"\u{11390}\u{113C9}", + "\u{113C5}"=>"\u{113C2}\u{113C2}", + "\u{113C7}"=>"\u{113C2}\u{113B8}", + "\u{113C8}"=>"\u{113C2}\u{113C9}", "\u{114BB}"=>"\u{114B9}\u{114BA}", "\u{114BC}"=>"\u{114B9}\u{114B0}", "\u{114BE}"=>"\u{114B9}\u{114BD}", "\u{115BA}"=>"\u{115B8}\u{115AF}", "\u{115BB}"=>"\u{115B9}\u{115AF}", + "\u{11938}"=>"\u{11935}\u{11930}", + "\u{16121}"=>"\u{1611E}\u{1611E}", + "\u{16122}"=>"\u{1611E}\u{16129}", + "\u{16123}"=>"\u{1611E}\u{1611F}", + "\u{16124}"=>"\u{16129}\u{1611F}", + "\u{16125}"=>"\u{1611E}\u{16120}", + "\u{16126}"=>"\u{1611E}\u{1611E}\u{1611F}", + "\u{16127}"=>"\u{1611E}\u{16129}\u{1611F}", + "\u{16128}"=>"\u{1611E}\u{1611E}\u{16120}", + "\u{16D68}"=>"\u{16D67}\u{16D67}", + "\u{16D69}"=>"\u{16D63}\u{16D67}", + "\u{16D6A}"=>"\u{16D63}\u{16D67}\u{16D67}", "\u{1D15E}"=>"\u{1D157}\u{1D165}", "\u{1D15F}"=>"\u{1D158}\u{1D165}", "\u{1D160}"=>"\u{1D158}\u{1D165}\u{1D16E}", @@ -5752,12 +5962,17 @@ module UnicodeNormalize # :nodoc: "\uA69C"=>"\u044A", "\uA69D"=>"\u044C", "\uA770"=>"\uA76F", + "\uA7F1"=>"S", + "\uA7F2"=>"C", + "\uA7F3"=>"F", + "\uA7F4"=>"Q", "\uA7F8"=>"\u0126", "\uA7F9"=>"\u0153", "\uAB5C"=>"\uA727", "\uAB5D"=>"\uAB37", "\uAB5E"=>"\u026B", "\uAB5F"=>"\uAB52", + "\uAB69"=>"\u028D", "\uFB00"=>"ff", "\uFB01"=>"fi", "\uFB02"=>"fl", @@ -6803,6 +7018,98 @@ module UnicodeNormalize # :nodoc: "\uFFEC"=>"\u2193", "\uFFED"=>"\u25A0", "\uFFEE"=>"\u25CB", + "\u{10781}"=>"\u02D0", + "\u{10782}"=>"\u02D1", + "\u{10783}"=>"\u00E6", + "\u{10784}"=>"\u0299", + "\u{10785}"=>"\u0253", + "\u{10787}"=>"\u02A3", + "\u{10788}"=>"\uAB66", + "\u{10789}"=>"\u02A5", + "\u{1078A}"=>"\u02A4", + "\u{1078B}"=>"\u0256", + "\u{1078C}"=>"\u0257", + "\u{1078D}"=>"\u1D91", + "\u{1078E}"=>"\u0258", + "\u{1078F}"=>"\u025E", + "\u{10790}"=>"\u02A9", + "\u{10791}"=>"\u0264", + "\u{10792}"=>"\u0262", + "\u{10793}"=>"\u0260", + "\u{10794}"=>"\u029B", + "\u{10795}"=>"\u0127", + "\u{10796}"=>"\u029C", + "\u{10797}"=>"\u0267", + "\u{10798}"=>"\u0284", + "\u{10799}"=>"\u02AA", + "\u{1079A}"=>"\u02AB", + "\u{1079B}"=>"\u026C", + "\u{1079C}"=>"\u{1DF04}", + "\u{1079D}"=>"\uA78E", + "\u{1079E}"=>"\u026E", + "\u{1079F}"=>"\u{1DF05}", + "\u{107A0}"=>"\u028E", + "\u{107A1}"=>"\u{1DF06}", + "\u{107A2}"=>"\u00F8", + "\u{107A3}"=>"\u0276", + "\u{107A4}"=>"\u0277", + "\u{107A5}"=>"q", + "\u{107A6}"=>"\u027A", + "\u{107A7}"=>"\u{1DF08}", + "\u{107A8}"=>"\u027D", + "\u{107A9}"=>"\u027E", + "\u{107AA}"=>"\u0280", + "\u{107AB}"=>"\u02A8", + "\u{107AC}"=>"\u02A6", + "\u{107AD}"=>"\uAB67", + "\u{107AE}"=>"\u02A7", + "\u{107AF}"=>"\u0288", + "\u{107B0}"=>"\u2C71", + "\u{107B2}"=>"\u028F", + "\u{107B3}"=>"\u02A1", + "\u{107B4}"=>"\u02A2", + "\u{107B5}"=>"\u0298", + "\u{107B6}"=>"\u01C0", + "\u{107B7}"=>"\u01C1", + "\u{107B8}"=>"\u01C2", + "\u{107B9}"=>"\u{1DF0A}", + "\u{107BA}"=>"\u{1DF1E}", + "\u{1CCD6}"=>"A", + "\u{1CCD7}"=>"B", + "\u{1CCD8}"=>"C", + "\u{1CCD9}"=>"D", + "\u{1CCDA}"=>"E", + "\u{1CCDB}"=>"F", + "\u{1CCDC}"=>"G", + "\u{1CCDD}"=>"H", + "\u{1CCDE}"=>"I", + "\u{1CCDF}"=>"J", + "\u{1CCE0}"=>"K", + "\u{1CCE1}"=>"L", + "\u{1CCE2}"=>"M", + "\u{1CCE3}"=>"N", + "\u{1CCE4}"=>"O", + "\u{1CCE5}"=>"P", + "\u{1CCE6}"=>"Q", + "\u{1CCE7}"=>"R", + "\u{1CCE8}"=>"S", + "\u{1CCE9}"=>"T", + "\u{1CCEA}"=>"U", + "\u{1CCEB}"=>"V", + "\u{1CCEC}"=>"W", + "\u{1CCED}"=>"X", + "\u{1CCEE}"=>"Y", + "\u{1CCEF}"=>"Z", + "\u{1CCF0}"=>"0", + "\u{1CCF1}"=>"1", + "\u{1CCF2}"=>"2", + "\u{1CCF3}"=>"3", + "\u{1CCF4}"=>"4", + "\u{1CCF5}"=>"5", + "\u{1CCF6}"=>"6", + "\u{1CCF7}"=>"7", + "\u{1CCF8}"=>"8", + "\u{1CCF9}"=>"9", "\u{1D400}"=>"A", "\u{1D401}"=>"B", "\u{1D402}"=>"C", @@ -7799,6 +8106,68 @@ module UnicodeNormalize # :nodoc: "\u{1D7FD}"=>"7", "\u{1D7FE}"=>"8", "\u{1D7FF}"=>"9", + "\u{1E030}"=>"\u0430", + "\u{1E031}"=>"\u0431", + "\u{1E032}"=>"\u0432", + "\u{1E033}"=>"\u0433", + "\u{1E034}"=>"\u0434", + "\u{1E035}"=>"\u0435", + "\u{1E036}"=>"\u0436", + "\u{1E037}"=>"\u0437", + "\u{1E038}"=>"\u0438", + "\u{1E039}"=>"\u043A", + "\u{1E03A}"=>"\u043B", + "\u{1E03B}"=>"\u043C", + "\u{1E03C}"=>"\u043E", + "\u{1E03D}"=>"\u043F", + "\u{1E03E}"=>"\u0440", + "\u{1E03F}"=>"\u0441", + "\u{1E040}"=>"\u0442", + "\u{1E041}"=>"\u0443", + "\u{1E042}"=>"\u0444", + "\u{1E043}"=>"\u0445", + "\u{1E044}"=>"\u0446", + "\u{1E045}"=>"\u0447", + "\u{1E046}"=>"\u0448", + "\u{1E047}"=>"\u044B", + "\u{1E048}"=>"\u044D", + "\u{1E049}"=>"\u044E", + "\u{1E04A}"=>"\uA689", + "\u{1E04B}"=>"\u04D9", + "\u{1E04C}"=>"\u0456", + "\u{1E04D}"=>"\u0458", + "\u{1E04E}"=>"\u04E9", + "\u{1E04F}"=>"\u04AF", + "\u{1E050}"=>"\u04CF", + "\u{1E051}"=>"\u0430", + "\u{1E052}"=>"\u0431", + "\u{1E053}"=>"\u0432", + "\u{1E054}"=>"\u0433", + "\u{1E055}"=>"\u0434", + "\u{1E056}"=>"\u0435", + "\u{1E057}"=>"\u0436", + "\u{1E058}"=>"\u0437", + "\u{1E059}"=>"\u0438", + "\u{1E05A}"=>"\u043A", + "\u{1E05B}"=>"\u043B", + "\u{1E05C}"=>"\u043E", + "\u{1E05D}"=>"\u043F", + "\u{1E05E}"=>"\u0441", + "\u{1E05F}"=>"\u0443", + "\u{1E060}"=>"\u0444", + "\u{1E061}"=>"\u0445", + "\u{1E062}"=>"\u0446", + "\u{1E063}"=>"\u0447", + "\u{1E064}"=>"\u0448", + "\u{1E065}"=>"\u044A", + "\u{1E066}"=>"\u044B", + "\u{1E067}"=>"\u0491", + "\u{1E068}"=>"\u0456", + "\u{1E069}"=>"\u0455", + "\u{1E06A}"=>"\u045F", + "\u{1E06B}"=>"\u04AB", + "\u{1E06C}"=>"\uA651", + "\u{1E06D}"=>"\u04B1", "\u{1EE00}"=>"\u0627", "\u{1EE01}"=>"\u0628", "\u{1EE02}"=>"\u062C", @@ -8076,6 +8445,16 @@ module UnicodeNormalize # :nodoc: "\u{1F248}"=>"\u3014\u6557\u3015", "\u{1F250}"=>"\u5F97", "\u{1F251}"=>"\u53EF", + "\u{1FBF0}"=>"0", + "\u{1FBF1}"=>"1", + "\u{1FBF2}"=>"2", + "\u{1FBF3}"=>"3", + "\u{1FBF4}"=>"4", + "\u{1FBF5}"=>"5", + "\u{1FBF6}"=>"6", + "\u{1FBF7}"=>"7", + "\u{1FBF8}"=>"8", + "\u{1FBF9}"=>"9", "\u0385"=>" \u0308\u0301", "\u03D3"=>"\u03A5\u0301", "\u03D4"=>"\u03A5\u0308", @@ -9023,6 +9402,8 @@ module UnicodeNormalize # :nodoc: "\u30F1\u3099"=>"\u30F9", "\u30F2\u3099"=>"\u30FA", "\u30FD\u3099"=>"\u30FE", + "\u{105D2}\u0307"=>"\u{105C9}", + "\u{105DA}\u0307"=>"\u{105E4}", "\u{11099}\u{110BA}"=>"\u{1109A}", "\u{1109B}\u{110BA}"=>"\u{1109C}", "\u{110A5}\u{110BA}"=>"\u{110AB}", @@ -9030,10 +9411,29 @@ module UnicodeNormalize # :nodoc: "\u{11132}\u{11127}"=>"\u{1112F}", "\u{11347}\u{1133E}"=>"\u{1134B}", "\u{11347}\u{11357}"=>"\u{1134C}", + "\u{11382}\u{113C9}"=>"\u{11383}", + "\u{11384}\u{113BB}"=>"\u{11385}", + "\u{1138B}\u{113C2}"=>"\u{1138E}", + "\u{11390}\u{113C9}"=>"\u{11391}", + "\u{113C2}\u{113C2}"=>"\u{113C5}", + "\u{113C2}\u{113B8}"=>"\u{113C7}", + "\u{113C2}\u{113C9}"=>"\u{113C8}", "\u{114B9}\u{114BA}"=>"\u{114BB}", "\u{114B9}\u{114B0}"=>"\u{114BC}", "\u{114B9}\u{114BD}"=>"\u{114BE}", "\u{115B8}\u{115AF}"=>"\u{115BA}", "\u{115B9}\u{115AF}"=>"\u{115BB}", + "\u{11935}\u{11930}"=>"\u{11938}", + "\u{1611E}\u{1611E}"=>"\u{16121}", + "\u{1611E}\u{16129}"=>"\u{16122}", + "\u{1611E}\u{1611F}"=>"\u{16123}", + "\u{16129}\u{1611F}"=>"\u{16124}", + "\u{1611E}\u{16120}"=>"\u{16125}", + "\u{16121}\u{1611F}"=>"\u{16126}", + "\u{16122}\u{1611F}"=>"\u{16127}", + "\u{16121}\u{16120}"=>"\u{16128}", + "\u{16D67}\u{16D67}"=>"\u{16D68}", + "\u{16D63}\u{16D67}"=>"\u{16D69}", + "\u{16D69}\u{16D67}"=>"\u{16D6A}", }.freeze end |
