diff options
| -rw-r--r-- | lib/unicode_normalize/normalize.rb | 22 | ||||
| -rw-r--r-- | test/test_unicode_normalize.rb | 19 | ||||
| -rw-r--r-- | version.h | 2 |
3 files changed, 34 insertions, 9 deletions
diff --git a/lib/unicode_normalize/normalize.rb b/lib/unicode_normalize/normalize.rb index 1caf2cc8c8..4c9fc4caa7 100644 --- a/lib/unicode_normalize/normalize.rb +++ b/lib/unicode_normalize/normalize.rb @@ -82,16 +82,22 @@ module UnicodeNormalize # :nodoc: ## Canonical Ordering def self.canonical_ordering_one(string) - sorting = string.each_char.collect { |c| [c, CLASS_TABLE[c]] } - (sorting.length-2).downto(0) do |i| # almost, but not exactly bubble sort - (0..i).each do |j| - later_class = sorting[j+1].last - if 0<later_class and later_class<sorting[j].last - sorting[j], sorting[j+1] = sorting[j+1], sorting[j] - end + result = '' + unordered = [] + chars = string.chars + n = chars.size + chars.each_with_index do |char, i| + ccc = CLASS_TABLE[char] + if ccc == 0 + unordered.sort!.each { result << chars[it % n] } + unordered.clear + result << char + else + unordered << ccc * n + i end end - return sorting.collect(&:first).join('') + unordered.sort!.each { result << chars[it % n] } + result end ## Normalization Forms for Patterns (not whole Strings) diff --git a/test/test_unicode_normalize.rb b/test/test_unicode_normalize.rb index 8789ed92d2..98f46551d3 100644 --- a/test/test_unicode_normalize.rb +++ b/test/test_unicode_normalize.rb @@ -209,4 +209,23 @@ class TestUnicodeNormalize assert_equal true, ascii_string.unicode_normalized?(:nfkc) assert_equal true, ascii_string.unicode_normalized?(:nfkd) end + + def test_canonical_ordering + a = "\u03B1\u0313\u0300\u0345" + a_unordered1 = "\u03B1\u0345\u0313\u0300" + a_unordered2 = "\u03B1\u0313\u0345\u0300" + u1 = "U\u0308\u0304" + u2 = "U\u0304\u0308" + s = "s\u0323\u0307" + s_unordered = "s\u0307\u0323" + o = "\u{1611e}\u{1611e}\u{1611f}" + # Actual cases called through String#unicode_normalize + assert_equal(s + o, UnicodeNormalize.canonical_ordering_one(s_unordered + o)) + assert_equal(a[1..], UnicodeNormalize.canonical_ordering_one(a_unordered1[1..])) + assert_equal(a[1..] + o, UnicodeNormalize.canonical_ordering_one(a_unordered2[1..] + o)) + # Artificial cases + assert_equal(a + u1 + o + u2 + s, UnicodeNormalize.canonical_ordering_one(a + u1 + o + u2 + s)) + assert_equal(s[1..] + a + a, UnicodeNormalize.canonical_ordering_one(s_unordered[1..] + a_unordered1 + a_unordered2)) + assert_equal(o + s + u1 + a + o + a + u2 + o, UnicodeNormalize.canonical_ordering_one(o + s_unordered + u1 + a_unordered1 + o + a_unordered2 + u2 + o)) + end end @@ -11,7 +11,7 @@ # define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR #define RUBY_VERSION_TEENY 10 #define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR -#define RUBY_PATCHLEVEL 187 +#define RUBY_PATCHLEVEL 188 #include "ruby/version.h" #include "ruby/internal/abi.h" |
