diff options
-rw-r--r-- | lib/unicode_normalize/normalize.rb | 2 | ||||
-rw-r--r-- | test/test_unicode_normalize.rb | 7 |
2 files changed, 8 insertions, 1 deletion
diff --git a/lib/unicode_normalize/normalize.rb b/lib/unicode_normalize/normalize.rb
index a2f7a29c88..460f784125 100644
--- a/lib/unicode_normalize/normalize.rb
+++ b/lib/unicode_normalize/normalize.rb
@@ -70,7 +70,7 @@ module UnicodeNormalize # :nodoc:
     if length>1 and 0 <= (lead =string[0].ord-LBASE) and lead < LCOUNT and
                     0 <= (vowel=string[1].ord-VBASE) and vowel < VCOUNT
       lead_vowel = SBASE + (lead * VCOUNT + vowel) * TCOUNT
-      if length>2 and 0 <= (trail=string[2].ord-TBASE) and trail < TCOUNT
+      if length>2 and 0 < (trail=string[2].ord-TBASE) and trail < TCOUNT
         (lead_vowel + trail).chr(Encoding::UTF_8) + string[3..-1]
       else
         lead_vowel.chr(Encoding::UTF_8) + string[2..-1]
diff --git a/test/test_unicode_normalize.rb b/test/test_unicode_normalize.rb
index 0fc84343d0..fbd979206a 100644
--- a/test/test_unicode_normalize.rb
+++ b/test/test_unicode_normalize.rb
@@ -167,6 +167,13 @@ class TestUnicodeNormalize
     assert_equal "\u1100\u1161\u11A8", "\uAC00\u11A8".unicode_normalize(:nfd)
   end
 
+  # preventive tests for (non-)bug #14934
+  def test_no_trailing_jamo
+    assert_equal "\u1100\u1176\u11a8", "\u1100\u1176\u11a8".unicode_normalize(:nfc)
+    assert_equal "\uae30\u11a7", "\u1100\u1175\u11a7".unicode_normalize(:nfc)
+    assert_equal "\uae30\u11c3", "\u1100\u1175\u11c3".unicode_normalize(:nfc)
+  end
+
   def test_hangul_plus_accents
     assert_equal "\uAC00\u0323\u0300", "\uAC00\u0300\u0323".unicode_normalize(:nfc)
     assert_equal "\uAC00\u0323\u0300", "\u1100\u1161\u0300\u0323".unicode_normalize(:nfc)