From a7acec675051f8ed49bbc3ab992ac668e5c29fcf Mon Sep 17 00:00:00 2001 From: duerst Date: Sat, 28 Jul 2018 09:44:33 +0000 Subject: fix range check for Hangul jamo trailers in Unicode normalization * lib/unicode_normalize/normalize.rb: Fix the range check for trailing Hangul jamo characters in Unicode normalization. Different from leading or vowel jamos, where LBASE and VBASE are actual characters, a value equal to TBASE expresses the absence of a trailing jamo. This fix is technically correct, but there was no bug because the regular expressions in lib/unicode_normalize/tables.rb eliminate jamos equal to TBASE from normalization processing. * test/test_unicode_normalize.rb: Add preventive test test_no_trailing_jamo based on https://github.com/python/cpython/commit/d134809cd3764c6a634eab7bb8995e3e2eff14d5 just for the case we ever get a regression. This closes issue #14934, thanks to MaLin (Lin Ma) for reporting. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@64087 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/unicode_normalize/normalize.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/unicode_normalize') diff --git a/lib/unicode_normalize/normalize.rb b/lib/unicode_normalize/normalize.rb index a2f7a29c88..460f784125 100644 --- a/lib/unicode_normalize/normalize.rb +++ b/lib/unicode_normalize/normalize.rb @@ -70,7 +70,7 @@ module UnicodeNormalize # :nodoc: if length>1 and 0 <= (lead =string[0].ord-LBASE) and lead < LCOUNT and 0 <= (vowel=string[1].ord-VBASE) and vowel < VCOUNT lead_vowel = SBASE + (lead * VCOUNT + vowel) * TCOUNT - if length>2 and 0 <= (trail=string[2].ord-TBASE) and trail < TCOUNT + if length>2 and 0 < (trail=string[2].ord-TBASE) and trail < TCOUNT (lead_vowel + trail).chr(Encoding::UTF_8) + string[3..-1] else lead_vowel.chr(Encoding::UTF_8) + string[2..-1] -- cgit v1.2.3