From 866c79e2de4567d71f432652c58b48fe50916f37 Mon Sep 17 00:00:00 2001
From: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date: Thu, 8 Oct 2009 02:49:11 +0000
Subject: * tool/enc-unicode.rb: parse range notation of UnicodeData.txt.

* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
  enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
  follow above change. [ruby-dev:39444]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@25260 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 tool/enc-unicode.rb | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'tool/enc-unicode.rb')

diff --git a/tool/enc-unicode.rb b/tool/enc-unicode.rb
index 8429bcc178..57edb3b3e5 100755
--- a/tool/enc-unicode.rb
+++ b/tool/enc-unicode.rb
@@ -40,26 +40,36 @@ end
 def parse_unicode_data(file)
   last_cp = 0
   data = {'Cn' => []}
+  beg_cp = nil
   IO.foreach(file) do |line|
     fields = line.split(';')
     cp = fields[0].to_i(16)
 
+    case fields[1]
+    when /\A<(.*),\s*First>\z/
+      beg_cp = cp
+      next
+    when /\A<(.*),\s*Last>\z/
+      cps = (beg_cp..cp).to_a
+    else
+      beg_cp = cp
+      cps = [cp]
+    end
+
     # The Cn category represents unassigned characters. These are not listed in
     # UnicodeData.txt so we must derive them by looking for 'holes' in the range
     # of listed codepoints. We increment the last codepoint seen and compare it
     # with the current codepoint. If the current codepoint is less than
     # last_cp.next we have found a hole, so we add the missing codepoint to the
     # Cn category.
-    while ((last_cp = last_cp.next) < cp)
-      data['Cn'] << last_cp
-    end
+    data['Cn'].concat((last_cp.next...beg_cp).to_a)
 
     # The third field denotes the 'General' category, e.g. Lu
-    (data[fields[2]] ||= []) << cp
+    (data[fields[2]] ||= []).concat(cps)
 
     # The 'Major' category is the first letter of the 'General' category, e.g.
     # 'Lu' -> 'L'
-    (data[fields[2][0,1]] ||= []) << cp
+    (data[fields[2][0,1]] ||= []).concat(cps)
     last_cp = cp
   end
 
-- 
cgit v1.2.3