template/unicode_norm_gen.tmpl: from tool/unicode_norm_gen.rb

* template/unicode_norm_gen.tmpl: use generic_erb.rb to update the output only if it changed and to manage the timestamp, so that a source tree on a read-only filesystem works. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48129 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-10-25 07:20:15 +0000
committer: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-10-25 07:20:15 +0000
commit: 9b581e0d0b41dccc8c15400f05ca5c763c6c41b9 (patch)
tree: a1f22b735e7cf00ff41d3acf463e66513e749dd2
parent: 67a19e7a59dccbc00daed2970350a20124926afb (diff)
2 files changed, 85 insertions, 68 deletions
diff --git a/common.mk b/common.mk
index 40a227b047..4a8aa394d6 100644
--- a/common.mk
+++ b/common.mk
@@ -1120,10 +1120,14 @@ UPDATE_UNICODE_FILES_DEPS = $(ALWAYS_UPDATE_UNICODE:yes=PHONY)
 	    UnicodeData.txt CompositionExclusions.txt NormalizationTest.txt
 	@exit > .update-unicode.time
 
-$(srcdir)/lib/unicode_normalize/tables.rb: \
-		$(srcdir)/tool/unicode_norm_gen.rb $(UNICODE_FILES)
-	$(BASERUBY) -s -C "$(srcdir)" tool/unicode_norm_gen.rb \
-		-input=enc/unicode/data -ouput=lib/unicode_normalize
+$(srcdir)/lib/unicode_normalize/tables.rb: ./.unicode-tables.time
+
+./.unicode-tables.time: $(srcdir)/tool/generic_erb.rb \
+		$(srcdir)/template/unicode_norm_gen.tmpl $(UNICODE_FILES)
+	$(Q) $(BASERUBY) $(srcdir)/tool/generic_erb.rb \
+		-c -t$@ -o $(srcdir)/lib/unicode_normalize/tables.rb \
+		-I $(srcdir) \
+		$(srcdir)/template/unicode_norm_gen.tmpl enc/unicode/data lib/unicode_normalize
 
 info: info-program info-libruby_a info-libruby_so info-arch
 info-program: PHONY
diff --git a/tool/unicode_norm_gen.rb b/template/unicode_norm_gen.tmpl
index 766be26dc4..332cb156ef 100644
--- a/tool/unicode_norm_gen.rb
+++ b/template/unicode_norm_gen.tmpl
@@ -1,13 +1,13 @@
-# coding: utf-8
-
+%# -*- mode: ruby; coding: utf-8 -*-
+<%
 # Copyright Ayumu Nojima (野島 歩) and Martin J. Dürst (duerst@it.aoyama.ac.jp)
 
 # Script to generate Ruby data structures used in implementing
 # String#unicode_normalize,...
 
 # Constants for input and ouput directory
-InputDataDir = $input || 'enc/unicode/data'
-OuputDataDir = $ouput || 'lib/unicode_normalize'
+InputDataDir = ARGV[0] || 'enc/unicode/data'
+OuputDataDir = ARGV[1] || 'lib/unicode_normalize'
 
 # convenience methods
 class Integer
@@ -22,18 +22,27 @@ class Integer
   end
 end
 
-class Array
-  def line_slice(new_line) # joins items, 8 items per line
-    ary = []
-    0.step(size-1, 8) {|i|
-      ary << self[i, 8].join('')
-    }
-    ary.join(new_line).gsub(/ +$/, '')
+module Enumerable
+  unless method_defined?(:each_slice)
+    def each_slice(n)
+      ary = []
+      each do |i|
+        ary << i
+        if ary.size >= n
+          yield ary
+          ary = []
+        end
+      end
+      yield ary unless ary.empty?
+      self
+    end
   end
+end
 
+class Array
   def to_UTF8() collect {|c| c.to_UTF8}.join('') end
 
-  def to_regexp_chars # converts an array of Integers to character ranges
+  def each_regexp_chars(n = 8) # converts an array of Integers to character ranges
     sort.inject([]) do |ranges, value|
       if ranges.last and ranges.last[1]+1>=value
         ranges.last[1] = value
@@ -50,29 +59,23 @@ class Array
       else
         first.to_UTF8 + '-' + last.to_UTF8
       end
-    end.line_slice "\" \\\n    \""
-  end
-end
-
-class Hash
-  def to_hash_string
-    collect do |key, value|
-      "\"#{key.to_UTF8}\"=>\"#{value.to_UTF8}\".freeze, "
-    end.line_slice "\n    "
+    end.each_slice(n) do |slice|
+      yield slice.join('')
+    end
   end
 end
 
 # read the file 'CompositionExclusions.txt'
-composition_exclusions = File.open("#{InputDataDir}/CompositionExclusions.txt") {|f|
+composition_exclusions = vpath.open("#{InputDataDir}/CompositionExclusions.txt") {|f|
   f.grep(/^[A-Z0-9]{4,5}/) {|line| line.hex}
 }
 
 decomposition_table = {}
 kompatible_table = {}
-CombiningClass = {}  # constant to allow use in Integer#to_UTF8
+combining_class = {}  # combining class by code point; filled from UnicodeData.txt, only for classes other than "0"
 
 # read the file 'UnicodeData.txt'
-IO.foreach("#{InputDataDir}/UnicodeData.txt") do |line|
+vpath.foreach("#{InputDataDir}/UnicodeData.txt") do |line|
   codepoint, name, _2, char_class, _4, decomposition, *_rest = line.split(";")
 
   case decomposition
@@ -81,7 +84,7 @@ IO.foreach("#{InputDataDir}/UnicodeData.txt") do |line|
   when /^</
     kompatible_table[codepoint.hex] = decomposition.split(' ')[1..-1].collect {|w| w.hex}
   end
-  CombiningClass[codepoint.hex] = char_class.to_i if char_class != "0"
+  combining_class[codepoint.hex] = char_class.to_i if char_class != "0"
 
   if name=~/(First|Last)>$/ and (char_class!="0" or decomposition!="")
     warn "Unexpected: Character range with data relevant to normalization!"
@@ -92,14 +95,14 @@ end
 composition_table = decomposition_table.reject do |character, decomposition|
   composition_exclusions.member? character or # predefined composition exclusion
     decomposition.length<=1 or                # Singleton Decomposition
-    CombiningClass[character] or              # character is not a Starter
-    CombiningClass[decomposition.first]       # decomposition begins with a character that is not a Starter
+    combining_class[character] or             # character is not a Starter
+    combining_class[decomposition.first]      # decomposition begins with a character that is not a Starter
 end.invert
 
 # recalculate composition_exclusions
 composition_exclusions = decomposition_table.keys - composition_table.values
 
-accent_array = CombiningClass.keys + composition_table.keys.collect {|key| key.last}
+accent_array = combining_class.keys + composition_table.keys.collect {|key| key.last}
 
 composition_starters = composition_table.keys.collect {|key| key.first}
 
@@ -135,64 +138,74 @@ decomposition_table.each do |key, value|
   kompatible_table[key] = value if expanded
 end
 
-class_table_str = CombiningClass.collect do |key, value|
-  "\"#{key.to_UTF8}\"=>#{value}, "
-end.line_slice "\n    "
-
 # generate normalization tables file
-open("#{OuputDataDir}/tables.rb", "w").print <<MAPPING_TABLE_FILE_END
-# coding: us-ascii
+%># coding: us-ascii
+%# >
 
 # automatically generated by tool/unicode_norm_gen.rb
 
 module UnicodeNormalize
-  accents = "" \\
-    "[#{accent_array.to_regexp_chars}]" \\
+  accents = "" \
+    "[<% accent_array.each_regexp_chars do |rx|%><%=rx%>" \
+    "<% end%>]" \
   "".freeze
   ACCENTS = accents
-  REGEXP_D_STRING = "\#{''  # composition starters and composition exclusions
-    }" \\
-    "[#{(composition_table.values+composition_exclusions).to_regexp_chars}]\#{accents}*" \\
-    "|\#{''  # characters that can be the result of a composition, except composition starters
-    }" \\
-    "[#{(composition_starters-composition_table.values).to_regexp_chars}]?\#{accents}+" \\
-    "|\#{''  # precomposed Hangul syllables
-    }" \\
-    "[\\u{AC00}-\\u{D7A4}]" \\
+  REGEXP_D_STRING = "#{''  # composition starters and composition exclusions
+    }" \
+    "[<% (composition_table.values+composition_exclusions).each_regexp_chars do |rx|%><%=rx%>" \
+    "<% end%>]#{accents}*" \
+    "|#{''  # characters that can be the result of a composition, except composition starters
+    }" \
+    "[<% (composition_starters-composition_table.values).each_regexp_chars do |rx|%><%=rx%>" \
+    "<% end%>]?#{accents}+" \
+    "|#{''  # precomposed Hangul syllables
+    }" \
+    "[\u{AC00}-\u{D7A4}]" \
   "".freeze
-  REGEXP_C_STRING = "\#{''  # composition exclusions
-    }" \\
-    "[#{composition_exclusions.to_regexp_chars}]\#{accents}*" \\
-    "|\#{''  # composition starters and characters that can be the result of a composition
-    }" \\
-    "[#{(composition_starters+composition_table.values).to_regexp_chars}]?\#{accents}+" \\
-    "|\#{''  # Hangul syllables with separate trailer
-    }" \\
-    "[#{hangul_no_trailing.to_regexp_chars}][\\u11A8-\\u11C2]" \\
-    "|\#{''  # decomposed Hangul syllables
-    }" \\
-    "[\\u1100-\\u1112][\\u1161-\\u1175][\\u11A8-\\u11C2]?" \\
+  REGEXP_C_STRING = "#{''  # composition exclusions
+    }" \
+    "[<% composition_exclusions.each_regexp_chars do |rx|%><%=rx%>" \
+    "<% end%>]#{accents}*" \
+    "|#{''  # composition starters and characters that can be the result of a composition
+    }" \
+    "[<% (composition_starters+composition_table.values).each_regexp_chars do |rx|%><%=rx%>" \
+    "<% end%>]?#{accents}+" \
+    "|#{''  # Hangul syllables with separate trailer
+    }" \
+    "[<% hangul_no_trailing.each_regexp_chars do |rx|%><%=rx%>" \
+    "<% end%>][\u11A8-\u11C2]" \
+    "|#{''  # decomposed Hangul syllables
+    }" \
+    "[\u1100-\u1112][\u1161-\u1175][\u11A8-\u11C2]?" \
   "".freeze
-  REGEXP_K_STRING = "" \\
-    "[#{kompatible_table.keys.to_regexp_chars}]" \\
+  REGEXP_K_STRING = "" \
+    "[<% kompatible_table.keys.each_regexp_chars do |rx|%><%=rx%>" \
+    "<%end%>]" \
   "".freeze
 
   class_table = {
-    #{class_table_str}
+% combining_class.each_slice(8) do |slice|
+   <% slice.each do |key, value|%> "<%=key.to_UTF8%>"=><%=value%><%=%>,<% end%>
+% end
   }
   class_table.default = 0
   CLASS_TABLE = class_table.freeze
 
   DECOMPOSITION_TABLE = {
-    #{decomposition_table.to_hash_string}
+% decomposition_table.each_slice(8) do |slice|
+   <% slice.each do |key, value|%> "<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>".freeze<%=%>,<% end%>
+% end
   }.freeze
 
   KOMPATIBLE_TABLE = {
-    #{kompatible_table.to_hash_string}
+% kompatible_table.each_slice(8) do |slice|
+   <% slice.each do |key, value|%> "<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>".freeze<%=%>,<% end%>
+% end
   }.freeze
 
   COMPOSITION_TABLE = {
-    #{composition_table.to_hash_string}
+% composition_table.each_slice(8) do |slice|
+   <% slice.each do |key, value|%> "<%=key.to_UTF8%>"=>"<%=value.to_UTF8%>".freeze<%=%>,<% end%>
+% end
   }.freeze
 end
-MAPPING_TABLE_FILE_END
author	nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2014-10-25 07:20:15 +0000
committer	nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2014-10-25 07:20:15 +0000
commit	9b581e0d0b41dccc8c15400f05ca5c763c6c41b9 (patch)
tree	a1f22b735e7cf00ff41d3acf463e66513e749dd2
parent	67a19e7a59dccbc00daed2970350a20124926afb (diff)