template/unicode_norm_gen.tmpl: from tool/unicode_norm_gen.rb

* template/unicode_norm_gen.tmpl: use generic_erb.rb to update only if changed and to manage the timestamp, so that a source tree on a read-only filesystem works.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48129 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-10-25 07:20:15 +0000
committer: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-10-25 07:20:15 +0000
commit: 9b581e0d0b41dccc8c15400f05ca5c763c6c41b9 (patch)
tree: a1f22b735e7cf00ff41d3acf463e66513e749dd2 /tool
parent: 67a19e7a59dccbc00daed2970350a20124926afb (diff)
1 files changed, 0 insertions, 198 deletions
diff --git a/tool/unicode_norm_gen.rb b/tool/unicode_norm_gen.rb
deleted file mode 100644
index 766be26dc4..0000000000
--- a/tool/unicode_norm_gen.rb
+++ /dev/null
@@ -1,198 +0,0 @@
-# coding: utf-8
-
-# Copyright Ayumu Nojima (野島 歩) and Martin J. Dürst (duerst@it.aoyama.ac.jp)
-
-# Script to generate Ruby data structures used in implementing
-# String#unicode_normalize,...
-
-# Constants for input and ouput directory
-InputDataDir = $input || 'enc/unicode/data'
-OuputDataDir = $ouput || 'lib/unicode_normalize'
-
-# convenience methods
-class Integer
-  def to_UTF8() # convert to string, taking legibility into account
-    if self>0xFFFF
-      "\\u{#{to_s(16).upcase}}"
-    elsif self>0x7f
-      "\\u#{to_s(16).upcase.rjust(4, '0')}"
-    else
-      chr.sub(/[\\\"]/, "\\\\\\\&")
-    end
-  end
-end
-
-class Array
-  def line_slice(new_line) # joins items, 8 items per line
-    ary = []
-    0.step(size-1, 8) {|i|
-      ary << self[i, 8].join('')
-    }
-    ary.join(new_line).gsub(/ +$/, '')
-  end
-
-  def to_UTF8() collect {|c| c.to_UTF8}.join('') end
-
-  def to_regexp_chars # converts an array of Integers to character ranges
-    sort.inject([]) do |ranges, value|
-      if ranges.last and ranges.last[1]+1>=value
-        ranges.last[1] = value
-        ranges
-      else
-        ranges << [value, value]
-      end
-    end.collect do |first, last|
-      case last-first
-      when 0
-        first.to_UTF8
-      when 1
-        first.to_UTF8 + last.to_UTF8
-      else
-        first.to_UTF8 + '-' + last.to_UTF8
-      end
-    end.line_slice "\" \\\n    \""
-  end
-end
-
-class Hash
-  def to_hash_string
-    collect do |key, value|
-      "\"#{key.to_UTF8}\"=>\"#{value.to_UTF8}\".freeze, "
-    end.line_slice "\n    "
-  end
-end
-
-# read the file 'CompositionExclusions.txt'
-composition_exclusions = File.open("#{InputDataDir}/CompositionExclusions.txt") {|f|
-  f.grep(/^[A-Z0-9]{4,5}/) {|line| line.hex}
-}
-
-decomposition_table = {}
-kompatible_table = {}
-CombiningClass = {}  # constant to allow use in Integer#to_UTF8
-
-# read the file 'UnicodeData.txt'
-IO.foreach("#{InputDataDir}/UnicodeData.txt") do |line|
-  codepoint, name, _2, char_class, _4, decomposition, *_rest = line.split(";")
-
-  case decomposition
-  when /^[0-9A-F]/
-    decomposition_table[codepoint.hex] = decomposition.split(' ').collect {|w| w.hex}
-  when /^</
-    kompatible_table[codepoint.hex] = decomposition.split(' ')[1..-1].collect {|w| w.hex}
-  end
-  CombiningClass[codepoint.hex] = char_class.to_i if char_class != "0"
-
-  if name=~/(First|Last)>$/ and (char_class!="0" or decomposition!="")
-    warn "Unexpected: Character range with data relevant to normalization!"
-  end
-end
-
-# calculate compositions from decompositions
-composition_table = decomposition_table.reject do |character, decomposition|
-  composition_exclusions.member? character or # predefined composition exclusion
-    decomposition.length<=1 or                # Singleton Decomposition
-    CombiningClass[character] or              # character is not a Starter
-    CombiningClass[decomposition.first]       # decomposition begins with a character that is not a Starter
-end.invert
-
-# recalculate composition_exclusions
-composition_exclusions = decomposition_table.keys - composition_table.values
-
-accent_array = CombiningClass.keys + composition_table.keys.collect {|key| key.last}
-
-composition_starters = composition_table.keys.collect {|key| key.first}
-
-hangul_no_trailing = []
-0xAC00.step(0xD7A3, 28) {|c| hangul_no_trailing << c}
-
-# expand decomposition table values
-decomposition_table.each do |key, value|
-  position = 0
-  while position < value.length
-    if decomposition = decomposition_table[value[position]]
-      decomposition_table[key] = value = value.dup # avoid overwriting composition_table key
-      value[position, 1] = decomposition
-    else
-      position += 1
-    end
-  end
-end
-
-# deal with relationship between canonical and kompatibility decompositions
-decomposition_table.each do |key, value|
-  value = value.dup
-  expanded = false
-  position = 0
-  while position < value.length
-    if decomposition = kompatible_table[value[position]]
-      value[position, 1] = decomposition
-      expanded = true
-    else
-      position += 1
-    end
-  end
-  kompatible_table[key] = value if expanded
-end
-
-class_table_str = CombiningClass.collect do |key, value|
-  "\"#{key.to_UTF8}\"=>#{value}, "
-end.line_slice "\n    "
-
-# generate normalization tables file
-open("#{OuputDataDir}/tables.rb", "w").print <<MAPPING_TABLE_FILE_END
-# coding: us-ascii
-
-# automatically generated by tool/unicode_norm_gen.rb
-
-module UnicodeNormalize
-  accents = "" \\
-    "[#{accent_array.to_regexp_chars}]" \\
-  "".freeze
-  ACCENTS = accents
-  REGEXP_D_STRING = "\#{''  # composition starters and composition exclusions
-    }" \\
-    "[#{(composition_table.values+composition_exclusions).to_regexp_chars}]\#{accents}*" \\
-    "|\#{''  # characters that can be the result of a composition, except composition starters
-    }" \\
-    "[#{(composition_starters-composition_table.values).to_regexp_chars}]?\#{accents}+" \\
-    "|\#{''  # precomposed Hangul syllables
-    }" \\
-    "[\\u{AC00}-\\u{D7A4}]" \\
-  "".freeze
-  REGEXP_C_STRING = "\#{''  # composition exclusions
-    }" \\
-    "[#{composition_exclusions.to_regexp_chars}]\#{accents}*" \\
-    "|\#{''  # composition starters and characters that can be the result of a composition
-    }" \\
-    "[#{(composition_starters+composition_table.values).to_regexp_chars}]?\#{accents}+" \\
-    "|\#{''  # Hangul syllables with separate trailer
-    }" \\
-    "[#{hangul_no_trailing.to_regexp_chars}][\\u11A8-\\u11C2]" \\
-    "|\#{''  # decomposed Hangul syllables
-    }" \\
-    "[\\u1100-\\u1112][\\u1161-\\u1175][\\u11A8-\\u11C2]?" \\
-  "".freeze
-  REGEXP_K_STRING = "" \\
-    "[#{kompatible_table.keys.to_regexp_chars}]" \\
-  "".freeze
-
-  class_table = {
-    #{class_table_str}
-  }
-  class_table.default = 0
-  CLASS_TABLE = class_table.freeze
-
-  DECOMPOSITION_TABLE = {
-    #{decomposition_table.to_hash_string}
-  }.freeze
-
-  KOMPATIBLE_TABLE = {
-    #{kompatible_table.to_hash_string}
-  }.freeze
-
-  COMPOSITION_TABLE = {
-    #{composition_table.to_hash_string}
-  }.freeze
-end
-MAPPING_TABLE_FILE_END
author	nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2014-10-25 07:20:15 +0000
committer	nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2014-10-25 07:20:15 +0000
commit	9b581e0d0b41dccc8c15400f05ca5c763c6c41b9 (patch)
tree	a1f22b735e7cf00ff41d3acf463e66513e749dd2 /tool
parent	67a19e7a59dccbc00daed2970350a20124926afb (diff)