# path: root/enc/make_encdb.rb
# blob: 8b5731f548b4b046bfd0e74f5c115bb18ea856ab
#
# OnigEncodingDefine(foo, Foo) = {
#   ..
#   "Shift_JIS", /* Canonical Name */
#   ..
# };
# ENC_ALIAS("SJIS", "Shift_JIS")
# ENC_REPLICATE("Windows-31J", "Shift_JIS")
# ENC_ALIAS("CP932", "Windows-31J")
#

# Registers encoding +name+ as declared at +fn+:+line+, refusing names that
# were already registered.
#
# Names are compared case-insensitively: entries are stored under the upcased
# name, and the lookup uses the same upcased key (the callers also query
# +defs+ with +name.upcase+).
#
# defs:: Hash of upcased encoding name => [file name, line number]; mutated.
# name:: encoding name as written in the source file.
# fn::   file name of the declaration being registered.
# line:: line number of the declaration being registered.
#
# Returns the stored [fn, line] pair.
# Raises ArgumentError, naming both locations, when the name is already known.
def check_duplication(defs, name, fn, line)
  key = name.upcase
  if (registered = defs[key])
    raise ArgumentError, "%s:%d: encoding %s is already registered(%s:%d)" %
      [fn, line, name, *registered]
  end
  defs[key] = [fn, line]
end

# Scan the *.c / *.h files in the directory given as ARGV[0] for encoding
# declarations and write the collected database to ARGV[1] (default
# 'encdb.h').
#
# count     - number of encodings counted so far (aliases are not counted).
# lines     - declaration lines copied to the output, each normalized to end
#             in ";\n".
# encodings - names found inside OnigEncodingDefine blocks, in scan order.
# defs      - upcased name => [file, line] registry used for duplicate and
#             "defined yet" checks.
count = 0
lines = []
encodings = []
defs = {}
encdir = ARGV[0]
outhdr = ARGV[1] || 'encdb.h'

# Order file names so that embedded digit runs compare numerically (a digit
# run becomes [length, value]) instead of character by character.
sources = Dir.open(encdir) {|d| d.grep(/.+\.[ch]\z/)}.sort_by {|e|
  e.scan(/(\d+)|(\D+)/).map {|n,a| a||[n.size,n.to_i]}.flatten
}

sources.each do |fn|
  # File.open rather than Kernel#open: the latter treats a path starting
  # with "|" as a command to run.
  File.open(File.join(encdir, fn)) do |f|
    # True from an OnigEncodingDefine line up to and including the first
    # line that contains a double-quoted string.  Reset for every file so an
    # unterminated definition cannot leak into the next file.
    in_define = false
    f.each_line do |line|
      in_define = true if !in_define && /^OnigEncodingDefine/ =~ line
      if in_define
        if /"(.*?)"/ =~ line
          # The first quoted string of the definition is taken as the name.
          in_define = false
          enc_name = $1
          check_duplication(defs, enc_name, fn, f.lineno)
          encodings << enc_name
          count += 1
        end
        next
      end

      case line
      when /^\s*rb_enc_register\(\s*"([^"]+)"/
        count += 1
        line = nil # counted and registered, but not copied to the output
      when /^ENC_REPLICATE\(\s*"([^"]+)"\s*,\s*"([^"]+)"/
        raise ArgumentError,
          '%s:%d: ENC_REPLICATE: %s is not defined yet. (replica %s)' %
          [fn, f.lineno, $2, $1] unless defs[$2.upcase]
        count += 1
      when /^ENC_ALIAS\(\s*"([^"]+)"\s*,\s*"([^"]+)"/
        # An alias is registered as a name but does not increase the count.
        raise ArgumentError,
          '%s:%d: ENC_ALIAS: %s is not defined yet. (alias %s)' %
          [fn, f.lineno, $2, $1] unless defs[$2.upcase]
      when /^ENC_DUMMY\(\s*"([^"]+)"/
        count += 1
      else
        next
      end
      # $1 is the first quoted name captured by whichever pattern matched.
      check_duplication(defs, $1, fn, f.lineno)
      # Drop everything from the first ";" onward and terminate uniformly.
      lines << line.sub(/;.*/m, "").chomp + ";\n" if line
    end
  end
end

# ENC_DEFINE entries first, then the copied declaration lines, then the count.
result = encodings.map {|e| %[ENC_DEFINE("#{e}");\n]}.join + lines.join +
  "\n#define ENCODING_COUNT #{count}\n"
File.open(outhdr, 'wb') do |f|
  f.print result
end