summaryrefslogtreecommitdiff
path: root/template/encdb.h.tmpl
blob: 06afb5dbe1f2ccf319edf262a0381b2ae9c14330 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
<%
#
# OnigEncodingDefine(foo, Foo) = {
#   ..
#   "Shift_JIS", /* Canonical Name */
#   ..
# };
# ENC_ALIAS("SJIS", "Shift_JIS")
# ENC_REPLICATE("Windows-31J", "Shift_JIS")
# ENC_ALIAS("CP932", "Windows-31J")
#

# Registers an encoding name in +defs+, rejecting names that were seen before.
#
# Names are compared case-insensitively: entries are keyed by the upcased
# name, the same key the ENC_REPLICATE / ENC_ALIAS checks use for lookups.
# The lookup here must use that key too, otherwise a name containing
# lowercase letters (e.g. "Shift_JIS") would never be reported as duplicated.
#
# defs:: Hash mapping an upcased encoding name to the [file, line] pair
#        that registered it; updated in place
# name:: encoding name as written in the scanned source
# fn::   name of the file being scanned (only used in the error message)
# line:: line number inside +fn+ (only used in the error message)
#
# Returns the stored [fn, line] pair.
# Raises ArgumentError when +name+ is already registered.
def check_duplication(defs, name, fn, line)
  key = name.upcase
  if defs[key]
    raise ArgumentError, "%s:%d: encoding %s is already registered(%s:%d)" %
      [fn, line, name, *defs[key]]
  end
  defs[key] = [fn, line]
end

# Scan the encoding source directories and collect:
#   lines     - macro statements to be copied into the generated output
#   encodings - encoding names that get an ENC_DEFINE entry
#   count     - running total that is emitted as ENCODING_COUNT
#   defs      - registry of seen names (upcased name => [file, line])
lines = []
# Encodings listed up front; definitions found in a file whose first defined
# name is one of these are not appended to +encodings+ again (see below).
BUILTIN_ENCODINGS = {
  'ASCII-8BIT' => 0,
  'UTF-8' => 1,
  'US-ASCII' => 2,
}
encodings = %w[ASCII-8BIT UTF-8 US-ASCII] # BUILTIN_ENCODINGS.keys is not available on cross compiling and used ruby 1.8
count = encodings.size
defs = {}
# Directories to scan come from the command line; fall back to 'enc'.
encdirs = ARGV.dup
encdirs << 'enc' if encdirs.empty?
# File names already processed; a file name seen in an earlier directory is
# skipped when it shows up again in a later one.
files = {}
encdirs.each do |encdir|
  next unless File.directory?(encdir)
  # Pick the *.c / *.h entries and order them so that digit runs compare by
  # length and then by numeric value, while non-digit runs compare as strings.
  Dir.open(encdir) {|d| d.grep(/.+\.[ch]\z/)}.sort_by {|e|
    e.scan(/(\d+)|(\D+)/).map {|n,a| a||[n.size,n.to_i]}.flatten
  }.each do |fn|
    next if files[fn]
    files[fn] = true
    open(File.join(encdir,fn)) do |f|
      # First encoding name defined in this file; later definitions in the
      # same file are recorded as based on it.
      name = nil
      # True while inside an "#ifndef RUBY" section; reset by any "#endif".
      skip_ifndef_ruby = false
      # True after an OnigEncodingDefine line until a quoted string is found.
      encoding_def = false
      f.each_line do |line|
        case line
        when /^#ifndef RUBY/
          skip_ifndef_ruby = true
        when /^#endif/
          skip_ifndef_ruby = false
        end
        next if skip_ifndef_ruby
        encoding_def = true if /^OnigEncodingDefine/ =~ line
        # The first double-quoted string at or after OnigEncodingDefine is
        # taken as the encoding name.
        if encoding_def && /"(.*?)"/ =~ line
          encoding_def = false
          if name
            lines << %[ENC_SET_BASE("#$1", "#{name}");]
          else
            name = $1
          end
          # $. is the number of the line just read from the file.
          check_duplication(defs, $1, fn, $.)
          # Note: the lookup uses +name+ (the file's first definition), not $1.
          next if BUILTIN_ENCODINGS[name]
          encodings << $1
          count += 1
        else
          case line
          when /^\s*rb_enc_register\(\s*"([^"]+)"/
            # Counted and defined, but the source line itself is not copied.
            count += 1
            line = nil
            encodings << $1
          when /^ENC_REPLICATE\(\s*"([^"]+)"\s*,\s*"([^"]+)"/
            # The original ($2) must already be registered; a replica is counted.
            raise ArgumentError,
            '%s:%d: ENC_REPLICATE: %s is not defined yet. (replica %s)' %
              [fn, $., $2, $1] unless defs[$2.upcase]
            count += 1
          when /^ENC_ALIAS\(\s*"([^"]+)"\s*,\s*"([^"]+)"/
            # The target ($2) must already be registered; an alias is not counted.
            raise ArgumentError,
            '%s:%d: ENC_ALIAS: %s is not defined yet. (alias %s)' %
              [fn, $., $2, $1] unless defs[$2.upcase]
          when /^ENC_DUMMY\w*\(\s*"([^"]+)"/
            count += 1
          else
            # Not a registration line: nothing to record.
            next
          end
          # $1 is the newly introduced name from whichever pattern matched above.
          check_duplication(defs, $1, fn, $.)
          # Keep the statement up to (not including) its first ';', then
          # re-terminate it with a single ';'.
          lines << line.sub(/;.*/m, "").chomp + ";" if line
        end
      end
    end
  end
end
# Template output: one ENC_DEFINE("...") per collected encoding name, then
# every macro line gathered during the scan, then the ENCODING_COUNT define.
# NOTE(review): the block index `i` and the `encidx` assignment below look
# unused in the visible part of this template - confirm before removing.
encodings.each_with_index do |e, i|
%>ENC_DEFINE("<%=e%>");
% end
% encidx = encodings.size - 1
% lines.each do |line|
<%=line%>
% end

#define ENCODING_COUNT <%=count%>