summaryrefslogtreecommitdiff
path: root/lib/jcode.rb
blob: 5b2289932f3b06ad74e0517be14bc4a53dac6cf0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# jcode.rb - Ruby code to handle Japanese (EUC/SJIS) strings

# Re-opens the core String class and replaces #succ, #upto, #tr(!),
# #delete(!), #squeeze(!) and #tr_s(!) with implementations that treat a
# multibyte character as one unit.
#
# Characters are found with the regexp /(.|\n)/, so what counts as "one
# character" follows the encoding of the receiver. Byte-level work uses
# getbyte/setbyte/byteslice, because String#[] with an Integer index returns
# a String (not a byte value) on Ruby 1.9 and later.
class String
  printf STDERR, "feel free for some warnings:\n" if $VERBOSE

  # Keep the built-in #succ reachable. The guard stops a second load of this
  # file from aliasing the override to itself, which would recurse forever.
  unless private_method_defined?(:original_succ) || method_defined?(:original_succ)
    alias original_succ succ
    private :original_succ
  end

  # Returns the successor of the string.
  #
  # When the string ends with a multibyte character, the last byte of a copy
  # is incremented (wrapping at 0xff). If that leaves the copy validly
  # encoded it is returned; otherwise, and for every other string, the
  # built-in #succ is used.
  def succ
    return original_succ unless _multibyte_tail?

    bumped = dup
    bumped.setbyte(-1, (bumped.getbyte(-1) + 1) & 0xff)
    bumped.valid_encoding? ? bumped : original_succ
  end

  # Yields each string from the receiver up to and including +to+.
  #
  # * Receiver ends with a multibyte character: if everything except the
  #   final byte is identical in the receiver and +to+, yields copies of the
  #   receiver whose final byte runs from the receiver's to +to+'s. If they
  #   differ, nothing is yielded.
  # * Otherwise: yields the receiver, then repeated #succ values, stopping
  #   after +to+ or as soon as a value is longer than +to+.
  #
  # Yields nothing when the receiver sorts after +to+. Returns nil, or an
  # Enumerator when no block is given.
  def upto(to)
    return to_enum(:upto, to) unless block_given?
    return if self > to

    if _multibyte_tail?
      # +to+ cannot be empty here: the receiver is non-empty and <= to.
      if byteslice(0, bytesize - 1) == to.byteslice(0, to.bytesize - 1)
        getbyte(-1).upto(to.getbyte(-1)) do |byte|
          stepped = dup
          stepped.setbyte(-1, byte)
          yield stepped
        end
      end
    else
      curr = self
      loop do
        yield curr
        return if curr == to
        curr = curr.succ
        return if curr.length > to.length
      end
    end
    nil
  end

  # Expands the receiver, read as a character selector such as "a-fxyz",
  # into an Array of one-character strings.
  #
  # "X-Y" becomes every character from X to Y; any other character stands
  # for itself. A range whose ends have different byte widths is dropped.
  # Single-byte ranges step by byte value; multibyte ranges use #upto.
  def _expand_ch
    expanded = []
    scan(/(.|\n)-(.|\n)|(.|\n)/) do |low, high, single|
      if single
        expanded.push single
      elsif low.bytesize != high.bytesize
        next
      elsif low.bytesize == 1
        low.getbyte(0).upto(high.getbyte(0)) do |byte|
          # Tag the byte with the selector's encoding so it compares equal
          # to characters taken from strings in that encoding.
          expanded.push [byte].pack('C').force_encoding(encoding)
        end
      else
        low.upto(high) { |ch| expanded.push ch }
      end
    end
    expanded
  end

  # Translates characters in place: each character listed in +from+ is
  # replaced by the character at the same position in +to+ (the last
  # character of +to+ is reused when +to+ is shorter). With a negated
  # +from+ ("^..."), every character NOT listed becomes the last character
  # of +to+. An empty +to+ delegates to #delete!.
  #
  # Returns the result of gsub!: the receiver, or nil when the receiver is
  # empty.
  def tr!(from, to)
    return delete!(from) if to.empty?

    negate, from = _split_negation(from)
    source = from._expand_ch
    target = to._expand_ch
    gsub!(/(.|\n)/) do |ch|
      pos = source.index(ch)
      if negate
        pos ? ch : target[-1]
      elsif pos
        target[pos] || target[-1]
      else
        ch
      end
    end
  end

  # Non-destructive #tr!; always returns the translated copy.
  def tr(from, to)
    copy = dup
    copy.tr!(from, to)
    copy
  end

  # Removes, in place, every character listed in +del+ (or every character
  # not listed, when +del+ is negated with a leading "^").
  #
  # Returns the result of gsub!: the receiver, or nil when the receiver is
  # empty.
  def delete!(del)
    negate, del = _split_negation(del)
    listed = del._expand_ch
    gsub!(/(.|\n)/) { |ch| (listed.include?(ch) ^ negate) ? '' : ch }
  end

  # Non-destructive #delete!; always returns the copy.
  def delete(del)
    copy = dup
    copy.delete!(del)
    copy
  end

  # Collapses runs of a repeated character into a single character, in
  # place. Without +del+ every run is collapsed; with +del+ only runs of the
  # listed characters are (or of the unlisted ones, when +del+ is negated).
  #
  # Returns the receiver when +del+ is given. Without +del+ it returns the
  # result of gsub!, i.e. nil when there was no run to collapse.
  def squeeze!(del = nil)
    return gsub!(/(.|\n)\1+/, '\1') unless del

    negate, del = _split_negation(del)
    listed = del._expand_ch
    gsub!(/(.|\n)\1+/) do |run|
      ch = run[0, 1]
      # A block is used rather than a replacement string so that characters
      # such as "." or "\\" are inserted literally.
      (listed.include?(ch) ^ negate) ? ch : run
    end
    self
  end

  # Non-destructive #squeeze!; always returns the copy.
  def squeeze(del = nil)
    copy = dup
    copy.squeeze!(del)
    copy
  end

  # Like #tr!, but a run of consecutive characters that translate to the
  # same replacement is emitted once. Untranslated characters are copied
  # through and end the current run. An empty +to+ delegates to #delete!.
  #
  # Returns the result of gsub!: the receiver, or nil when the receiver is
  # empty.
  def tr_s!(from, to)
    return delete!(from) if to.empty?

    negate, from = _split_negation(from)
    source = from._expand_ch
    target = to._expand_ch
    previous = nil
    gsub!(/(.|\n)/) do |ch|
      pos = source.index(ch)
      # Selected means "listed" normally and "not listed" when negated.
      selected = pos.nil? == negate
      if selected
        translated = (pos && target[pos]) || target[-1]
        if translated == previous
          ''
        else
          previous = translated
        end
      else
        previous = nil
        ch
      end
    end
  end

  # Non-destructive #tr_s!; always returns the copy.
  def tr_s(from, to)
    copy = dup
    copy.tr_s!(from, to)
    copy
  end

  # True when the string's last character occupies more than one byte.
  def _multibyte_tail?
    last = self[-1]
    !last.nil? && last.bytesize > 1
  end

  # Splits a character selector into [negated, remainder]. A leading "^"
  # negates the selector only when something follows it; a selector that is
  # just "^" is a literal caret, as in core String's character selectors.
  def _split_negation(spec)
    if spec.length > 1 && spec.start_with?('^')
      [true, spec[1..-1]]
    else
      [false, spec]
    end
  end

  private :_multibyte_tail?, :_split_negation
end