# jcode.rb - ruby code to handle japanese (EUC/SJIS) string
#
# Re-opens String and redefines succ, tr, delete, squeeze, tr_s and chop
# (plus their bang forms) on top of regular expressions, and adds mbchar?,
# end_regexp, jlength/jsize, jcount and each_char.  The encoding-specific
# parts dispatch on the first letter of $KCODE.

if $VERBOSE && $KCODE == "NONE"
  warn "Warning: $KCODE is NONE."
end

# Turn warnings off while core String methods are redefined; the saved level
# is put back on the last line of this file.
$vsave, $VERBOSE = $VERBOSE, false

class String
  warn "feel free for some warnings:\n" if $VERBOSE

  # Rewrites a tr-style character specification so it can be placed inside a
  # regexp character class:
  # * an escaped "[", "]", "-" or "\" is kept as is,
  # * any other escaped character loses its backslash,
  # * a bare "[", "]" or "\" gains a backslash.
  def _regex_quote(str)
    str.gsub(/(\\[\[\]\-\\])|\\(.)|([\[\]\\])/) do
      $1 || $2 || '\\' + $3
    end
  end
  private :_regex_quote

  # Regexp source for one multibyte character in each supported encoding.
  PATTERN_SJIS = '[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]'
  PATTERN_EUC = '[\xa1-\xfe][\xa1-\xfe]'
  PATTERN_UTF8 = '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'

  # Compiled forms of the patterns above.  The /n flag is the literal
  # spelling of Regexp.new(src, 0, 'n') and matches how end_regexp builds
  # its regexps.
  RE_SJIS = /#{PATTERN_SJIS}/n
  RE_EUC = /#{PATTERN_EUC}/n
  RE_UTF8 = /#{PATTERN_UTF8}/n

  # Increment tables used by succ!, one per encoding letter ('s', 'e', 'u').
  # Every table answers 1 for keys that were not stored explicitly.
  SUCC = {}

  SUCC['s'] = Hash.new(1)
  0.upto(0x3f) do |i|
    SUCC['s'][i.chr] = 0x40 - i
  end
  SUCC['s']["\x7e"] = 0x80 - 0x7e
  SUCC['s']["\xfd"] = 0x100 - 0xfd
  SUCC['s']["\xfe"] = 0x100 - 0xfe
  SUCC['s']["\xff"] = 0x100 - 0xff

  SUCC['e'] = Hash.new(1)
  0.upto(0xa0) do |i|
    SUCC['e'][i.chr] = 0xa1 - i
  end
  SUCC['e']["\xfe"] = 2

  SUCC['u'] = Hash.new(1)
  0.upto(0x7f) do |i|
    SUCC['u'][i.chr] = 0x80 - i
  end
  SUCC['u']["\xbf"] = 0x100 - 0xbf

  # Matches the receiver against the multibyte pattern selected by the first
  # letter of $KCODE and returns the result of =~ (match position or nil).
  # Returns nil for any other $KCODE.
  def mbchar?
    case $KCODE[0]
    when ?s, ?S
      self =~ RE_SJIS
    when ?e, ?E
      self =~ RE_EUC
    when ?u, ?U
      self =~ RE_UTF8
    else
      nil
    end
  end

  # Returns a regexp that matches one character (in the encoding selected by
  # $KCODE) anchored with "$"; falls back to /.$/ for other $KCODE values.
  def end_regexp
    case $KCODE[0]
    when ?s, ?S
      /#{PATTERN_SJIS}$/on
    when ?e, ?E
      /#{PATTERN_EUC}$/on
    when ?u, ?U
      /#{PATTERN_UTF8}$/on
    else
      /.$/on
    end
  end

  # Keep the built-in implementations reachable for the fallback path.
  alias original_succ! succ!
  private :original_succ!

  alias original_succ succ
  private :original_succ

  # In-place successor.  When the receiver matches end_regexp, the last byte
  # is advanced by the SUCC table increment, carrying into the preceding byte
  # when the last byte becomes 0, and this repeats until the receiver matches
  # end_regexp again.  Otherwise the built-in succ! is used.
  #
  # NOTE(review): the SUCC tables are keyed by one-character strings (i.chr)
  # while the lookup uses self[-1]; confirm the lookup hits the stored
  # entries on the targeted Ruby version.
  def succ!
    reg = end_regexp
    if self =~ reg
      succ_table = SUCC[$KCODE[0,1].downcase]
      begin
        self[-1] += succ_table[self[-1]]
        self[-2] += 1 if self[-1] == 0
      end while self !~ reg
      self
    else
      original_succ!
    end
  end

  # Non-destructive succ: works on a copy of the receiver.
  def succ
    str = self.dup
    str.succ! or str
  end

  private

  # Expands a tr-style specification into an array of characters.
  # "x-y" becomes every character from x to y, "\c" stands for c, and a
  # range whose endpoints differ in length is skipped.
  def _expand_ch(str)
    a = []
    str.scan(/(?:\\(.)|([^\\]))-(?:\\(.)|([^\\]))|(?:\\(.)|(.))/m) do
      from = $1 || $2
      to = $3 || $4
      one = $5 || $6
      if one
        a.push one
      elsif from.length != to.length
        next
      elsif from.length == 1
        from[0].upto(to[0]) { |c| a.push c.chr }
      else
        from.upto(to) { |c| a.push c }
      end
    end
    a
  end

  # Builds the translation table for tr!/tr_s!: each expanded character of
  # +from+ maps to the character at the same position in the expanded +to+,
  # or to the last character of +to+ when +to+ is shorter.
  def expand_ch_hash(from, to)
    h = {}
    afrom = _expand_ch(from)
    ato = _expand_ch(to)
    afrom.each_with_index do |x, i|
      h[x] = ato[i] || ato[-1]
    end
    h
  end

  # Memoized translation tables and compiled patterns.
  # HashCache is keyed by the [from, to] pair: a concatenated string key can
  # be produced by two different pairs, which would hand back the wrong table.
  HashCache = {}
  TrPatternCache = {}
  DeletePatternCache = {}
  SqueezePatternCache = {}
  # tr_s! matches runs of length >= 1 (\1*) while squeeze! matches runs of
  # length >= 2 (\1+), so the two must not share one cache.
  TrSqueezePatternCache = {}

  public

  # Translates, in place, every character listed in +from+ to the
  # corresponding character of +to+.  A +from+ starting with "^" replaces
  # every character NOT listed with the last character of +to+.
  # Returns nil when +from+ is empty or nothing was replaced; an empty +to+
  # delegates to delete!.
  def tr!(from, to)
    return nil if from == ""
    return self.delete!(from) if to == ""

    pattern = TrPatternCache[from] ||= /[#{_regex_quote(from)}]/
    if from[0] == ?^
      last = /.$/.match(to)[0]
      self.gsub!(pattern, last)
    else
      h = HashCache[[from, to]] ||= expand_ch_hash(from, to)
      self.gsub!(pattern) do |c| h[c] end
    end
  end

  # Non-destructive tr.
  def tr(from, to)
    (str = self.dup).tr!(from, to) or str
  end

  # Removes, in place, every character listed in +del+.
  # Returns nil when +del+ is empty or nothing was removed.
  def delete!(del)
    return nil if del == ""
    self.gsub!(DeletePatternCache[del] ||= /[#{_regex_quote(del)}]+/, '')
  end

  # Non-destructive delete.
  def delete(del)
    (str = self.dup).delete!(del) or str
  end

  # Collapses, in place, runs of a repeated character into one occurrence.
  # With +del+ only the listed characters are squeezed; without it every
  # character is.  Returns nil when +del+ is empty or nothing was squeezed.
  def squeeze!(del=nil)
    return nil if del == ""
    pattern =
      if del
        SqueezePatternCache[del] ||= /([#{_regex_quote(del)}])\1+/
      else
        /(.|\n)\1+/
      end
    self.gsub!(pattern, '\1')
  end

  # Non-destructive squeeze.
  def squeeze(del=nil)
    (str = self.dup).squeeze!(del) or str
  end

  # Like tr!, but each run of one repeated character from +from+ is replaced
  # by a single translated character.  An empty +to+ delegates to delete!.
  def tr_s!(from, to)
    return self.delete!(from) if to.length == 0

    pattern = TrSqueezePatternCache[from] ||= /([#{_regex_quote(from)}])\1*/
    if from[0] == ?^
      last = /.$/.match(to)[0]
      self.gsub!(pattern, last)
    else
      h = HashCache[[from, to]] ||= expand_ch_hash(from, to)
      self.gsub!(pattern) do h[$1] end
    end
  end

  # Non-destructive tr_s.
  def tr_s(from, to)
    (str = self.dup).tr_s!(from, to) or str
  end

  # Removes, in place, the final character (or a trailing "\n" / "\r\n").
  # Returns nil when nothing was removed.
  def chop!
    self.gsub!(/(?:.|\r?\n)\z/, '')
  end

  # Non-destructive chop.
  def chop
    (str = self.dup).chop! or str
  end

  # Length of the receiver after every match of /[^\Wa-zA-Z_\d]/ has been
  # replaced by a single space.
  def jlength
    self.gsub(/[^\Wa-zA-Z_\d]/, ' ').length
  end
  alias jsize jlength

  # jlength of the receiver with everything except the characters listed in
  # +str+ deleted.
  def jcount(str)
    self.delete("^#{str}").jlength
  end

  # Yields each match of /./m to the block; without a block returns the
  # array of matches.
  def each_char
    if block_given?
      scan(/./m) do |x|
        yield x
      end
    else
      scan(/./m)
    end
  end
end

$VERBOSE = $vsave