#
# kconv.rb - Kanji Converter.
#
# $Id$
#
# ----
#
# kconv.rb implements the Kconv module for Kanji conversion.  Additionally,
# some methods are added to the String class to allow easy conversion.
#

require 'nkf'

#
# Kanji Converter for Ruby.
#
# Every conversion is delegated to NKF.nkf; the encoding predicates
# (iseuc, issjis, isutf8) are implemented with byte-oriented regexps.
#
module Kconv
  #
  # Public Constants
  #

  # Constants of Encoding (all of them are re-exported from NKF)

  # Auto-Detect
  AUTO = NKF::AUTO
  # ISO-2022-JP
  JIS = NKF::JIS
  # EUC-JP
  EUC = NKF::EUC
  # Shift_JIS
  SJIS = NKF::SJIS
  # BINARY
  BINARY = NKF::BINARY
  # NOCONV
  NOCONV = NKF::NOCONV
  # ASCII
  ASCII = NKF::ASCII
  # UTF-8
  UTF8 = NKF::UTF8
  # UTF-16
  UTF16 = NKF::UTF16
  # UTF-32
  UTF32 = NKF::UTF32
  # UNKNOWN
  UNKNOWN = NKF::UNKNOWN

  #
  # Private Constants
  #
  # The regexps below are compiled with the /n flag and therefore work on
  # raw bytes; whitespace inside them is ignored because of the /x flag.
  #

  # Regexp of Shift_JIS string (private constant)
  RegexpShiftjis = /\A(?:
                       [\x00-\x7f\xa1-\xdf] |
                       [\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]
                     )*\z/nx

  # Regexp of EUC-JP string (private constant)
  RegexpEucjp = /\A(?:
                    [\x00-\x7f]                         |
                    \x8e        [\xa1-\xdf]             |
                    \x8f        [\xa1-\xfe] [\xa1-\xfe] |
                    [\xa1-\xfe] [\xa1-\xfe]
                  )*\z/nx

  # Regexp of UTF-8 string (private constant)
  RegexpUtf8 = /\A(?:
                   [\x00-\x7f]                                     |
                   [\xc2-\xdf] [\x80-\xbf]                         |
                   \xe0        [\xa0-\xbf] [\x80-\xbf]             |
                   [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]             |
                   \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
                   [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
                   \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
                 )*\z/nx

  #
  # Public Methods
  #

  # call-seq:
  #    Kconv.kconv(str, to_enc, from_enc=nil)
  #
  # Convert +str+ to +to_enc+.
  # +to_enc+ and +from_enc+ are given as constants of Kconv; anything whose
  # +to_s+ yields an encoding name understood by NKF (for example the String
  # "EUC-JP") is accepted as well.  A +nil+ (or +false+) encoding simply
  # omits the corresponding NKF option.
  #
  # Returns the string produced by NKF.nkf.
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want to decode them, use NKF.nkf.
  def kconv(str, to_enc, from_enc=nil)
    # Interpolation calls #to_s, so both Encoding objects and plain encoding
    # names work (calling #name would raise NoMethodError on a String).
    opt = ''.dup
    opt << " --ic=#{from_enc}" if from_enc
    opt << " --oc=#{to_enc}" if to_enc

    ::NKF::nkf(opt, str)
  end
  module_function :kconv

  #
  # Encode to
  #

  # call-seq:
  #    Kconv.tojis(str)   -> string
  #
  # Convert +str+ to ISO-2022-JP
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want it, use NKF.nkf('-jxm0', str).
  def tojis(str)
    ::NKF::nkf('-jm', str)
  end
  module_function :tojis

  # call-seq:
  #    Kconv.toeuc(str)   -> string
  #
  # Convert +str+ to EUC-JP
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want it, use NKF.nkf('-exm0', str).
  def toeuc(str)
    ::NKF::nkf('-em', str)
  end
  module_function :toeuc

  # call-seq:
  #    Kconv.tosjis(str)   -> string
  #
  # Convert +str+ to Shift_JIS
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want it, use NKF.nkf('-sxm0', str).
  def tosjis(str)
    ::NKF::nkf('-sm', str)
  end
  module_function :tosjis

  # call-seq:
  #    Kconv.toutf8(str)   -> string
  #
  # Convert +str+ to UTF-8
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want it, use NKF.nkf('-wxm0', str).
  def toutf8(str)
    ::NKF::nkf('-wm', str)
  end
  module_function :toutf8

  # call-seq:
  #    Kconv.toutf16(str)   -> string
  #
  # Convert +str+ to UTF-16
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want it, use NKF.nkf('-w16xm0', str).
  def toutf16(str)
    ::NKF::nkf('-w16m', str)
  end
  module_function :toutf16

  # call-seq:
  #    Kconv.toutf32(str)   -> string
  #
  # Convert +str+ to UTF-32
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want it, use NKF.nkf('-w32xm0', str).
  def toutf32(str)
    ::NKF::nkf('-w32m', str)
  end
  module_function :toutf32

  #
  # guess
  #

  # call-seq:
  #    Kconv.guess(str)   -> encoding
  #
  # Guess the input encoding of +str+ by NKF.guess and return its result
  # unchanged.
  def guess(str)
    ::NKF::guess(str)
  end
  module_function :guess

  #
  # isEncoding
  #

  # call-seq:
  #    Kconv.iseuc(str)   -> obj or nil
  #
  # Returns whether the bytes of +str+ form an EUC-JP string or not
  # (a truthy object if they do, +nil+ otherwise).
  #
  # *Note* don't expect this return value to be MatchData.
  def iseuc(str)
    # Match a binary copy: the regexp is byte-oriented (/n) and is not
    # compatible with non-ASCII strings tagged with another encoding.
    RegexpEucjp.match(str.b)
  end
  module_function :iseuc

  # call-seq:
  #    Kconv.issjis(str)   -> obj or nil
  #
  # Returns whether the bytes of +str+ form a Shift_JIS string or not
  # (a truthy object if they do, +nil+ otherwise).
  #
  # *Note* don't expect this return value to be MatchData.
  def issjis(str)
    # Match a binary copy: the regexp is byte-oriented (/n) and is not
    # compatible with non-ASCII strings tagged with another encoding.
    RegexpShiftjis.match(str.b)
  end
  module_function :issjis

  # call-seq:
  #    Kconv.isutf8(str)   -> obj or nil
  #
  # Returns whether the bytes of +str+ form a UTF-8 string or not
  # (a truthy object if they do, +nil+ otherwise).
  #
  # *Note* don't expect this return value to be MatchData.
  def isutf8(str)
    # Match a binary copy: the regexp is byte-oriented (/n) and is not
    # compatible with non-ASCII strings tagged with another encoding.
    RegexpUtf8.match(str.b)
  end
  module_function :isutf8
end

#
# Convenience wrappers: every method below forwards +self+ to the
# Kconv module function of the same name.
#
class String
  # call-seq:
  #    String#kconv(to_enc, from_enc)
  #
  # Convert +self+ to +to_enc+.
  # +to_enc+ and +from_enc+ are given as constants of Kconv.
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want to decode them, use NKF.nkf.
  def kconv(to_enc, from_enc=nil)
    Kconv::kconv(self, to_enc, from_enc)
  end

  #
  # to Encoding
  #

  # call-seq:
  #    String#tojis   -> string
  #
  # Convert +self+ to ISO-2022-JP
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want it, use NKF.nkf('-jxm0', str).
  def tojis; Kconv.tojis(self) end

  # call-seq:
  #    String#toeuc   -> string
  #
  # Convert +self+ to EUC-JP
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want it, use NKF.nkf('-exm0', str).
  def toeuc; Kconv.toeuc(self) end

  # call-seq:
  #    String#tosjis   -> string
  #
  # Convert +self+ to Shift_JIS
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want it, use NKF.nkf('-sxm0', str).
  def tosjis; Kconv.tosjis(self) end

  # call-seq:
  #    String#toutf8   -> string
  #
  # Convert +self+ to UTF-8
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want it, use NKF.nkf('-wxm0', str).
  def toutf8; Kconv.toutf8(self) end

  # call-seq:
  #    String#toutf16   -> string
  #
  # Convert +self+ to UTF-16
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want it, use NKF.nkf('-w16xm0', str).
  def toutf16; Kconv.toutf16(self) end

  # call-seq:
  #    String#toutf32   -> string
  #
  # Convert +self+ to UTF-32
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want it, use NKF.nkf('-w32xm0', str).
  def toutf32; Kconv.toutf32(self) end

  #
  # is Encoding
  #

  # call-seq:
  #    String#iseuc   -> obj or nil
  #
  # Returns whether self's bytes form an EUC-JP string or not.
  #
  # *Note* don't expect this return value to be MatchData.
  def iseuc; Kconv.iseuc(self) end

  # call-seq:
  #    String#issjis   -> obj or nil
  #
  # Returns whether self's bytes form a Shift_JIS string or not.
  #
  # *Note* don't expect this return value to be MatchData.
  def issjis; Kconv.issjis(self) end

  # call-seq:
  #    String#isutf8   -> obj or nil
  #
  # Returns whether self's bytes form a UTF-8 string or not.
  #
  # *Note* don't expect this return value to be MatchData.
  def isutf8; Kconv.isutf8(self) end
end