#
# Provenance (from the patch header this file was extracted from):
#   commit d464704f111d211c1f1ff9ef23ef1d755054be00
#   From: shyouhei, Wed, 15 Aug 2007 19:08:43 +0000
#   Subject: add tag v1_8_5_54
#   git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/tags/v1_8_5_54@12952 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
#   path: ruby_1_8_5/ext/nkf/lib/kconv.rb (patch view generated by cgit v1.2.3)
#
# kconv.rb - Kanji Converter.
#
# $Id: kconv.rb,v 1.3.6.6 2006/06/19 14:52:54 naruse Exp $
#
# ----
#
# kconv.rb implements the Kconv module for Kanji conversion. Additionally,
# some methods are added to the String class to allow easy conversion.
#

require 'nkf'

#
# Kanji Converter for Ruby.
#
# Every conversion is delegated to NKF.nkf; the encoding predicates
# (iseuc, issjis, isutf8) are pure byte-pattern checks done with the
# regular expressions defined below.
#
module Kconv
  #
  # Public Constants
  #

  # Constants of Encoding (re-exported from NKF)

  # Auto-Detect
  AUTO = NKF::AUTO
  # ISO-2022-JP
  JIS = NKF::JIS
  # EUC-JP
  EUC = NKF::EUC
  # Shift_JIS
  SJIS = NKF::SJIS
  # BINARY
  BINARY = NKF::BINARY
  # NOCONV
  NOCONV = NKF::NOCONV
  # ASCII
  ASCII = NKF::ASCII
  # UTF-8
  UTF8 = NKF::UTF8
  # UTF-16
  UTF16 = NKF::UTF16
  # UTF-32
  UTF32 = NKF::UTF32
  # UNKNOWN
  UNKNOWN = NKF::UNKNOWN

  #
  # Private Constants
  #

  # Revision of kconv.rb
  REVISION = %q$Revision: 1.3.6.6 $

  # Regexps of Encoding
  #
  # All patterns are byte-oriented (/n), use extended syntax (/x) and are
  # anchored with \A ... \z so the whole string has to conform.

  # Regexp of Shift_JIS string (private constant)
  #
  # Single bytes 0x00-0x7F and 0xA1-0xDF, or a lead byte
  # 0x81-0x9F / 0xE0-0xFC followed by a trail byte 0x40-0x7E / 0x80-0xFC.
  RegexpShiftjis = /\A(?:
                       [\x00-\x7f\xa1-\xdf] |
                       [\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]
                      )*\z/nx

  # Regexp of EUC-JP string (private constant)
  #
  # Accepts single bytes 0x00-0x7F, 0x8E followed by one byte 0xA1-0xDF,
  # 0x8F followed by two bytes 0xA1-0xFE, and plain two-byte characters
  # whose bytes are both 0xA1-0xFE.
  #
  # The lead byte of the two- and three-byte forms goes up to 0xFE; capping
  # it at 0xDF would reject valid text such as JIS level-2 kanji, whose lead
  # bytes start at 0xE0.
  RegexpEucjp = /\A(?:
                    [\x00-\x7f]                         |
                    \x8e        [\xa1-\xdf]             |
                    \x8f        [\xa1-\xfe] [\xa1-\xfe] |
                    [\xa1-\xfe] [\xa1-\xfe]
                   )*\z/nx

  # Regexp of UTF-8 string (private constant)
  #
  # One- to four-byte sequences; overlong forms and code points above
  # U+10FFFF are excluded by the restricted second-byte ranges after
  # 0xE0, 0xF0 and 0xF4.
  RegexpUtf8 = /\A(?:
                   [\x00-\x7f]                                     |
                   [\xc2-\xdf] [\x80-\xbf]                         |
                   \xe0        [\xa0-\xbf] [\x80-\xbf]             |
                   [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]             |
                   \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
                   [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
                   \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
                  )*\z/nx

  #
  # Public Methods
  #

  # call-seq:
  #    Kconv.kconv(str, out_code, in_code = Kconv::AUTO)
  #
  # Convert str to out_code.
  # out_code and in_code are given as constants of Kconv.
  #
  # Only JIS, EUC, SJIS, UTF8 and UTF16 are translated into NKF options;
  # any other in_code leaves input detection to NKF, and any other out_code
  # (except NOCONV) leaves the output choice to NKF. When out_code is NOCONV,
  # str itself is returned without calling NKF.
  #
  # *Note*
  # This method decodes MIME-encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf directly.
  def kconv(str, out_code, in_code = AUTO)
    # Upper-case flags tell NKF what the input is.
    in_flag =
      case in_code
      when ::NKF::JIS   then 'J'
      when ::NKF::EUC   then 'E'
      when ::NKF::SJIS  then 'S'
      when ::NKF::UTF8  then 'W'
      when ::NKF::UTF16 then 'W16'
      else ''
      end

    # Lower-case flags select the output encoding.
    out_flag =
      case out_code
      when ::NKF::JIS    then 'j'
      when ::NKF::EUC    then 'e'
      when ::NKF::SJIS   then 's'
      when ::NKF::UTF8   then 'w'
      when ::NKF::UTF16  then 'w16'
      when ::NKF::NOCONV then return str
      else ''
      end

    flags = in_flag + out_flag
    # A lone "-" is not a meaningful option, so pass an empty option string.
    opt = flags.empty? ? '' : '-' + flags

    ::NKF::nkf(opt, str)
  end
  module_function :kconv

  #
  # Encode to
  #

  # call-seq:
  #    Kconv.tojis(str)   -> string
  #
  # Convert str to ISO-2022-JP
  #
  # *Note*
  # This method decodes MIME-encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-jxm0', str).
  def tojis(str)
    ::NKF::nkf('-jm', str)
  end
  module_function :tojis

  # call-seq:
  #    Kconv.toeuc(str)   -> string
  #
  # Convert str to EUC-JP
  #
  # *Note*
  # This method decodes MIME-encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-exm0', str).
  def toeuc(str)
    # '-em', not '-em0': MIME decoding stays on, as documented above and
    # as in the other to* converters.
    ::NKF::nkf('-em', str)
  end
  module_function :toeuc

  # call-seq:
  #    Kconv.tosjis(str)   -> string
  #
  # Convert str to Shift_JIS
  #
  # *Note*
  # This method decodes MIME-encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-sxm0', str).
  def tosjis(str)
    ::NKF::nkf('-sm', str)
  end
  module_function :tosjis

  # call-seq:
  #    Kconv.toutf8(str)   -> string
  #
  # Convert str to UTF-8
  #
  # *Note*
  # This method decodes MIME-encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-wxm0', str).
  def toutf8(str)
    ::NKF::nkf('-wm', str)
  end
  module_function :toutf8

  # call-seq:
  #    Kconv.toutf16(str)   -> string
  #
  # Convert str to UTF-16
  #
  # *Note*
  # This method decodes MIME-encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-w16xm0', str).
  def toutf16(str)
    ::NKF::nkf('-w16m', str)
  end
  module_function :toutf16

  #
  # guess
  #

  # call-seq:
  #    Kconv.guess(str)   -> integer
  #
  # Guess the input encoding of str by delegating to NKF.guess.
  def guess(str)
    ::NKF::guess(str)
  end
  module_function :guess

  # call-seq:
  #    Kconv.guess_old(str)   -> integer
  #
  # Guess the input encoding of str by delegating to NKF.guess1.
  def guess_old(str)
    ::NKF::guess1(str)
  end
  module_function :guess_old

  #
  # isEncoding
  #

  # call-seq:
  #    Kconv.iseuc(str)   -> obj or nil
  #
  # Returns a true value if str is a valid EUC-JP byte sequence
  # (see RegexpEucjp), nil otherwise.
  #
  # *Note* do not rely on the return value being a MatchData.
  def iseuc(str)
    RegexpEucjp.match(str)
  end
  module_function :iseuc

  # call-seq:
  #    Kconv.issjis(str)   -> obj or nil
  #
  # Returns a true value if str is a valid Shift_JIS byte sequence
  # (see RegexpShiftjis), nil otherwise.
  #
  # *Note* do not rely on the return value being a MatchData.
  def issjis(str)
    RegexpShiftjis.match(str)
  end
  module_function :issjis

  # call-seq:
  #    Kconv.isutf8(str)   -> obj or nil
  #
  # Returns a true value if str is a valid UTF-8 byte sequence
  # (see RegexpUtf8), nil otherwise.
  #
  # *Note* do not rely on the return value being a MatchData.
  def isutf8(str)
    RegexpUtf8.match(str)
  end
  module_function :isutf8

end

#
# Convenience wrappers: each method forwards self to the Kconv module
# function of the same name.
#
class String
  # call-seq:
  #    String#kconv(out_code, in_code = Kconv::AUTO)
  #
  # Convert self to out_code.
  # out_code and in_code are given as constants of Kconv.
  #
  # *Note*
  # This method decodes MIME-encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf directly.
  def kconv(out_code, in_code = Kconv::AUTO)
    Kconv::kconv(self, out_code, in_code)
  end

  #
  # to Encoding
  #

  # call-seq:
  #    String#tojis   -> string
  #
  # Convert self to ISO-2022-JP
  #
  # *Note*
  # This method decodes MIME-encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-jxm0', str).
  def tojis
    Kconv.tojis(self)
  end

  # call-seq:
  #    String#toeuc   -> string
  #
  # Convert self to EUC-JP
  #
  # *Note*
  # This method decodes MIME-encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-exm0', str).
  def toeuc
    Kconv.toeuc(self)
  end

  # call-seq:
  #    String#tosjis   -> string
  #
  # Convert self to Shift_JIS
  #
  # *Note*
  # This method decodes MIME-encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-sxm0', str).
  def tosjis
    Kconv.tosjis(self)
  end

  # call-seq:
  #    String#toutf8   -> string
  #
  # Convert self to UTF-8
  #
  # *Note*
  # This method decodes MIME-encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-wxm0', str).
  def toutf8
    Kconv.toutf8(self)
  end

  # call-seq:
  #    String#toutf16   -> string
  #
  # Convert self to UTF-16
  #
  # *Note*
  # This method decodes MIME-encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-w16xm0', str).
  def toutf16
    Kconv.toutf16(self)
  end

  #
  # is Encoding
  #

  # call-seq:
  #    String#iseuc   -> obj or nil
  #
  # Returns a true value if self is a valid EUC-JP byte sequence,
  # nil otherwise.
  #
  # *Note* do not rely on the return value being a MatchData.
  def iseuc
    Kconv.iseuc(self)
  end

  # call-seq:
  #    String#issjis   -> obj or nil
  #
  # Returns a true value if self is a valid Shift_JIS byte sequence,
  # nil otherwise.
  #
  # *Note* do not rely on the return value being a MatchData.
  def issjis
    Kconv.issjis(self)
  end

  # call-seq:
  #    String#isutf8   -> obj or nil
  #
  # Returns a true value if self is a valid UTF-8 byte sequence,
  # nil otherwise.
  #
  # *Note* do not rely on the return value being a MatchData.
  def isutf8
    Kconv.isutf8(self)
  end
end