diff options
author | yugui <yugui@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-08-25 15:02:05 +0000 |
---|---|---|
committer | yugui <yugui@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-08-25 15:02:05 +0000 |
commit | 0dc342de848a642ecce8db697b8fecd83a63e117 (patch) | |
tree | 2b7ed4724aff1f86073e4740134bda9c4aac1a39 /trunk/ext/nkf/lib/kconv.rb | |
parent | ef70cf7138ab8034b5b806f466e4b484b24f0f88 (diff) |
added tag v1_9_0_4
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/tags/v1_9_0_4@18845 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'trunk/ext/nkf/lib/kconv.rb')
-rw-r--r-- | trunk/ext/nkf/lib/kconv.rb | 282 |
1 files changed, 282 insertions, 0 deletions
# Extracted from: diff --git a/trunk/ext/nkf/lib/kconv.rb b/trunk/ext/nkf/lib/kconv.rb
# (new file, index 0000000000..81a8a4b72b, 282 lines)
#
# kconv.rb - Kanji Converter.
#
# $Id$
#
# ----
#
# kconv.rb implements the Kconv module for Kanji Converter.  Additionally,
# some methods are added to the String class to allow easy conversion.
#

require 'nkf'

#
# Kanji Converter for Ruby.
#
# Every conversion helper funnels through Kconv.kconv, which builds an
# option string and hands it to NKF.nkf.
#
module Kconv
  #
  # Public Constants
  #

  # Constants of Encoding (all re-exported from NKF)

  # Auto-Detect
  AUTO = NKF::AUTO
  # ISO-2022-JP
  JIS = NKF::JIS
  # EUC-JP
  EUC = NKF::EUC
  # Shift_JIS
  SJIS = NKF::SJIS
  # BINARY
  BINARY = NKF::BINARY
  # NOCONV
  NOCONV = NKF::NOCONV
  # ASCII
  ASCII = NKF::ASCII
  # UTF-8
  UTF8 = NKF::UTF8
  # UTF-16
  UTF16 = NKF::UTF16
  # UTF-32
  UTF32 = NKF::UTF32
  # UNKNOWN
  UNKNOWN = NKF::UNKNOWN

  #
  # Public Methods
  #

  # call-seq:
  #    Kconv.kconv(str, to_enc, from_enc=nil)
  #
  # Convert <code>str</code> to <code>to_enc</code>.
  # <code>to_enc</code> and <code>from_enc</code> are given as constants of Kconv.
  #
  # When <code>from_enc</code> is nil (or false) no <code>--ic</code> option
  # is passed to NKF; likewise a nil <code>to_enc</code> omits <code>--oc</code>.
  def kconv(str, to_enc, from_enc=nil)
    opt = ''
    opt += ' --ic=' + from_enc.to_s if from_enc
    opt += ' --oc=' + to_enc.to_s if to_enc

    ::NKF::nkf(opt, str)
  end
  module_function :kconv

  #
  # Encode to
  #

  # call-seq:
  #    Kconv.tojis(str)   => string
  #
  # Convert <code>str</code> to ISO-2022-JP
  def tojis(str)
    kconv(str, JIS)
  end
  module_function :tojis

  # call-seq:
  #    Kconv.toeuc(str)   => string
  #
  # Convert <code>str</code> to EUC-JP
  def toeuc(str)
    kconv(str, EUC)
  end
  module_function :toeuc

  # call-seq:
  #    Kconv.tosjis(str)   => string
  #
  # Convert <code>str</code> to Shift_JIS
  def tosjis(str)
    kconv(str, SJIS)
  end
  module_function :tosjis

  # call-seq:
  #    Kconv.toutf8(str)   => string
  #
  # Convert <code>str</code> to UTF-8
  def toutf8(str)
    kconv(str, UTF8)
  end
  module_function :toutf8

  # call-seq:
  #    Kconv.toutf16(str)   => string
  #
  # Convert <code>str</code> to UTF-16
  def toutf16(str)
    kconv(str, UTF16)
  end
  module_function :toutf16

  # call-seq:
  #    Kconv.toutf32(str)   => string
  #
  # Convert <code>str</code> to UTF-32
  def toutf32(str)
    kconv(str, UTF32)
  end
  module_function :toutf32

  # call-seq:
  #    Kconv.tolocale(str)   => string
  #
  # Convert <code>str</code> to the locale encoding
  # (as reported by Encoding.locale_charmap).
  def tolocale(str)
    kconv(str, Encoding.locale_charmap)
  end
  module_function :tolocale

  #
  # guess
  #

  # call-seq:
  #    Kconv.guess(str)   => encoding
  #
  # Guess input encoding by NKF.guess
  def guess(str)
    ::NKF::guess(str)
  end
  module_function :guess

  #
  # isEncoding
  #

  # call-seq:
  #    Kconv.iseuc(str)   => true or false
  #
  # Returns whether input encoding is EUC-JP or not.
  # The check is performed on a copy, so <code>str</code> is not modified.
  #
  # *Note* don't expect this return value is MatchData.
  def iseuc(str)
    str.dup.force_encoding(EUC).valid_encoding?
  end
  module_function :iseuc

  # call-seq:
  #    Kconv.issjis(str)   => true or false
  #
  # Returns whether input encoding is Shift_JIS or not.
  # The check is performed on a copy, so <code>str</code> is not modified.
  def issjis(str)
    str.dup.force_encoding(SJIS).valid_encoding?
  end
  module_function :issjis

  # call-seq:
  #    Kconv.isjis(str)   => true or false
  #
  # Returns whether input encoding is ISO-2022-JP or not.
  #
  # The pattern accepts plain 7-bit text, optionally followed by any number
  # of escape-introduced segments, each of which must be closed by
  # ESC ( B before further plain text.  The match is done on a binary copy
  # of <code>str</code>.
  def isjis(str)
    /\A [\t\n\r\x20-\x7E]*
      (?:
        (?:\x1b \x28 I      [\x21-\x7E]*
          |\x1b \x28 J      [\x21-\x7E]*
          |\x1b \x24 @      (?:[\x21-\x7E]{2})*
          |\x1b \x24 B      (?:[\x21-\x7E]{2})*
          |\x1b \x24 \x28 D (?:[\x21-\x7E]{2})*
        )*
        \x1b \x28 B [\t\n\r\x20-\x7E]*
      )*
     \z/nox =~ str.dup.force_encoding('BINARY') ? true : false
  end
  module_function :isjis

  # call-seq:
  #    Kconv.isutf8(str)   => true or false
  #
  # Returns whether input encoding is UTF-8 or not.
  # The check is performed on a copy, so <code>str</code> is not modified.
  def isutf8(str)
    str.dup.force_encoding(UTF8).valid_encoding?
  end
  module_function :isutf8
end

class String
  # call-seq:
  #    String#kconv(to_enc, from_enc)
  #
  # Convert <code>self</code> to <code>to_enc</code>.
  # <code>to_enc</code> and <code>from_enc</code> are given as constants of Kconv.
  #
  # When <code>from_enc</code> is omitted, the receiver's own encoding is
  # used as the source encoding, unless that encoding is Encoding.list[0]
  # (NOTE(review): presumably the binary/ASCII-8BIT encoding - confirm), in
  # which case detection is left to NKF.
  def kconv(to_enc, from_enc=nil)
    # The inferred encoding must be stored in from_enc itself so that it is
    # actually forwarded to Kconv.kconv below.
    from_enc = self.encoding if !from_enc && self.encoding != Encoding.list[0]
    Kconv::kconv(self, to_enc, from_enc)
  end

  #
  # to Encoding
  #

  # call-seq:
  #    String#tojis   => string
  #
  # Convert <code>self</code> to ISO-2022-JP
  def tojis; Kconv.tojis(self) end

  # call-seq:
  #    String#toeuc   => string
  #
  # Convert <code>self</code> to EUC-JP
  def toeuc; Kconv.toeuc(self) end

  # call-seq:
  #    String#tosjis   => string
  #
  # Convert <code>self</code> to Shift_JIS
  def tosjis; Kconv.tosjis(self) end

  # call-seq:
  #    String#toutf8   => string
  #
  # Convert <code>self</code> to UTF-8
  def toutf8; Kconv.toutf8(self) end

  # call-seq:
  #    String#toutf16   => string
  #
  # Convert <code>self</code> to UTF-16
  def toutf16; Kconv.toutf16(self) end

  # call-seq:
  #    String#toutf32   => string
  #
  # Convert <code>self</code> to UTF-32
  def toutf32; Kconv.toutf32(self) end

  # call-seq:
  #    String#tolocale   => string
  #
  # Convert <code>self</code> to locale encoding
  def tolocale; Kconv.tolocale(self) end

  #
  # is Encoding
  #

  # call-seq:
  #    String#iseuc   => true or false
  #
  # Returns whether <code>self</code>'s encoding is EUC-JP or not.
  def iseuc; Kconv.iseuc(self) end

  # call-seq:
  #    String#issjis   => true or false
  #
  # Returns whether <code>self</code>'s encoding is Shift_JIS or not.
  def issjis; Kconv.issjis(self) end

  # call-seq:
  #    String#isjis   => true or false
  #
  # Returns whether <code>self</code>'s encoding is ISO-2022-JP or not.
  def isjis; Kconv.isjis(self) end

  # call-seq:
  #    String#isutf8   => true or false
  #
  # Returns whether <code>self</code>'s encoding is UTF-8 or not.
  def isutf8; Kconv.isutf8(self) end
end