summary refs log tree commit diff
path: root/ext/nkf/lib/kconv.rb
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-01-01 15:22:25 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-01-01 15:22:25 +0000
commit 7d847f7c377b6dbf85371c3e03599881b76f1954 (patch)
tree 620178b6bee52e11f5ba2790031123443aecbb33 /ext/nkf/lib/kconv.rb
parent 5f41f87d2ba568cb0ad1fdb523a4c60b15a40613 (diff)
* ext/nkf/nkf-utf8/{nkf.c, utf8tbl.c}: Update nkf.
* ext/nkf/nkf.c: fix documents. * ext/nkf/lib/kconv.rb: fix documents. (Kconv.is*): use valid_encoding?. (Kconv.isjis): defined. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14833 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/nkf/lib/kconv.rb')
-rw-r--r-- ext/nkf/lib/kconv.rb 127
1 files changed, 62 insertions, 65 deletions
diff --git a/ext/nkf/lib/kconv.rb b/ext/nkf/lib/kconv.rb
index 2f22e32aad..a1c62e7765 100644
--- a/ext/nkf/lib/kconv.rb
+++ b/ext/nkf/lib/kconv.rb
@@ -45,38 +45,6 @@ module Kconv
UNKNOWN = NKF::UNKNOWN
#
- #
- # Private Constants
- #
-
- #Regexp of Encoding
-
- # Regexp of Shift_JIS string (private constant)
- RegexpShiftjis = /\A(?:
- [\x00-\x7f\xa1-\xdf] |
- [\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]
- )*\z/nx
-
- # Regexp of EUC-JP string (private constant)
- RegexpEucjp = /\A(?:
- [\x00-\x7f] |
- \x8e [\xa1-\xdf] |
- \x8f [\xa1-\xfe] [\xa1-\xfe] |
- [\xa1-\xfe] [\xa1-\xfe]
- )*\z/nx
-
- # Regexp of UTF-8 string (private constant)
- RegexpUtf8 = /\A(?:
- [\x00-\x7f] |
- [\xc2-\xdf] [\x80-\xbf] |
- \xe0 [\xa0-\xbf] [\x80-\xbf] |
- [\xe1-\xef] [\x80-\xbf] [\x80-\xbf] |
- \xf0 [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
- [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
- \xf4 [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
- )*\z/nx
-
- #
# Public Methods
#
@@ -99,7 +67,7 @@ module Kconv
#
# call-seq:
- # Kconv.tojis(str) -> string
+ # Kconv.tojis(str) => string
#
# Convert <code>str</code> to ISO-2022-JP
def tojis(str)
@@ -108,7 +76,7 @@ module Kconv
module_function :tojis
# call-seq:
- # Kconv.toeuc(str) -> string
+ # Kconv.toeuc(str) => string
#
# Convert <code>str</code> to EUC-JP
def toeuc(str)
@@ -117,7 +85,7 @@ module Kconv
module_function :toeuc
# call-seq:
- # Kconv.tosjis(str) -> string
+ # Kconv.tosjis(str) => string
#
# Convert <code>str</code> to Shift_JIS
def tosjis(str)
@@ -126,7 +94,7 @@ module Kconv
module_function :tosjis
# call-seq:
- # Kconv.toutf8(str) -> string
+ # Kconv.toutf8(str) => string
#
# Convert <code>str</code> to UTF-8
def toutf8(str)
@@ -135,7 +103,7 @@ module Kconv
module_function :toutf8
# call-seq:
- # Kconv.toutf16(str) -> string
+ # Kconv.toutf16(str) => string
#
# Convert <code>str</code> to UTF-16
def toutf16(str)
@@ -144,7 +112,7 @@ module Kconv
module_function :toutf16
# call-seq:
- # Kconv.toutf32(str) -> string
+ # Kconv.toutf32(str) => string
#
# Convert <code>str</code> to UTF-32
def toutf32(str)
@@ -152,12 +120,21 @@ module Kconv
end
module_function :toutf32
+ # call-seq:
+ # Kconv.tolocale(str) => string
+ #
+ # Convert <code>str</code> to locale encoding
+ def tolocale(str)
+ kconv(str, Encoding.locale_charmap)
+ end
+ module_function :tolocale
+
#
# guess
#
# call-seq:
- # Kconv.guess(str) -> integer
+ # Kconv.guess(str) => encoding
#
# Guess input encoding by NKF.guess
def guess(str)
@@ -170,38 +147,52 @@ module Kconv
#
# call-seq:
- # Kconv.iseuc(str) -> obj or nil
+ # Kconv.iseuc(str) => true or false
#
# Returns whether input encoding is EUC-JP or not.
#
# *Note* don't expect this return value is MatchData.
def iseuc(str)
- RegexpEucjp.match( str )
+ str.dup.force_encoding(EUC).valid_encoding?
end
module_function :iseuc
# call-seq:
- # Kconv.issjis(str) -> obj or nil
+ # Kconv.issjis(str) => true or false
#
# Returns whether input encoding is Shift_JIS or not.
- #
- # *Note* don't expect this return value is MatchData.
def issjis(str)
- RegexpShiftjis.match( str )
+ str.dup.force_encoding(SJIS).valid_encoding?
end
module_function :issjis
# call-seq:
- # Kconv.isutf8(str) -> obj or nil
+ # Kconv.isjis(str) => true or false
+ #
+ # Returns whether input encoding is ISO-2022-JP or not.
+ def isjis(str) # byte-wise check: the /n regexp is matched against a binary-tagged copy of str
+ /\A [\t\n\r\x20-\x7E]*
+ (?:
+ (?:\x1b \x28 I [\x21-\x7E]*
+ |\x1b \x28 J [\x21-\x7E]*
+ |\x1b \x24 @ (?:[\x21-\x7E]{2})*
+ |\x1b \x24 B (?:[\x21-\x7E]{2})*
+ |\x1b \x24 \x28 D (?:[\x21-\x7E]{2})*
+ )*
+ \x1b \x28 B [\t\n\r\x20-\x7E]*
+ )*
+ \z/nox =~ str.dup.force_encoding('BINARY') ? true : false
+ end
+ module_function :isjis
+
+ # call-seq:
+ # Kconv.isutf8(str) => true or false
#
# Returns whether input encoding is UTF-8 or not.
- #
- # *Note* don't expect this return value is MatchData.
def isutf8(str)
- RegexpUtf8.match( str )
+ str.dup.force_encoding(UTF8).valid_encoding?
end
module_function :isutf8
-
end
class String
@@ -220,66 +211,72 @@ class String
#
# call-seq:
- # String#tojis -> string
+ # String#tojis => string
#
# Convert <code>self</code> to ISO-2022-JP
def tojis; Kconv.tojis(self) end
# call-seq:
- # String#toeuc -> string
+ # String#toeuc => string
#
# Convert <code>self</code> to EUC-JP
def toeuc; Kconv.toeuc(self) end
# call-seq:
- # String#tosjis -> string
+ # String#tosjis => string
#
# Convert <code>self</code> to Shift_JIS
def tosjis; Kconv.tosjis(self) end
# call-seq:
- # String#toutf8 -> string
+ # String#toutf8 => string
#
# Convert <code>self</code> to UTF-8
def toutf8; Kconv.toutf8(self) end
# call-seq:
- # String#toutf16 -> string
+ # String#toutf16 => string
#
# Convert <code>self</code> to UTF-16
def toutf16; Kconv.toutf16(self) end
# call-seq:
- # String#toutf32 -> string
+ # String#toutf32 => string
#
# Convert <code>self</code> to UTF-32
def toutf32; Kconv.toutf32(self) end
+ # call-seq:
+ # String#tolocale => string
+ #
+ # Convert <code>self</code> to locale encoding
+ def tolocale; Kconv.tolocale(self) end
+
#
# is Encoding
#
# call-seq:
- # String#iseuc -> obj or nil
+ # String#iseuc => true or false
#
# Returns whether <code>self</code>'s encoding is EUC-JP or not.
- #
- # *Note* don't expect this return value is MatchData.
def iseuc; Kconv.iseuc(self) end
# call-seq:
- # String#issjis -> obj or nil
+ # String#issjis => true or false
#
# Returns whether <code>self</code>'s encoding is Shift_JIS or not.
- #
- # *Note* don't expect this return value is MatchData.
def issjis; Kconv.issjis(self) end
# call-seq:
- # String#isutf8 -> obj or nil
+ # String#isjis => true or false
#
- # Returns whether <code>self</code>'s encoding is UTF-8 or not.
+ # Returns whether <code>self</code>'s encoding is ISO-2022-JP or not.
+ def isjis; Kconv.isjis(self) end
+
+ # call-seq:
+ # String#isutf8 => true or false
#
- # *Note* don't expect this return value is MatchData.
+ # Returns whether <code>self</code>'s encoding is UTF-8 or not.
def isutf8; Kconv.isutf8(self) end
end