summary | refs | log | tree | commit | diff
path: root/lib/rexml/encodings
diff options
context:
space:
mode:
author ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-10-10 12:54:46 +0000
committer ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-10-10 12:54:46 +0000
commit 7d21c237ccd46ec1d56639ce53b5882bf97d9de3 (patch)
tree e9667617b9228d24dbb9833bec2c3d1be0293cb9 /lib/rexml/encodings
parent 662532be008867582fc86dd813dcf8f6a79136eb (diff)
* Changes to the encoding mechanism. If iconv is found, it is used first
  for encoding changes. This should be the case on all 1.8 installations.
  When it isn't found (<1.6), the native REXML encoding mechanism is used.
  This cleaned out some files, and tightened up the code a bit; and iconv
  should be faster than the pure Ruby code.
* Changed deprecated assert_not_nil to assert throughout the tests.
* Parse exceptions are a little more verbose, and extend RuntimeError.
* Bug fixes to XPathParser
* The Light API is still shifting, like the sands of the desert.
* Fixed a new Ruby 1.8.0 warning, added some speed optimizations, and
  tightened error reporting in the base parser

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4737 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/encodings')
-rw-r--r-- lib/rexml/encodings/EUC-JP.rb 24
-rw-r--r-- lib/rexml/encodings/EUC-JP_decl.rb 6
-rw-r--r-- lib/rexml/encodings/ICONV.rb 14
-rw-r--r-- lib/rexml/encodings/ISO-8859-1.rb 4
-rw-r--r-- lib/rexml/encodings/ISO-8859-1_decl.rb 6
-rw-r--r-- lib/rexml/encodings/Shift-JIS_decl.rb 6
-rw-r--r-- lib/rexml/encodings/UNILE.rb 4
-rw-r--r-- lib/rexml/encodings/UNILE_decl.rb 6
-rw-r--r-- lib/rexml/encodings/US-ASCII.rb 4
-rw-r--r-- lib/rexml/encodings/US-ASCII_decl.rb 6
-rw-r--r-- lib/rexml/encodings/UTF-16.rb 4
-rw-r--r-- lib/rexml/encodings/UTF-16_decl.rb 6
-rw-r--r-- lib/rexml/encodings/UTF-8.rb 11
13 files changed, 45 insertions, 56 deletions
diff --git a/lib/rexml/encodings/EUC-JP.rb b/lib/rexml/encodings/EUC-JP.rb
index def760b303..8b146e5f0a 100644
--- a/lib/rexml/encodings/EUC-JP.rb
+++ b/lib/rexml/encodings/EUC-JP.rb
@@ -3,30 +3,30 @@ begin
module REXML
module Encoding
- def from_euc_jp(str)
+ def decode(str)
return Uconv::euctou8(str)
end
- def to_euc_jp content
+ def encode content
return Uconv::u8toeuc(content)
end
end
end
rescue LoadError
begin
- require 'iconv'
- module REXML
- module Encoding
- def from_euc_jp(str)
- return Iconv::iconv("utf-8", "euc-jp", str).join('')
- end
+ require 'iconv'
+ module REXML
+ module Encoding
+ def decode(str)
+ return Iconv::iconv("utf-8", "euc-jp", str)[0]
+ end
- def to_euc_jp content
- return Iconv::iconv("euc-jp", "utf-8", content).join('')
+ def encode content
+ return Iconv::iconv("euc-jp", "utf-8", content)[0]
+ end
end
end
- end
rescue LoadError
- raise "uconv or iconv is required for Japanese encoding support."
+ raise "uconv or iconv is required for Japanese encoding support."
end
end
diff --git a/lib/rexml/encodings/EUC-JP_decl.rb b/lib/rexml/encodings/EUC-JP_decl.rb
deleted file mode 100644
index 4c7cd828a6..0000000000
--- a/lib/rexml/encodings/EUC-JP_decl.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module REXML
- module Encoding
- EUC_JP = 'EUC-JP'
- claim( EUC_JP )
- end
-end
diff --git a/lib/rexml/encodings/ICONV.rb b/lib/rexml/encodings/ICONV.rb
new file mode 100644
index 0000000000..f1b5c80b87
--- /dev/null
+++ b/lib/rexml/encodings/ICONV.rb
@@ -0,0 +1,14 @@
+require "iconv"
+raise LoadError unless defined? Iconv
+
+module REXML
+ module Encoding
+ def decode( str )
+ return Iconv::iconv(UTF_8, @encoding, str)[0]
+ end
+
+ def encode( content )
+ return Iconv::iconv(@encoding, UTF_8, content)[0]
+ end
+ end
+end
diff --git a/lib/rexml/encodings/ISO-8859-1.rb b/lib/rexml/encodings/ISO-8859-1.rb
index 98c5aff3b2..0cb9afd147 100644
--- a/lib/rexml/encodings/ISO-8859-1.rb
+++ b/lib/rexml/encodings/ISO-8859-1.rb
@@ -1,7 +1,7 @@
module REXML
module Encoding
# Convert from UTF-8
- def to_iso_8859_1 content
+ def encode content
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
@@ -16,7 +16,7 @@ module REXML
end
# Convert to UTF-8
- def from_iso_8859_1(str)
+ def decode(str)
str.unpack('C*').pack('U*')
end
end
diff --git a/lib/rexml/encodings/ISO-8859-1_decl.rb b/lib/rexml/encodings/ISO-8859-1_decl.rb
deleted file mode 100644
index a738d30472..0000000000
--- a/lib/rexml/encodings/ISO-8859-1_decl.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module REXML
- module Encoding
- ISO_8859_1 = 'ISO-8859-1'
- claim( ISO_8859_1 )
- end
-end
diff --git a/lib/rexml/encodings/Shift-JIS_decl.rb b/lib/rexml/encodings/Shift-JIS_decl.rb
deleted file mode 100644
index 66f650144a..0000000000
--- a/lib/rexml/encodings/Shift-JIS_decl.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module REXML
- module Encoding
- claim( 'Shift-JIS' )
- claim( 'Shift_JIS' )
- end
-end
diff --git a/lib/rexml/encodings/UNILE.rb b/lib/rexml/encodings/UNILE.rb
index 74bed14340..e57a784061 100644
--- a/lib/rexml/encodings/UNILE.rb
+++ b/lib/rexml/encodings/UNILE.rb
@@ -1,6 +1,6 @@
module REXML
module Encoding
- def to_unile content
+ def encode content
array_utf8 = content.unpack("U*")
array_enc = []
array_utf8.each do |num|
@@ -15,7 +15,7 @@ module REXML
array_enc.pack('C*')
end
- def from_unile(str)
+ def decode(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(array_enc.size-1, 2){|i|
diff --git a/lib/rexml/encodings/UNILE_decl.rb b/lib/rexml/encodings/UNILE_decl.rb
deleted file mode 100644
index 9e1c11dc03..0000000000
--- a/lib/rexml/encodings/UNILE_decl.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module REXML
- module Encoding
- UNILE = 'UNILE'
- claim( UNILE, /^\377\376/ )
- end
-end
diff --git a/lib/rexml/encodings/US-ASCII.rb b/lib/rexml/encodings/US-ASCII.rb
index 4ca2c82a83..0cb9afd147 100644
--- a/lib/rexml/encodings/US-ASCII.rb
+++ b/lib/rexml/encodings/US-ASCII.rb
@@ -1,7 +1,7 @@
module REXML
module Encoding
# Convert from UTF-8
- def to_us_ascii content
+ def encode content
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
@@ -16,7 +16,7 @@ module REXML
end
# Convert to UTF-8
- def from_us_ascii(str)
+ def decode(str)
str.unpack('C*').pack('U*')
end
end
diff --git a/lib/rexml/encodings/US-ASCII_decl.rb b/lib/rexml/encodings/US-ASCII_decl.rb
deleted file mode 100644
index 1e69234fff..0000000000
--- a/lib/rexml/encodings/US-ASCII_decl.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module REXML
- module Encoding
- US_ASCII = 'US-ASCII'
- claim( US_ASCII )
- end
-end
diff --git a/lib/rexml/encodings/UTF-16.rb b/lib/rexml/encodings/UTF-16.rb
index 2aeef76a0c..31f2d81a5b 100644
--- a/lib/rexml/encodings/UTF-16.rb
+++ b/lib/rexml/encodings/UTF-16.rb
@@ -1,6 +1,6 @@
module REXML
module Encoding
- def to_utf_16 content
+ def encode content
array_utf8 = content.unpack("U*")
array_enc = []
array_utf8.each do |num|
@@ -15,7 +15,7 @@ module REXML
array_enc.pack('C*')
end
- def from_utf_16(str)
+ def decode(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(arrayEnc.size-1, 2){|i|
diff --git a/lib/rexml/encodings/UTF-16_decl.rb b/lib/rexml/encodings/UTF-16_decl.rb
deleted file mode 100644
index f405a9f259..0000000000
--- a/lib/rexml/encodings/UTF-16_decl.rb
+++ /dev/null
@@ -1,6 +0,0 @@
-module REXML
- module Encoding
- UTF_16 = 'UTF-16'
- claim( UTF_16, /^\376\377/ )
- end
-end
diff --git a/lib/rexml/encodings/UTF-8.rb b/lib/rexml/encodings/UTF-8.rb
new file mode 100644
index 0000000000..96fee4c4c0
--- /dev/null
+++ b/lib/rexml/encodings/UTF-8.rb
@@ -0,0 +1,11 @@
+module REXML
+ module Encoding
+ def encode content
+ content
+ end
+
+ def decode(str)
+ str
+ end
+ end
+end