diff options
author | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2003-10-10 12:54:46 +0000 |
---|---|---|
committer | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2003-10-10 12:54:46 +0000 |
commit | 7d21c237ccd46ec1d56639ce53b5882bf97d9de3 (patch) | |
tree | e9667617b9228d24dbb9833bec2c3d1be0293cb9 /lib/rexml/encodings | |
parent | 662532be008867582fc86dd813dcf8f6a79136eb (diff) |
* Changes to the encoding mechanism. If iconv is found, it is used first
for encoding changes. This should be the case on all 1.8 installations.
When it isn't found (<1.6), the native REXML encoding mechanism is used.
This cleaned out some files and tightened up the code a bit, and iconv
should be faster than the pure Ruby code.
* Changed deprecated assert_not_nil to assert throughout the tests.
* Parse exceptions are a little more verbose, and extend RuntimeError.
* Bug fixes to XPathParser
* The Light API is still shifting, like the sands of the desert.
* Fixed a new Ruby 1.8.0 warning, added some speed optimizations, and
tightened error reporting in the base parser.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4737 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/encodings')
-rw-r--r-- | lib/rexml/encodings/EUC-JP.rb | 24 | ||||
-rw-r--r-- | lib/rexml/encodings/EUC-JP_decl.rb | 6 | ||||
-rw-r--r-- | lib/rexml/encodings/ICONV.rb | 14 | ||||
-rw-r--r-- | lib/rexml/encodings/ISO-8859-1.rb | 4 | ||||
-rw-r--r-- | lib/rexml/encodings/ISO-8859-1_decl.rb | 6 | ||||
-rw-r--r-- | lib/rexml/encodings/Shift-JIS_decl.rb | 6 | ||||
-rw-r--r-- | lib/rexml/encodings/UNILE.rb | 4 | ||||
-rw-r--r-- | lib/rexml/encodings/UNILE_decl.rb | 6 | ||||
-rw-r--r-- | lib/rexml/encodings/US-ASCII.rb | 4 | ||||
-rw-r--r-- | lib/rexml/encodings/US-ASCII_decl.rb | 6 | ||||
-rw-r--r-- | lib/rexml/encodings/UTF-16.rb | 4 | ||||
-rw-r--r-- | lib/rexml/encodings/UTF-16_decl.rb | 6 | ||||
-rw-r--r-- | lib/rexml/encodings/UTF-8.rb | 11 |
13 files changed, 45 insertions, 56 deletions
diff --git a/lib/rexml/encodings/EUC-JP.rb b/lib/rexml/encodings/EUC-JP.rb index def760b303..8b146e5f0a 100644 --- a/lib/rexml/encodings/EUC-JP.rb +++ b/lib/rexml/encodings/EUC-JP.rb @@ -3,30 +3,30 @@ begin module REXML module Encoding - def from_euc_jp(str) + def decode(str) return Uconv::euctou8(str) end - def to_euc_jp content + def encode content return Uconv::u8toeuc(content) end end end rescue LoadError begin - require 'iconv' - module REXML - module Encoding - def from_euc_jp(str) - return Iconv::iconv("utf-8", "euc-jp", str).join('') - end + require 'iconv' + module REXML + module Encoding + def decode(str) + return Iconv::iconv("utf-8", "euc-jp", str)[0] + end - def to_euc_jp content - return Iconv::iconv("euc-jp", "utf-8", content).join('') + def encode content + return Iconv::iconv("euc-jp", "utf-8", content)[0] + end end end - end rescue LoadError - raise "uconv or iconv is required for Japanese encoding support." + raise "uconv or iconv is required for Japanese encoding support." end end diff --git a/lib/rexml/encodings/EUC-JP_decl.rb b/lib/rexml/encodings/EUC-JP_decl.rb deleted file mode 100644 index 4c7cd828a6..0000000000 --- a/lib/rexml/encodings/EUC-JP_decl.rb +++ /dev/null @@ -1,6 +0,0 @@ -module REXML - module Encoding - EUC_JP = 'EUC-JP' - claim( EUC_JP ) - end -end diff --git a/lib/rexml/encodings/ICONV.rb b/lib/rexml/encodings/ICONV.rb new file mode 100644 index 0000000000..f1b5c80b87 --- /dev/null +++ b/lib/rexml/encodings/ICONV.rb @@ -0,0 +1,14 @@ +require "iconv" +raise LoadError unless defined? 
Iconv + +module REXML + module Encoding + def decode( str ) + return Iconv::iconv(UTF_8, @encoding, str)[0] + end + + def encode( content ) + return Iconv::iconv(@encoding, UTF_8, content)[0] + end + end +end diff --git a/lib/rexml/encodings/ISO-8859-1.rb b/lib/rexml/encodings/ISO-8859-1.rb index 98c5aff3b2..0cb9afd147 100644 --- a/lib/rexml/encodings/ISO-8859-1.rb +++ b/lib/rexml/encodings/ISO-8859-1.rb @@ -1,7 +1,7 @@ module REXML module Encoding # Convert from UTF-8 - def to_iso_8859_1 content + def encode content array_utf8 = content.unpack('U*') array_enc = [] array_utf8.each do |num| @@ -16,7 +16,7 @@ module REXML end # Convert to UTF-8 - def from_iso_8859_1(str) + def decode(str) str.unpack('C*').pack('U*') end end diff --git a/lib/rexml/encodings/ISO-8859-1_decl.rb b/lib/rexml/encodings/ISO-8859-1_decl.rb deleted file mode 100644 index a738d30472..0000000000 --- a/lib/rexml/encodings/ISO-8859-1_decl.rb +++ /dev/null @@ -1,6 +0,0 @@ -module REXML - module Encoding - ISO_8859_1 = 'ISO-8859-1' - claim( ISO_8859_1 ) - end -end diff --git a/lib/rexml/encodings/Shift-JIS_decl.rb b/lib/rexml/encodings/Shift-JIS_decl.rb deleted file mode 100644 index 66f650144a..0000000000 --- a/lib/rexml/encodings/Shift-JIS_decl.rb +++ /dev/null @@ -1,6 +0,0 @@ -module REXML - module Encoding - claim( 'Shift-JIS' ) - claim( 'Shift_JIS' ) - end -end diff --git a/lib/rexml/encodings/UNILE.rb b/lib/rexml/encodings/UNILE.rb index 74bed14340..e57a784061 100644 --- a/lib/rexml/encodings/UNILE.rb +++ b/lib/rexml/encodings/UNILE.rb @@ -1,6 +1,6 @@ module REXML module Encoding - def to_unile content + def encode content array_utf8 = content.unpack("U*") array_enc = [] array_utf8.each do |num| @@ -15,7 +15,7 @@ module REXML array_enc.pack('C*') end - def from_unile(str) + def decode(str) array_enc=str.unpack('C*') array_utf8 = [] 2.step(array_enc.size-1, 2){|i| diff --git a/lib/rexml/encodings/UNILE_decl.rb b/lib/rexml/encodings/UNILE_decl.rb deleted file mode 100644 index 
9e1c11dc03..0000000000 --- a/lib/rexml/encodings/UNILE_decl.rb +++ /dev/null @@ -1,6 +0,0 @@ -module REXML - module Encoding - UNILE = 'UNILE' - claim( UNILE, /^\377\376/ ) - end -end diff --git a/lib/rexml/encodings/US-ASCII.rb b/lib/rexml/encodings/US-ASCII.rb index 4ca2c82a83..0cb9afd147 100644 --- a/lib/rexml/encodings/US-ASCII.rb +++ b/lib/rexml/encodings/US-ASCII.rb @@ -1,7 +1,7 @@ module REXML module Encoding # Convert from UTF-8 - def to_us_ascii content + def encode content array_utf8 = content.unpack('U*') array_enc = [] array_utf8.each do |num| @@ -16,7 +16,7 @@ module REXML end # Convert to UTF-8 - def from_us_ascii(str) + def decode(str) str.unpack('C*').pack('U*') end end diff --git a/lib/rexml/encodings/US-ASCII_decl.rb b/lib/rexml/encodings/US-ASCII_decl.rb deleted file mode 100644 index 1e69234fff..0000000000 --- a/lib/rexml/encodings/US-ASCII_decl.rb +++ /dev/null @@ -1,6 +0,0 @@ -module REXML - module Encoding - US_ASCII = 'US-ASCII' - claim( US_ASCII ) - end -end diff --git a/lib/rexml/encodings/UTF-16.rb b/lib/rexml/encodings/UTF-16.rb index 2aeef76a0c..31f2d81a5b 100644 --- a/lib/rexml/encodings/UTF-16.rb +++ b/lib/rexml/encodings/UTF-16.rb @@ -1,6 +1,6 @@ module REXML module Encoding - def to_utf_16 content + def encode content array_utf8 = content.unpack("U*") array_enc = [] array_utf8.each do |num| @@ -15,7 +15,7 @@ module REXML array_enc.pack('C*') end - def from_utf_16(str) + def decode(str) array_enc=str.unpack('C*') array_utf8 = [] 2.step(arrayEnc.size-1, 2){|i| diff --git a/lib/rexml/encodings/UTF-16_decl.rb b/lib/rexml/encodings/UTF-16_decl.rb deleted file mode 100644 index f405a9f259..0000000000 --- a/lib/rexml/encodings/UTF-16_decl.rb +++ /dev/null @@ -1,6 +0,0 @@ -module REXML - module Encoding - UTF_16 = 'UTF-16' - claim( UTF_16, /^\376\377/ ) - end -end diff --git a/lib/rexml/encodings/UTF-8.rb b/lib/rexml/encodings/UTF-8.rb new file mode 100644 index 0000000000..96fee4c4c0 --- /dev/null +++ b/lib/rexml/encodings/UTF-8.rb @@ -0,0 
+1,11 @@ +module REXML + module Encoding + def encode content + content + end + + def decode(str) + str + end + end +end |