summary | refs | log | tree | commit | diff
path: root/lib/rexml
diff options
context:
space:
mode:
author ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2005-04-09 17:03:32 +0000
committer ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2005-04-09 17:03:32 +0000
commit 47bd6a4f37cb14701458233479caaaf3a25397b3 (patch)
tree fcbe0f686f6c9968240fffbdcbbc5c51e8e7b0eb /lib/rexml
parent ff866f0a8f99cd9ea94157e73c57200baadea15c (diff)
Applied Nobu's patch to the XML document encoding structure in REXML. It
passes all of REXML's native tests as well as a couple of others, and should fix potential threading issues.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8293 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml')
-rw-r--r-- lib/rexml/encoding.rb 34
-rw-r--r-- lib/rexml/encodings/EUC-JP.rb 41
-rw-r--r-- lib/rexml/encodings/ICONV.rb 18
-rw-r--r-- lib/rexml/encodings/ISO-8859-1.rb 24
-rw-r--r-- lib/rexml/encodings/SHIFT-JIS.rb 43
-rw-r--r-- lib/rexml/encodings/UNILE.rb 13
-rw-r--r-- lib/rexml/encodings/US-ASCII.rb 13
-rw-r--r-- lib/rexml/encodings/UTF-16.rb 13
-rw-r--r-- lib/rexml/encodings/UTF-8.rb 13
9 files changed, 98 insertions, 114 deletions
diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb
index 92ae1e8c21..739002dc4a 100644
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@@ -1,6 +1,16 @@
+# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
module REXML
module Encoding
- @@uconv_available = false
+ @encoding_methods = {}
+ def self.register(enc, &block)
+ @encoding_methods[enc] = block
+ end
+ def self.apply(obj, enc)
+ @encoding_methods[enc][obj]
+ end
+ def self.encoding_method(enc)
+ @encoding_methods[enc]
+ end
# Native, default format is UTF-8, so it is declared here rather than in
# an encodings/ definition.
@@ -18,26 +28,24 @@ module REXML
if enc and enc != UTF_8
@encoding = enc.upcase
begin
- load 'rexml/encodings/ICONV.rb'
- instance_eval @@__REXML_encoding_methods
- Iconv::iconv( UTF_8, @encoding, "" )
+ require 'rexml/encodings/ICONV.rb'
+ Encoding.apply(self, "ICONV")
rescue LoadError, Exception => err
- raise "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
+ raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
@encoding.untaint
enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
begin
- load enc_file
- instance_eval @@__REXML_encoding_methods
+ require enc_file
+ Encoding.apply(self, @encoding)
rescue LoadError
- puts $!.message
- raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." )
+ puts $!.message
+ raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
end
end
else
- enc = UTF_8
- @encoding = enc.upcase
- load 'rexml/encodings/UTF-8.rb'
- instance_eval @@__REXML_encoding_methods
+ @encoding = UTF_8
+ require 'rexml/encodings/UTF-8.rb'
+ Encoding.apply(self, @encoding)
end
ensure
$VERBOSE = old_verbosity
diff --git a/lib/rexml/encodings/EUC-JP.rb b/lib/rexml/encodings/EUC-JP.rb
index a1314d0856..684df0bbd6 100644
--- a/lib/rexml/encodings/EUC-JP.rb
+++ b/lib/rexml/encodings/EUC-JP.rb
@@ -1,37 +1,20 @@
-begin
- require 'iconv'
+require 'uconv'
- module REXML
- module Encoding
- @@__REXML_encoding_methods =<<-EOL
- def decode(str)
- return Iconv::iconv("utf-8", "euc-jp", str)[0]
- end
-
- def encode content
- return Iconv::iconv("euc-jp", "utf-8", content)[0]
- end
- EOL
+module REXML
+ module Encoding
+ def decode_eucjp(str)
+ Uconv::euctou8(str)
end
- end
-rescue LoadError
- begin
- require 'uconv'
- module REXML
- module Encoding
- @@__REXML_encoding_methods =<<-EOL
- def decode(str)
- return Uconv::euctou8(str)
- end
+ def encode_eucjp content
+ Uconv::u8toeuc(content)
+ end
- def encode content
- return Uconv::u8toeuc(content)
- end
- EOL
+ register("EUC-JP") do |obj|
+ class << obj
+ alias decode decode_eucjp
+ alias encode encode_eucjp
end
end
- rescue LoadError
- raise "uconv or iconv is required for Japanese encoding support."
end
end
diff --git a/lib/rexml/encodings/ICONV.rb b/lib/rexml/encodings/ICONV.rb
index 384758d7b2..172fba7cd1 100644
--- a/lib/rexml/encodings/ICONV.rb
+++ b/lib/rexml/encodings/ICONV.rb
@@ -3,14 +3,20 @@ raise LoadError unless defined? Iconv
module REXML
module Encoding
- @@__REXML_encoding_methods =<<-EOL
- def decode( str )
- return Iconv::iconv("utf-8", @encoding, str)[0]
+ def decode_iconv(str)
+ Iconv.conv(UTF_8, @encoding, str)
end
- def encode( content )
- return Iconv::iconv(@encoding, "utf-8", content)[0]
+ def encode_iconv(content)
+ Iconv.conv(@encoding, UTF_8, content)
+ end
+
+ register("ICONV") do |obj|
+ Iconv.conv(UTF_8, obj.encoding, nil)
+ class << obj
+ alias decode decode_iconv
+ alias encode encode_iconv
+ end
end
- EOL
end
end
diff --git a/lib/rexml/encodings/ISO-8859-1.rb b/lib/rexml/encodings/ISO-8859-1.rb
index f4e4527c2d..2873d13bf0 100644
--- a/lib/rexml/encodings/ISO-8859-1.rb
+++ b/lib/rexml/encodings/ISO-8859-1.rb
@@ -1,25 +1,7 @@
+require 'rexml/encodings/US-ASCII'
+
module REXML
module Encoding
- @@__REXML_encoding_methods = %q~
- # Convert from UTF-8
- def encode content
- array_utf8 = content.unpack('U*')
- array_enc = []
- array_utf8.each do |num|
- if num <= 0xFF
- array_enc << num
- else
- # Numeric entity (&#nnnn;); shard by Stefan Scholl
- array_enc.concat "&\##{num};".unpack('C*')
- end
- end
- array_enc.pack('C*')
- end
-
- # Convert to UTF-8
- def decode(str)
- str.unpack('C*').pack('U*')
- end
- ~
+ register("ISO-8859-1", &encoding_method("US-ASCII"))
end
end
diff --git a/lib/rexml/encodings/SHIFT-JIS.rb b/lib/rexml/encodings/SHIFT-JIS.rb
index f17c927864..cbbb88e683 100644
--- a/lib/rexml/encodings/SHIFT-JIS.rb
+++ b/lib/rexml/encodings/SHIFT-JIS.rb
@@ -1,37 +1,22 @@
-begin
- require 'iconv'
+require 'uconv'
- module REXML
- module Encoding
- @@__REXML_encoding_methods =<<-EOL
- def decode(str)
- return Iconv::iconv("utf-8", "shift_jis", str)[0]
- end
-
- def encode content
- return Iconv::iconv("shift_jis", "utf-8", content)[0]
- end
- EOL
+module REXML
+ module Encoding
+ def decode_sjis content
+ Uconv::u8tosjis(content)
end
- end
-rescue LoadError
- begin
- require 'uconv'
- module REXML
- module Encoding
- @@__REXML_encoding_methods =<<-EOL
- def encode(content)
- Uconv::u8tosjis(content)
- end
+ def encode_sjis(str)
+ Uconv::sjistou8(str)
+ end
- def decode(str)
- Uconv::sjistou8(str)
- end
- EOL
+ b = proc do |obj|
+ class << obj
+ alias decode decode_sjis
+ alias encode encode_sjis
end
end
- rescue LoadError
- raise "uconv or iconv is required for Japanese encoding support."
+ register("SHIFT-JIS", &b)
+ register("SHIFT_JIS", &b)
end
end
diff --git a/lib/rexml/encodings/UNILE.rb b/lib/rexml/encodings/UNILE.rb
index 95141093b5..0560a08361 100644
--- a/lib/rexml/encodings/UNILE.rb
+++ b/lib/rexml/encodings/UNILE.rb
@@ -1,7 +1,6 @@
module REXML
module Encoding
- @@__REXML_encoding_methods =<<-EOL
- def encode content
+ def encode_unile content
array_utf8 = content.unpack("U*")
array_enc = []
array_utf8.each do |num|
@@ -16,7 +15,7 @@ module REXML
array_enc.pack('C*')
end
- def decode(str)
+ def decode_unile(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(array_enc.size-1, 2){|i|
@@ -24,6 +23,12 @@ module REXML
}
array_utf8.pack('U*')
end
- EOL
+
+ register(UNILE) do |obj|
+ class << obj
+ alias decode decode_unile
+ alias encode encode_unile
+ end
+ end
end
end
diff --git a/lib/rexml/encodings/US-ASCII.rb b/lib/rexml/encodings/US-ASCII.rb
index fe8f6df303..fb4c217074 100644
--- a/lib/rexml/encodings/US-ASCII.rb
+++ b/lib/rexml/encodings/US-ASCII.rb
@@ -1,8 +1,7 @@
module REXML
module Encoding
- @@__REXML_encoding_methods = %q~
# Convert from UTF-8
- def encode content
+ def encode_ascii content
array_utf8 = content.unpack('U*')
array_enc = []
array_utf8.each do |num|
@@ -17,9 +16,15 @@ module REXML
end
# Convert to UTF-8
- def decode(str)
+ def decode_ascii(str)
str.unpack('C*').pack('U*')
end
- ~
+
+ register("US-ASCII") do |obj|
+ class << obj
+ alias decode decode_ascii
+ alias encode encode_ascii
+ end
+ end
end
end
diff --git a/lib/rexml/encodings/UTF-16.rb b/lib/rexml/encodings/UTF-16.rb
index bd92fce18d..972169755e 100644
--- a/lib/rexml/encodings/UTF-16.rb
+++ b/lib/rexml/encodings/UTF-16.rb
@@ -1,7 +1,6 @@
module REXML
module Encoding
- @@__REXML_encoding_methods =<<-EOL
- def encode content
+ def encode_utf16 content
array_utf8 = content.unpack("U*")
array_enc = []
array_utf8.each do |num|
@@ -16,7 +15,7 @@ module REXML
array_enc.pack('C*')
end
- def decode(str)
+ def decode_utf16(str)
array_enc=str.unpack('C*')
array_utf8 = []
2.step(array_enc.size-1, 2){|i|
@@ -24,6 +23,12 @@ module REXML
}
array_utf8.pack('U*')
end
- EOL
+
+ register(UTF_16) do |obj|
+ class << obj
+ alias decode decode_utf16
+ alias encode encode_utf16
+ end
+ end
end
end
diff --git a/lib/rexml/encodings/UTF-8.rb b/lib/rexml/encodings/UTF-8.rb
index 33a7e490c4..bb08f44100 100644
--- a/lib/rexml/encodings/UTF-8.rb
+++ b/lib/rexml/encodings/UTF-8.rb
@@ -1,13 +1,18 @@
module REXML
module Encoding
- @@__REXML_encoding_methods =<<-EOL
- def encode content
+ def encode_utf8 content
content
end
- def decode(str)
+ def decode_utf8(str)
str
end
- EOL
+
+ register(UTF_8) do |obj|
+ class << obj
+ alias decode decode_utf8
+ alias encode encode_utf8
+ end
+ end
end
end