From 47bd6a4f37cb14701458233479caaaf3a25397b3 Mon Sep 17 00:00:00 2001 From: ser Date: Sat, 9 Apr 2005 17:03:32 +0000 Subject: Applied Nobu's patch to the XML document encoding structure in REXML. It passes all of REXML's native tests as well as a couple of others, and should fix potential threading issues. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8293 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/encoding.rb | 34 +++++++++++++++++++------------ lib/rexml/encodings/EUC-JP.rb | 41 +++++++++++-------------------------- lib/rexml/encodings/ICONV.rb | 18 ++++++++++------ lib/rexml/encodings/ISO-8859-1.rb | 24 +++------------------- lib/rexml/encodings/SHIFT-JIS.rb | 43 +++++++++++++-------------------------- lib/rexml/encodings/UNILE.rb | 13 ++++++++---- lib/rexml/encodings/US-ASCII.rb | 13 ++++++++---- lib/rexml/encodings/UTF-16.rb | 13 ++++++++---- lib/rexml/encodings/UTF-8.rb | 13 ++++++++---- 9 files changed, 98 insertions(+), 114 deletions(-) diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb index 92ae1e8c21..739002dc4a 100644 --- a/lib/rexml/encoding.rb +++ b/lib/rexml/encoding.rb @@ -1,6 +1,16 @@ +# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2 module REXML module Encoding - @@uconv_available = false + @encoding_methods = {} + def self.register(enc, &block) + @encoding_methods[enc] = block + end + def self.apply(obj, enc) + @encoding_methods[enc][obj] + end + def self.encoding_method(enc) + @encoding_methods[enc] + end # Native, default format is UTF-8, so it is declared here rather than in # an encodings/ definition. @@ -18,26 +28,24 @@ module REXML if enc and enc != UTF_8 @encoding = enc.upcase begin - load 'rexml/encodings/ICONV.rb' - instance_eval @@__REXML_encoding_methods - Iconv::iconv( UTF_8, @encoding, "" ) + require 'rexml/encodings/ICONV.rb' + Encoding.apply(self, "ICONV") rescue LoadError, Exception => err - raise "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/ + raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/ @encoding.untaint enc_file = File.join( "rexml", "encodings", "#@encoding.rb" ) begin - load enc_file - instance_eval @@__REXML_encoding_methods + require enc_file + Encoding.apply(self, @encoding) rescue LoadError - puts $!.message - raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." ) + puts $!.message + raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv." end end else - enc = UTF_8 - @encoding = enc.upcase - load 'rexml/encodings/UTF-8.rb' - instance_eval @@__REXML_encoding_methods + @encoding = UTF_8 + require 'rexml/encodings/UTF-8.rb' + Encoding.apply(self, @encoding) end ensure $VERBOSE = old_verbosity diff --git a/lib/rexml/encodings/EUC-JP.rb b/lib/rexml/encodings/EUC-JP.rb index a1314d0856..684df0bbd6 100644 --- a/lib/rexml/encodings/EUC-JP.rb +++ b/lib/rexml/encodings/EUC-JP.rb @@ -1,37 +1,20 @@ -begin - require 'iconv' +require 'uconv' - module REXML - module Encoding - @@__REXML_encoding_methods =<<-EOL - def decode(str) - return Iconv::iconv("utf-8", "euc-jp", str)[0] - end - - def encode content - return Iconv::iconv("euc-jp", "utf-8", content)[0] - end - EOL +module REXML + module Encoding + def decode_eucjp(str) + Uconv::euctou8(str) end - end -rescue LoadError - begin - require 'uconv' - module REXML - module Encoding - @@__REXML_encoding_methods =<<-EOL - def decode(str) - return Uconv::euctou8(str) - end + def encode_eucjp content + Uconv::u8toeuc(content) + end - def encode content - return Uconv::u8toeuc(content) - end - EOL + register("EUC-JP") do |obj| + class << obj + alias decode decode_eucjp + alias encode encode_eucjp end end - rescue LoadError - raise "uconv or iconv is required for Japanese encoding support." end end diff --git a/lib/rexml/encodings/ICONV.rb b/lib/rexml/encodings/ICONV.rb index 384758d7b2..172fba7cd1 100644 --- a/lib/rexml/encodings/ICONV.rb +++ b/lib/rexml/encodings/ICONV.rb @@ -3,14 +3,20 @@ raise LoadError unless defined? Iconv module REXML module Encoding - @@__REXML_encoding_methods =<<-EOL - def decode( str ) - return Iconv::iconv("utf-8", @encoding, str)[0] + def decode_iconv(str) + Iconv.conv(UTF_8, @encoding, str) end - def encode( content ) - return Iconv::iconv(@encoding, "utf-8", content)[0] + def encode_iconv(content) + Iconv.conv(@encoding, UTF_8, content) + end + + register("ICONV") do |obj| + Iconv.conv(UTF_8, obj.encoding, nil) + class << obj + alias decode decode_iconv + alias encode encode_iconv + end end - EOL end end diff --git a/lib/rexml/encodings/ISO-8859-1.rb b/lib/rexml/encodings/ISO-8859-1.rb index f4e4527c2d..2873d13bf0 100644 --- a/lib/rexml/encodings/ISO-8859-1.rb +++ b/lib/rexml/encodings/ISO-8859-1.rb @@ -1,25 +1,7 @@ +require 'rexml/encodings/US-ASCII' + module REXML module Encoding - @@__REXML_encoding_methods = %q~ - # Convert from UTF-8 - def encode content - array_utf8 = content.unpack('U*') - array_enc = [] - array_utf8.each do |num| - if num <= 0xFF - array_enc << num - else - # Numeric entity (&#nnnn;); shard by Stefan Scholl - array_enc.concat "&\##{num};".unpack('C*') - end - end - array_enc.pack('C*') - end - - # Convert to UTF-8 - def decode(str) - str.unpack('C*').pack('U*') - end - ~ + register("ISO-8859-1", &encoding_method("US-ASCII")) end end diff --git a/lib/rexml/encodings/SHIFT-JIS.rb b/lib/rexml/encodings/SHIFT-JIS.rb index f17c927864..cbbb88e683 100644 --- a/lib/rexml/encodings/SHIFT-JIS.rb +++ b/lib/rexml/encodings/SHIFT-JIS.rb @@ -1,37 +1,22 @@ -begin - require 'iconv' +require 'uconv' - module REXML - module Encoding - @@__REXML_encoding_methods =<<-EOL - def decode(str) - return Iconv::iconv("utf-8", "shift_jis", str)[0] - end - - def encode content - return Iconv::iconv("shift_jis", "utf-8", content)[0] - end - EOL +module REXML + module Encoding + def decode_sjis content + Uconv::u8tosjis(content) end - end -rescue LoadError - begin - require 'uconv' - module REXML - module Encoding - @@__REXML_encoding_methods =<<-EOL - def encode(content) - Uconv::u8tosjis(content) - end + def encode_sjis(str) + Uconv::sjistou8(str) + end - def decode(str) - Uconv::sjistou8(str) - end - EOL + b = proc do |obj| + class << obj + alias decode decode_sjis + alias encode encode_sjis end end - rescue LoadError - raise "uconv or iconv is required for Japanese encoding support." + register("SHIFT-JIS", &b) + register("SHIFT_JIS", &b) end end diff --git a/lib/rexml/encodings/UNILE.rb b/lib/rexml/encodings/UNILE.rb index 95141093b5..0560a08361 100644 --- a/lib/rexml/encodings/UNILE.rb +++ b/lib/rexml/encodings/UNILE.rb @@ -1,7 +1,6 @@ module REXML module Encoding - @@__REXML_encoding_methods =<<-EOL - def encode content + def encode_unile content array_utf8 = content.unpack("U*") array_enc = [] array_utf8.each do |num| @@ -16,7 +15,7 @@ module REXML array_enc.pack('C*') end - def decode(str) + def decode_unile(str) array_enc=str.unpack('C*') array_utf8 = [] 2.step(array_enc.size-1, 2){|i| @@ -24,6 +23,12 @@ module REXML } array_utf8.pack('U*') end - EOL + + register(UNILE) do |obj| + class << obj + alias decode decode_unile + alias encode encode_unile + end + end end end diff --git a/lib/rexml/encodings/US-ASCII.rb b/lib/rexml/encodings/US-ASCII.rb index fe8f6df303..fb4c217074 100644 --- a/lib/rexml/encodings/US-ASCII.rb +++ b/lib/rexml/encodings/US-ASCII.rb @@ -1,8 +1,7 @@ module REXML module Encoding - @@__REXML_encoding_methods = %q~ # Convert from UTF-8 - def encode content + def encode_ascii content array_utf8 = content.unpack('U*') array_enc = [] array_utf8.each do |num| @@ -17,9 +16,15 @@ module REXML end # Convert to UTF-8 - def decode(str) + def decode_ascii(str) str.unpack('C*').pack('U*') end - ~ + + register("US-ASCII") do |obj| + class << obj + alias decode decode_ascii + alias encode encode_ascii + end + end end end diff --git a/lib/rexml/encodings/UTF-16.rb b/lib/rexml/encodings/UTF-16.rb index bd92fce18d..972169755e 100644 --- a/lib/rexml/encodings/UTF-16.rb +++ b/lib/rexml/encodings/UTF-16.rb @@ -1,7 +1,6 @@ module REXML module Encoding - @@__REXML_encoding_methods =<<-EOL - def encode content + def encode_utf16 content array_utf8 = content.unpack("U*") array_enc = [] array_utf8.each do |num| @@ -16,7 +15,7 @@ module REXML array_enc.pack('C*') end - def decode(str) + def decode_utf16(str) array_enc=str.unpack('C*') array_utf8 = [] 2.step(array_enc.size-1, 2){|i| @@ -24,6 +23,12 @@ module REXML } array_utf8.pack('U*') end - EOL + + register(UTF_16) do |obj| + class << obj + alias decode decode_utf16 + alias encode encode_utf16 + end + end end end diff --git a/lib/rexml/encodings/UTF-8.rb b/lib/rexml/encodings/UTF-8.rb index 33a7e490c4..bb08f44100 100644 --- a/lib/rexml/encodings/UTF-8.rb +++ b/lib/rexml/encodings/UTF-8.rb @@ -1,13 +1,18 @@ module REXML module Encoding - @@__REXML_encoding_methods =<<-EOL - def encode content + def encode_utf8 content content end - def decode(str) + def decode_utf8(str) str end - EOL + + register(UTF_8) do |obj| + class << obj + alias decode decode_utf8 + alias encode encode_utf8 + end + end end end -- cgit v1.2.3