summaryrefslogtreecommitdiff
path: root/lib/rexml/encoding.rb
diff options
context:
space:
mode:
authorkou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2010-10-30 12:10:56 +0000
committerkou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2010-10-30 12:10:56 +0000
commit994f066f76857a781f8819b8da2c2aeceedbf87b (patch)
treed89e9bcf6ac8d558367f888919de0c4894224063 /lib/rexml/encoding.rb
parent767fe5170d97461be5c79936b467dee3d4eb7179 (diff)
* lib/rexml/encoding.rb: use Ruby native encoding mechanism. [ruby-dev:42464]
* lib/rexml/encodings/: remove. * lib/rexml/document.rb, lib/rexml/formatters/default.rb, lib/rexml/output.rb, lib/rexml/parseexception.rb, lib/rexml/parsers/baseparser.rb, lib/rexml/source.rb, lib/rexml/xmldecl.rb: use Ruby's native Encoding object. * test/rexml/, test/rss/: follow the above encoding changes. * NEWS: add REXML's incompatible change about encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29646 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/encoding.rb')
-rw-r--r--lib/rexml/encoding.rb116
1 files changed, 56 insertions, 60 deletions
diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb
index 3feffb80f4..0c4a88fbeb 100644
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@@ -1,71 +1,67 @@
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
module REXML
module Encoding
- @encoding_methods = {}
- def self.register(enc, &block)
- @encoding_methods[enc] = block
- end
- def self.apply(obj, enc)
- @encoding_methods[enc][obj]
- end
- def self.encoding_method(enc)
- @encoding_methods[enc]
- end
-
- # Native, default format is UTF-8, so it is declared here rather than in
- # an encodings/ definition.
- UTF_8 = 'UTF-8'
- UTF_16 = 'UTF-16'
- UNILE = 'UNILE'
-
- # ID ---> Encoding name
- attr_reader :encoding
- def encoding=( enc )
- old_verbosity = $VERBOSE
- begin
- $VERBOSE = false
- enc = enc.nil? ? nil : enc.upcase
- return false if defined? @encoding and enc == @encoding
- if enc and enc != UTF_8
- @encoding = enc
- raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
- @encoding.untaint
- begin
- require 'rexml/encodings/ICONV.rb'
- Encoding.apply(self, "ICONV")
- rescue LoadError, Exception
- begin
- enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
- require enc_file
- Encoding.apply(self, @encoding)
- rescue LoadError => err
- puts err.message
- raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
- end
- end
- else
- @encoding = UTF_8
- require 'rexml/encodings/UTF-8.rb'
- Encoding.apply(self, @encoding)
- end
- ensure
- $VERBOSE = old_verbosity
+ # ID ---> Encoding object
+ attr_reader :encoding
+ def encoding=(encoding)
+ if encoding.is_a?(String)
+ original_encoding = encoding
+ encoding = find_encoding(encoding)
+ unless encoding
+ raise ArgumentError, "Bad encoding name #{original_encoding}"
+ end
+ end
+ return false if defined?(@encoding) and encoding == @encoding
+ if encoding and encoding != ::Encoding::UTF_8
+ @encoding = encoding
+ else
+ @encoding = ::Encoding::UTF_8
end
true
end
- def check_encoding str
+ def check_encoding(xml)
# We have to recognize UTF-16, LSB UTF-16, and UTF-8
- if str[0,2] == "\xfe\xff"
- str[0,2] = ""
- return UTF_16
- elsif str[0,2] == "\xff\xfe"
- str[0,2] = ""
- return UNILE
- end
- str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
- return $3.upcase if $3
- return UTF_8
+ if xml[0, 2] == "\xfe\xff"
+ xml[0, 2] = ""
+ ::Encoding::UTF_16BE
+ elsif xml[0, 2] == "\xff\xfe"
+ xml[0, 2] = ""
+ ::Encoding::UTF_16LE
+ else
+ if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1
+ \s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml
+ encoding_name = $3
+ if /\Autf-16\z/i =~ encoding_name
+ ::Encoding::UTF_16BE
+ else
+ find_encoding(encoding_name)
+ end
+ else
+ ::Encoding::UTF_8
+ end
+ end
end
+
+ def encode(string)
+ string.encode(@encoding)
+ end
+
+ def decode(string)
+ string.encode(::Encoding::UTF_8, @encoding)
+ end
+
+ private
+ def find_encoding(name)
+ case name
+ when "UTF-16"
+ name = "UTF-16BE"
+ when /\Ashift-jis\z/i
+ name = "Shift_JIS"
+ when /\ACP-(\d+)\z/
+ name = "CP#{$1}"
+ end
+ ::Encoding.find(name)
+ end
end
end