From f25ff846f6884e202d13ab28e3e10c917b9cdf31 Mon Sep 17 00:00:00 2001 From: naruse Date: Wed, 2 Mar 2011 15:36:48 +0000 Subject: * lib/rexml/encoding.rb (REXML::Encoding#encoding=): store in @encoding a String holding the name of the encoding. This partially reverts r29646. * lib/rexml/document.rb: follow above. * lib/rexml/output.rb: ditto. * lib/rexml/parsers/baseparser.rb: ditto. * lib/rexml/source.rb: ditto. * lib/rexml/xmldecl.rb: ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@31008 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/document.rb | 2 +- lib/rexml/encoding.rb | 42 +++++++++++++++++++---------------------- lib/rexml/formatters/default.rb | 2 +- lib/rexml/output.rb | 2 +- lib/rexml/parsers/baseparser.rb | 2 +- lib/rexml/source.rb | 6 ++---- lib/rexml/xmldecl.rb | 13 +------------ 7 files changed, 26 insertions(+), 43 deletions(-) (limited to 'lib') diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index 68a744d9e5..790a1c78db 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -184,7 +184,7 @@ module REXML # that IE's limited abilities can handle. This hack inserts a space # before the /> on empty tags. 
Defaults to false def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) - if xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output) + if xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output) output = Output.new( output, xml_decl.encoding ) end formatter = if indent > -1 diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb index 3e7bdfb6aa..d1d5172841 100644 --- a/lib/rexml/encoding.rb +++ b/lib/rexml/encoding.rb @@ -1,8 +1,9 @@ module REXML module Encoding - # ID ---> Encoding object + # ID ---> Encoding name attr_reader :encoding def encoding=(encoding) + encoding = encoding.name if encoding.is_a?(Encoding) if encoding.is_a?(String) original_encoding = encoding encoding = find_encoding(encoding) @@ -11,35 +12,25 @@ module REXML end end return false if defined?(@encoding) and encoding == @encoding - if encoding and encoding != ::Encoding::UTF_8 - @encoding = encoding + if encoding + @encoding = encoding.upcase else - @encoding = ::Encoding::UTF_8 + @encoding = 'UTF-8' end true end def check_encoding(xml) - # We have to recognize UTF-16, LSB UTF-16, and UTF-8 + # We have to recognize UTF-16BE, UTF-16LE, and UTF-8 if xml[0, 2] == "\xfe\xff" xml[0, 2] = "" - ::Encoding::UTF_16BE + return 'UTF-16BE' elsif xml[0, 2] == "\xff\xfe" xml[0, 2] = "" - ::Encoding::UTF_16LE - else - if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1 - \s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml - encoding_name = $3 - if /\Autf-16\z/i =~ encoding_name - ::Encoding::UTF_16BE - else - find_encoding(encoding_name) - end - else - ::Encoding::UTF_8 - end + return 'UTF-16LE' end + xml =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m + return $3 ? 
$3.upcase : 'UTF-8' end def encode(string) @@ -53,14 +44,19 @@ module REXML private def find_encoding(name) case name - when "UTF-16" - name = "UTF-16BE" when /\Ashift-jis\z/i - name = "Shift_JIS" + return "SHIFT_JIS" when /\ACP-(\d+)\z/ name = "CP#{$1}" + when /\AUTF-8\z/i + return name + end + begin + ::Encoding::Converter.search_convpath(name, 'UTF-8') + rescue ::Encoding::ConverterNotFoundError + return nil end - ::Encoding.find(name) + name end end end diff --git a/lib/rexml/formatters/default.rb b/lib/rexml/formatters/default.rb index ec4149047d..574c821f96 100644 --- a/lib/rexml/formatters/default.rb +++ b/lib/rexml/formatters/default.rb @@ -22,7 +22,7 @@ module REXML case node when Document - if node.xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output) + if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output) output = Output.new( output, node.xml_decl.encoding ) end write_document( node, output ) diff --git a/lib/rexml/output.rb b/lib/rexml/output.rb index 752f6e1d40..50333ba177 100644 --- a/lib/rexml/output.rb +++ b/lib/rexml/output.rb @@ -10,7 +10,7 @@ module REXML @output = real_IO self.encoding = encd - @to_utf = (@encoding != ::Encoding::UTF_8) + @to_utf = encd != 'UTF-8' end def <<( content ) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index ee8b160ce5..0f1480b07d 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -248,7 +248,7 @@ module REXML @document_status = :after_doctype @source.read if @source.buffer.size<2 md = @source.match(/\s*/um, true) - if @source.encoding == ::Encoding::UTF_8 + if @source.encoding == "UTF-8" @source.buffer.force_encoding(::Encoding::UTF_8) end end diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index 227b0c56c4..112393cfd4 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -54,14 +54,12 @@ module REXML def encoding=(enc) return unless super @line_break = encode( '>' ) - if @encoding != ::Encoding::UTF_8 + if 
@encoding != 'UTF-8' @buffer = decode(@buffer) @to_utf = true else @to_utf = false - if @buffer.respond_to? :force_encoding - @buffer.force_encoding ::Encoding::UTF_8 - end + @buffer.force_encoding ::Encoding::UTF_8 end end diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb index 81d3057732..361e4b7106 100644 --- a/lib/rexml/xmldecl.rb +++ b/lib/rexml/xmldecl.rb @@ -109,20 +109,9 @@ module REXML end private - def normalized_encoding_name(_encoding) - if _encoding == ::Encoding::UTF_16BE - "UTF-16" - else - return _encoding.name - end - end - def content(enc) rv = "version='#@version'" - if @writeencoding || enc.to_s !~ /\Autf-8\z/i - encoding_name = normalized_encoding_name(enc) - rv << " encoding='#{encoding_name}'" - end + rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i rv << " standalone='#@standalone'" if @standalone rv end -- cgit v1.2.3