summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2011-03-02 15:36:48 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2011-03-02 15:36:48 +0000
commit f25ff846f6884e202d13ab28e3e10c917b9cdf31 (patch)
tree 1912c745ed74d061e3213706184d63af6d7963a1 /lib
parent cddcffb8f9dd015650b2ac02235bfe39261989f9 (diff)
* lib/rexml/encoding.rb (REXML::Encoding#encoding=): store @encoding
  a String which means the name of the encoding.
  this partially revert r29646.
* lib/rexml/document.rb: follow above.
* lib/rexml/output.rb: ditto.
* lib/rexml/parsers/baseparser.rb: ditto.
* lib/rexml/source.rb: ditto.
* lib/rexml/xmldecl.rb: ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@31008 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib')
-rw-r--r-- lib/rexml/document.rb 2
-rw-r--r-- lib/rexml/encoding.rb 42
-rw-r--r-- lib/rexml/formatters/default.rb 2
-rw-r--r-- lib/rexml/output.rb 2
-rw-r--r-- lib/rexml/parsers/baseparser.rb 2
-rw-r--r-- lib/rexml/source.rb 6
-rw-r--r-- lib/rexml/xmldecl.rb 13
7 files changed, 26 insertions, 43 deletions
diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb
index 68a744d9e5..790a1c78db 100644
--- a/lib/rexml/document.rb
+++ b/lib/rexml/document.rb
@@ -184,7 +184,7 @@ module REXML
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags. Defaults to false
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
- if xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output)
+ if xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
output = Output.new( output, xml_decl.encoding )
end
formatter = if indent > -1
diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb
index 3e7bdfb6aa..d1d5172841 100644
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@@ -1,8 +1,9 @@
module REXML
module Encoding
- # ID ---> Encoding object
+ # ID ---> Encoding name
attr_reader :encoding
def encoding=(encoding)
+ encoding = encoding.name if encoding.is_a?(Encoding)
if encoding.is_a?(String)
original_encoding = encoding
encoding = find_encoding(encoding)
@@ -11,35 +12,25 @@ module REXML
end
end
return false if defined?(@encoding) and encoding == @encoding
- if encoding and encoding != ::Encoding::UTF_8
- @encoding = encoding
+ if encoding
+ @encoding = encoding.upcase
else
- @encoding = ::Encoding::UTF_8
+ @encoding = 'UTF-8'
end
true
end
def check_encoding(xml)
- # We have to recognize UTF-16, LSB UTF-16, and UTF-8
+ # We have to recognize UTF-16BE, UTF-16LE, and UTF-8
if xml[0, 2] == "\xfe\xff"
xml[0, 2] = ""
- ::Encoding::UTF_16BE
+ return 'UTF-16BE'
elsif xml[0, 2] == "\xff\xfe"
xml[0, 2] = ""
- ::Encoding::UTF_16LE
- else
- if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1
- \s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml
- encoding_name = $3
- if /\Autf-16\z/i =~ encoding_name
- ::Encoding::UTF_16BE
- else
- find_encoding(encoding_name)
- end
- else
- ::Encoding::UTF_8
- end
+ return 'UTF-16LE'
end
+ xml =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
+ return $3 ? $3.upcase : 'UTF-8'
end
def encode(string)
@@ -53,14 +44,19 @@ module REXML
private
def find_encoding(name)
case name
- when "UTF-16"
- name = "UTF-16BE"
when /\Ashift-jis\z/i
- name = "Shift_JIS"
+ return "SHIFT_JIS"
when /\ACP-(\d+)\z/
name = "CP#{$1}"
+ when /\AUTF-8\z/i
+ return name
+ end
+ begin
+ ::Encoding::Converter.search_convpath(name, 'UTF-8')
+ rescue ::Encoding::ConverterNotFoundError
+ return nil
end
- ::Encoding.find(name)
+ name
end
end
end
diff --git a/lib/rexml/formatters/default.rb b/lib/rexml/formatters/default.rb
index ec4149047d..574c821f96 100644
--- a/lib/rexml/formatters/default.rb
+++ b/lib/rexml/formatters/default.rb
@@ -22,7 +22,7 @@ module REXML
case node
when Document
- if node.xml_decl.encoding != ::Encoding::UTF_8 && !output.kind_of?(Output)
+ if node.xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
output = Output.new( output, node.xml_decl.encoding )
end
write_document( node, output )
diff --git a/lib/rexml/output.rb b/lib/rexml/output.rb
index 752f6e1d40..50333ba177 100644
--- a/lib/rexml/output.rb
+++ b/lib/rexml/output.rb
@@ -10,7 +10,7 @@ module REXML
@output = real_IO
self.encoding = encd
- @to_utf = (@encoding != ::Encoding::UTF_8)
+ @to_utf = encd != 'UTF-8'
end
def <<( content )
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index ee8b160ce5..0f1480b07d 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -248,7 +248,7 @@ module REXML
@document_status = :after_doctype
@source.read if @source.buffer.size<2
md = @source.match(/\s*/um, true)
- if @source.encoding == ::Encoding::UTF_8
+ if @source.encoding == "UTF-8"
@source.buffer.force_encoding(::Encoding::UTF_8)
end
end
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index 227b0c56c4..112393cfd4 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -54,14 +54,12 @@ module REXML
def encoding=(enc)
return unless super
@line_break = encode( '>' )
- if @encoding != ::Encoding::UTF_8
+ if @encoding != 'UTF-8'
@buffer = decode(@buffer)
@to_utf = true
else
@to_utf = false
- if @buffer.respond_to? :force_encoding
- @buffer.force_encoding ::Encoding::UTF_8
- end
+ @buffer.force_encoding ::Encoding::UTF_8
end
end
diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb
index 81d3057732..361e4b7106 100644
--- a/lib/rexml/xmldecl.rb
+++ b/lib/rexml/xmldecl.rb
@@ -109,20 +109,9 @@ module REXML
end
private
- def normalized_encoding_name(_encoding)
- if _encoding == ::Encoding::UTF_16BE
- "UTF-16"
- else
- return _encoding.name
- end
- end
-
def content(enc)
rv = "version='#@version'"
- if @writeencoding || enc.to_s !~ /\Autf-8\z/i
- encoding_name = normalized_encoding_name(enc)
- rv << " encoding='#{encoding_name}'"
- end
+ rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
rv << " standalone='#@standalone'" if @standalone
rv
end