diff options
Diffstat (limited to 'lib/rexml/encoding.rb')
-rw-r--r-- | lib/rexml/encoding.rb | 42 |
1 files changed, 19 insertions, 23 deletions
diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb
index 3e7bdfb6aa..d1d5172841 100644
--- a/lib/rexml/encoding.rb
+++ b/lib/rexml/encoding.rb
@@ -1,8 +1,9 @@
 module REXML
   module Encoding
-    # ID ---> Encoding object
+    # ID ---> Encoding name
     attr_reader :encoding
     def encoding=(encoding)
+      encoding = encoding.name if encoding.is_a?(Encoding)
       if encoding.is_a?(String)
         original_encoding = encoding
         encoding = find_encoding(encoding)
@@ -11,35 +12,25 @@ module REXML
         end
       end
       return false if defined?(@encoding) and encoding == @encoding
-      if encoding and encoding != ::Encoding::UTF_8
-        @encoding = encoding
+      if encoding
+        @encoding = encoding.upcase
       else
-        @encoding = ::Encoding::UTF_8
+        @encoding = 'UTF-8'
       end
       true
     end
 
     def check_encoding(xml)
-      # We have to recognize UTF-16, LSB UTF-16, and UTF-8
+      # We have to recognize UTF-16BE, UTF-16LE, and UTF-8
       if xml[0, 2] == "\xfe\xff"
         xml[0, 2] = ""
-        ::Encoding::UTF_16BE
+        return 'UTF-16BE'
       elsif xml[0, 2] == "\xff\xfe"
         xml[0, 2] = ""
-        ::Encoding::UTF_16LE
-      else
-        if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1
-            \s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml
-          encoding_name = $3
-          if /\Autf-16\z/i =~ encoding_name
-            ::Encoding::UTF_16BE
-          else
-            find_encoding(encoding_name)
-          end
-        else
-          ::Encoding::UTF_8
-        end
+        return 'UTF-16LE'
       end
+      xml =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
+      return $3 ? $3.upcase : 'UTF-8'
     end
 
     def encode(string)
@@ -53,14 +44,19 @@ module REXML
     private
     def find_encoding(name)
       case name
-      when "UTF-16"
-        name = "UTF-16BE"
       when /\Ashift-jis\z/i
-        name = "Shift_JIS"
+        return "SHIFT_JIS"
       when /\ACP-(\d+)\z/
         name = "CP#{$1}"
+      when /\AUTF-8\z/i
+        return name
+      end
+      begin
+        ::Encoding::Converter.search_convpath(name, 'UTF-8')
+      rescue ::Encoding::ConverterNotFoundError
+        return nil
       end
-      ::Encoding.find(name)
+      name
     end
   end
 end