summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog7
-rw-r--r--lib/rexml/parsers/baseparser.rb3
-rw-r--r--test/rexml/test_document.rb22
3 files changed, 32 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index e48e9b7be3..5e6e4687ac 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Sun Oct 28 21:40:13 2012 Kouhei Sutou <kou@cozmixng.org>
+
+ * lib/rexml/parsers/baseparser.rb: Fix a bug that UTF-8 is used
+ for UTF-16XX encoded XML that doesn't have encoding="UTF-16" in
+ XML declaration.
+ * test/rexml/test_document.rb: Add tests for the above change.
+
Sun Oct 28 21:37:34 2012 Kouhei Sutou <kou@cozmixng.org>
* test/rexml/test_document.rb: Group tests that they parse
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index dc4a1c8bee..a88896c5db 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -215,6 +215,9 @@ module REXML
if need_source_encoding_update?(encoding)
@source.encoding = encoding
end
+ if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding
+ encoding = "UTF-16"
+ end
standalone = STANDALONE.match(results)
standalone = standalone[1] unless standalone.nil?
return [ :xmldecl, version, encoding, standalone ]
diff --git a/test/rexml/test_document.rb b/test/rexml/test_document.rb
index 028fa988a6..4c5d7d1dd8 100644
--- a/test/rexml/test_document.rb
+++ b/test/rexml/test_document.rb
@@ -246,5 +246,27 @@ EOX
assert_equal("UTF-16", document.encoding)
end
end
+
+ class NoEncodingTest < self
+ def test_utf_16le
+ xml = <<-EOX.encode("UTF-16LE").force_encoding("ASCII-8BIT")
+<?xml version="1.0"?>
+<message>Hello world!</message>
+EOX
+ bom = "\ufeff".encode("UTF-16LE").force_encoding("ASCII-8BIT")
+ document = REXML::Document.new(bom + xml)
+ assert_equal("UTF-16", document.encoding)
+ end
+
+ def test_utf_16be
+ xml = <<-EOX.encode("UTF-16BE").force_encoding("ASCII-8BIT")
+<?xml version="1.0"?>
+<message>Hello world!</message>
+EOX
+ bom = "\ufeff".encode("UTF-16BE").force_encoding("ASCII-8BIT")
+ document = REXML::Document.new(bom + xml)
+ assert_equal("UTF-16", document.encoding)
+ end
+ end
end
end