summaryrefslogtreecommitdiff
path: root/test/ruby/test_transcode.rb
diff options
context:
space:
mode:
authorJeremy Evans <code@jeremyevans.net>2021-06-26 12:32:39 -0700
committerGitHub <noreply@github.com>2021-06-26 12:32:39 -0700
commite86c1f6fc53433ef5c82ed2b7a4cc9a12c153e4c (patch)
treec72cf93eaf31977d2cd80ecda12683c4017e3b55 /test/ruby/test_transcode.rb
parent391abc543cea118a9cd7d6310acadbfa352668ef (diff)
Work around transcoding issue with non-ASCII compatible encodings and xml escaping
When using a non-ASCII compatible source and destination encoding and xml escaping (the :xml option to String#encode), the resulting string was broken, as it used the correct non-ASCII compatible encoding, but contained data that was ASCII-compatible instead of compatible with the string's encoding. Work around this issue by detecting the case where both the source and destination encoding are non-ASCII compatible, and transcoding the source string from the non-ASCII compatible encoding to UTF-8. The xml escaping code will correctly handle the UTF-8 source string and return the correctly encoded and escaped value. Fixes [Bug #12052] Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/4605 Merged-By: jeremyevans <code@jeremyevans.net>
Diffstat (limited to 'test/ruby/test_transcode.rb')
-rw-r--r--test/ruby/test_transcode.rb19
1 files changed, 19 insertions, 0 deletions
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index 04c8248697..17384fd0c7 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -126,6 +126,25 @@ class TestTranscode < Test::Unit::TestCase
assert_equal("D\xFCrst".force_encoding('iso-8859-2'), "D\xFCrst".encode('iso-8859-2', 'iso-8859-1'))
end
+ def test_encode_xml_multibyte
+ encodings = %w'UTF-8 UTF-16LE UTF-16BE UTF-32LE UTF-32BE'
+ encodings.each do |src_enc|
+ encodings.each do |dst_enc|
+ escaped = "<>".encode(src_enc).encode(dst_enc, :xml=>:text)
+ assert_equal("&lt;&gt;", escaped.encode('UTF-8'), "failed encoding #{src_enc} to #{dst_enc} with xml: :text")
+
+ escaped = '<">'.encode(src_enc).encode(dst_enc, :xml=>:attr)
+ assert_equal('"&lt;&quot;&gt;"', escaped.encode('UTF-8'), "failed encoding #{src_enc} to #{dst_enc} with xml: :attr")
+
+ escaped = "<>".encode(src_enc).force_encoding("UTF-8").encode(dst_enc, src_enc, :xml=>:text)
+ assert_equal("&lt;&gt;", escaped.encode('UTF-8'), "failed encoding #{src_enc} to #{dst_enc} with xml: :text")
+
+ escaped = '<">'.encode(src_enc).force_encoding("UTF-8").encode(dst_enc, src_enc, :xml=>:attr)
+ assert_equal('"&lt;&quot;&gt;"', escaped.encode('UTF-8'), "failed encoding #{src_enc} to #{dst_enc} with xml: :attr")
+ end
+ end
+ end
+
def test_ascii_range
encodings = [
'US-ASCII', 'ASCII-8BIT',