summaryrefslogtreecommitdiff
path: root/transcode.c
diff options
context:
space:
mode:
authornagachika <nagachika@ruby-lang.org>2021-07-03 13:49:46 +0900
committernagachika <nagachika@ruby-lang.org>2021-07-03 13:49:46 +0900
commite62cccaeb0986d43480bccbd365cb20056bda4d7 (patch)
tree7b70b94213b5b5758e9e1292fc3022011d2c914a /transcode.c
parent2aad080396f5b79a33502f1d812fb237968cb931 (diff)
merge revision(s) e86c1f6fc53433ef5c82ed2b7a4cc9a12c153e4c,f6539202c52a051a4e6946a318a1d9cd29002990: [Backport #12052]
Work around transcoding issue with non-ASCII compatible encodings and xml escaping When using a non-ASCII compatible source and destination encoding and xml escaping (the :xml option to String#encode), the resulting string was broken, as it used the correct non-ASCII compatible encoding, but contained data that was ASCII-compatible instead of compatible with the string's encoding. Work around this issue by detecting the case where both the source and destination encoding are non-ASCII compatible, and transcoding the source string from the non-ASCII compatible encoding to UTF-8. The xml escaping code will correctly handle the UTF-8 source string and then return the correctly encoded and escaped value. Fixes [Bug #12052] Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org> --- test/ruby/test_transcode.rb | 19 +++++++++++++++++++ transcode.c | 6 ++++++ 2 files changed, 25 insertions(+) - add regression tests for U+6E7F (湿) in ISO-2022-JP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In ISO-2022-JP, the bytes used to code 湿 are the same as those for "<>". This adds regression tests to make sure that these bytes, when representing 湿, are NOT escaped with encode("ISO-2022-JP", xml: :text) or similar. These are additional regression tests for #12052. --- test/ruby/test_transcode.rb | 3 +++ 1 file changed, 3 insertions(+)
Diffstat (limited to 'transcode.c')
-rw-r--r--transcode.c6
1 files changed, 6 insertions, 0 deletions
diff --git a/transcode.c b/transcode.c
index a72afdc44b..d2abd9e0e5 100644
--- a/transcode.c
+++ b/transcode.c
@@ -2719,6 +2719,12 @@ str_transcode0(int argc, VALUE *argv, VALUE *self, int ecflags, VALUE ecopts)
}
}
else {
+ if (senc && denc && !rb_enc_asciicompat(senc) && !rb_enc_asciicompat(denc)) {
+ rb_encoding *utf8 = rb_utf8_encoding();
+ str = rb_str_conv_enc(str, senc, utf8);
+ senc = utf8;
+ sname = "UTF-8";
+ }
if (encoding_equal(sname, dname)) {
sname = "";
dname = "";