summary refs log tree commit diff
path: root/transcode.c
diff options
context:
space:
mode:
author Nobuyoshi Nakada <nobu@ruby-lang.org> 2022-08-18 11:37:54 +0900
committer Nobuyoshi Nakada <nobu@ruby-lang.org> 2022-08-18 14:57:08 +0900
commit 725626d8905fe1ac4a2cf1c3e2db6412bf8f381f (patch)
tree 7cae7468c03688405b2a6fa6cfe5af21fb599f7e /transcode.c
parent 2a55c61ee77df55e8715809958ea0439f3918cf2 (diff)
[Bug #18964] Update the code range of appended portion
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/6250
Diffstat (limited to 'transcode.c')
-rw-r--r-- transcode.c 34
1 files changed, 30 insertions, 4 deletions
diff --git a/transcode.c b/transcode.c
index 939e9567f9..5fafad398f 100644
--- a/transcode.c
+++ b/transcode.c
@@ -1812,6 +1812,12 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name)
return data.ascii_compat_name;
}
+/*
+ * Append `len` bytes pointed to by `ss` to `dst`, converting them with `ec`.
+ *
+ * If the result of the conversion is not compatible with the encoding of
+ * `dst`, `dst` may no longer be validly encoded.
+ */
VALUE
rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
{
@@ -1819,11 +1825,19 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
unsigned char *ds, *dp, *de;
rb_econv_result_t res;
int max_output;
+ enum ruby_coderange_type coderange;
+ rb_encoding *dst_enc = ec->destination_encoding;
if (NIL_P(dst)) {
dst = rb_str_buf_new(len);
- if (ec->destination_encoding)
- rb_enc_associate(dst, ec->destination_encoding);
+ if (dst_enc) {
+ rb_enc_associate(dst, dst_enc);
+ }
+ coderange = ENC_CODERANGE_7BIT; // scan from the start
+ }
+ else {
+ dst_enc = rb_enc_get(dst);
+ coderange = rb_enc_str_coderange(dst);
}
if (ec->last_tc)
@@ -1832,13 +1846,13 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
max_output = 1;
do {
+ int cr;
long dlen = RSTRING_LEN(dst);
if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) {
unsigned long new_capa = (unsigned long)dlen + len + max_output;
if (LONG_MAX < new_capa)
rb_raise(rb_eArgError, "too long string");
- rb_str_resize(dst, new_capa);
- rb_str_set_len(dst, dlen);
+ rb_str_modify_expand(dst, new_capa - dlen);
}
sp = (const unsigned char *)ss;
se = sp + len;
@@ -1846,6 +1860,18 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags)
de = ds + rb_str_capacity(dst);
dp = ds += dlen;
res = rb_econv_convert(ec, &sp, se, &dp, de, flags);
+ switch (coderange) {
+ case ENC_CODERANGE_7BIT:
+ case ENC_CODERANGE_VALID:
+ cr = (int)coderange;
+ rb_str_coderange_scan_restartable((char *)ds, (char *)dp, dst_enc, &cr);
+ coderange = cr;
+ ENC_CODERANGE_SET(dst, coderange);
+ break;
+ case ENC_CODERANGE_UNKNOWN:
+ case ENC_CODERANGE_BROKEN:
+ break;
+ }
len -= (const char *)sp - ss;
ss = (const char *)sp;
rb_str_set_len(dst, dlen + (dp - ds));