diff options
field | value | date |
---|---|---|
author | nagachika <nagachika@ruby-lang.org> | 2022-11-05 13:56:58 +0900 |
committer | nagachika <nagachika@ruby-lang.org> | 2022-11-05 14:24:03 +0900 |
commit | 8572f02d63de2b99389df0791d6eacb77482afef (patch) | |
tree | 18f4c9d21e9fda36fc4dfce7165c9aa51f2e8854 | |
parent | 597ce7966c38e4f7fc7368a860ac2d056de794ba (diff) | |
merge revision(s) 35c794b26d406c39f90e188e3884003fe6aca532,725626d8905fe1ac4a2cf1c3e2db6412bf8f381f,b32a3f1275a8c7748f2134492ce3c532f277d261: [Backport #18964]
Fixed by [Bug #18964]
---
file.c | 3 ---
1 file changed, 3 deletions(-)
[Bug #18964] Update the code range of appended portion
---
ext/-test-/econv/append.c | 15 +++++++++++++++
ext/-test-/econv/extconf.rb | 3 +++
ext/-test-/econv/init.c | 11 +++++++++++
transcode.c | 34 ++++++++++++++++++++++++++++++----
4 files changed, 59 insertions(+), 4 deletions(-)
create mode 100644 ext/-test-/econv/append.c
create mode 100644 ext/-test-/econv/extconf.rb
create mode 100644 ext/-test-/econv/init.c
[Bug #18964] Add test for `rb_econv_append`
---
test/-ext-/econv/test_append.rb | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
create mode 100644 test/-ext-/econv/test_append.rb
-rw-r--r-- | ext/-test-/econv/append.c | 15 |
-rw-r--r-- | ext/-test-/econv/extconf.rb | 3 |
-rw-r--r-- | ext/-test-/econv/init.c | 11 |
-rw-r--r-- | test/-ext-/econv/test_append.rb | 23 |
-rw-r--r-- | transcode.c | 34 |
-rw-r--r-- | version.h | 2 |
6 files changed, 83 insertions, 5 deletions
diff --git a/ext/-test-/econv/append.c b/ext/-test-/econv/append.c new file mode 100644 index 0000000000..724cd136c0 --- /dev/null +++ b/ext/-test-/econv/append.c @@ -0,0 +1,15 @@ +#include "ruby/ruby.h" +#include "ruby/encoding.h" + +static VALUE +econv_append(VALUE self, VALUE src, VALUE dst) +{ + rb_econv_t *ec = DATA_PTR(self); + return rb_econv_str_append(ec, src, dst, 0); +} + +void +Init_econv_append(VALUE klass) +{ + rb_define_method(klass, "append", econv_append, 2); +} diff --git a/ext/-test-/econv/extconf.rb b/ext/-test-/econv/extconf.rb new file mode 100644 index 0000000000..d786b15db9 --- /dev/null +++ b/ext/-test-/econv/extconf.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: false +require_relative "../auto_ext.rb" +auto_ext(inc: true) diff --git a/ext/-test-/econv/init.c b/ext/-test-/econv/init.c new file mode 100644 index 0000000000..9772ebe71c --- /dev/null +++ b/ext/-test-/econv/init.c @@ -0,0 +1,11 @@ +#include "ruby.h" + +#define init(n) {void Init_econv_##n(VALUE klass); Init_econv_##n(klass);} + +void +Init_econv(void) +{ + VALUE mBug = rb_define_module("Bug"); + VALUE klass = rb_define_class_under(mBug, "EConv", rb_path2class("Encoding::Converter")); + TEST_INIT_FUNCS(init); +} diff --git a/test/-ext-/econv/test_append.rb b/test/-ext-/econv/test_append.rb new file mode 100644 index 0000000000..f8c1d2add6 --- /dev/null +++ b/test/-ext-/econv/test_append.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: false +require 'test/unit' +require "-test-/econv" + +class Test_EConvAppend < Test::Unit::TestCase + def test_econv_str_append_valid + ec = Bug::EConv.new("utf-8", "cp932") + dst = "\u3044".encode("cp932") + ret = ec.append("\u3042"*30, dst) + assert_same(dst, ret) + assert_not_predicate(dst, :ascii_only?) + assert_predicate(dst, :valid_encoding?) + end + + def test_econv_str_append_broken + ec = Bug::EConv.new("utf-8", "cp932") + dst = "" + ret = ec.append("\u3042"*30, dst) + assert_same(dst, ret) + assert_not_predicate(dst, :ascii_only?) 
+ assert_not_predicate(dst, :valid_encoding?) + end +end diff --git a/transcode.c b/transcode.c index d7011443f8..e494747823 100644 --- a/transcode.c +++ b/transcode.c @@ -1812,6 +1812,12 @@ rb_econv_asciicompat_encoding(const char *ascii_incompat_name) return data.ascii_compat_name; } +/* + * Append `len` bytes pointed by `ss` to `dst` with converting with `ec`. + * + * If the result of the conversion is not compatible with the encoding of + * `dst`, `dst` may not be valid encoding. + */ VALUE rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags) { @@ -1819,11 +1825,19 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags) unsigned char *ds, *dp, *de; rb_econv_result_t res; int max_output; + enum ruby_coderange_type coderange; + rb_encoding *dst_enc = ec->destination_encoding; if (NIL_P(dst)) { dst = rb_str_buf_new(len); - if (ec->destination_encoding) - rb_enc_associate(dst, ec->destination_encoding); + if (dst_enc) { + rb_enc_associate(dst, dst_enc); + } + coderange = ENC_CODERANGE_7BIT; // scan from the start + } + else { + dst_enc = rb_enc_get(dst); + coderange = rb_enc_str_coderange(dst); } if (ec->last_tc) @@ -1832,13 +1846,13 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags) max_output = 1; do { + int cr; long dlen = RSTRING_LEN(dst); if (rb_str_capacity(dst) - dlen < (size_t)len + max_output) { unsigned long new_capa = (unsigned long)dlen + len + max_output; if (LONG_MAX < new_capa) rb_raise(rb_eArgError, "too long string"); - rb_str_resize(dst, new_capa); - rb_str_set_len(dst, dlen); + rb_str_modify_expand(dst, new_capa - dlen); } sp = (const unsigned char *)ss; se = sp + len; @@ -1846,6 +1860,18 @@ rb_econv_append(rb_econv_t *ec, const char *ss, long len, VALUE dst, int flags) de = ds + rb_str_capacity(dst); dp = ds += dlen; res = rb_econv_convert(ec, &sp, se, &dp, de, flags); + switch (coderange) { + case ENC_CODERANGE_7BIT: + case ENC_CODERANGE_VALID: + cr = 
(int)coderange; + rb_str_coderange_scan_restartable((char *)ds, (char *)dp, dst_enc, &cr); + coderange = cr; + ENC_CODERANGE_SET(dst, coderange); + break; + case ENC_CODERANGE_UNKNOWN: + case ENC_CODERANGE_BROKEN: + break; + } len -= (const char *)sp - ss; ss = (const char *)sp; rb_str_set_len(dst, dlen + (dp - ds)); @@ -11,7 +11,7 @@ # define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR #define RUBY_VERSION_TEENY 3 #define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR -#define RUBY_PATCHLEVEL 173 +#define RUBY_PATCHLEVEL 174 #define RUBY_RELEASE_YEAR 2022 #define RUBY_RELEASE_MONTH 11 |