summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nobuyoshi Nakada <nobu@ruby-lang.org> 2023-09-26 13:25:44 +0900
committer: Nobuyoshi Nakada <nobu@ruby-lang.org> 2023-09-26 15:35:40 +0900
commit: 6b66b5fdedb2c9a9ee48e290d57ca7f8d55e01a2 (patch)
tree: a534e2aacfc78f270444742f4837ea629b281d27
parent: f0d827860783afd34e12450dd310e50917d396e3 (diff)
[Bug #19902] Update the coderange regarding the changed region
-rw-r--r-- ext/-test-/string/set_len.c 10
-rw-r--r-- string.c 27
-rw-r--r-- test/-ext-/string/test_set_len.rb 29
3 files changed, 66 insertions, 0 deletions
diff --git a/ext/-test-/string/set_len.c b/ext/-test-/string/set_len.c
index 219cea404c..049da2cdb5 100644
--- a/ext/-test-/string/set_len.c
+++ b/ext/-test-/string/set_len.c
@@ -7,8 +7,18 @@ bug_str_set_len(VALUE str, VALUE len)
return str;
}
+static VALUE /* test helper: copies addendum's bytes just past the end of str */
+bug_str_append(VALUE str, VALUE addendum)
+{
+ StringValue(addendum); /* require a String (or something convertible to one) */
+ rb_str_modify_expand(str, RSTRING_LEN(addendum)); /* make str writable with room for the extra bytes */
+ memcpy(RSTRING_END(str), RSTRING_PTR(addendum), RSTRING_LEN(addendum)); /* write beyond the current end */
+ return str; /* the length is not updated here; callers follow up with set_len */
+}
+
void /* registers the length-manipulation test helpers as methods on klass */
Init_string_set_len(VALUE klass)
{
rb_define_method(klass, "set_len", bug_str_set_len, 1); /* one argument: the new length */
+ rb_define_method(klass, "append", bug_str_append, 1); /* one argument: the string to copy in */
}
diff --git a/string.c b/string.c
index deeed4a12a..18d03dc72e 100644
--- a/string.c
+++ b/string.c
@@ -2985,6 +2985,33 @@ rb_str_set_len(VALUE str, long len)
if (len > (capa = (long)str_capacity(str, termlen)) || len < 0) {
rb_bug("probable buffer overflow: %ld for %ld", len, capa);
}
+
+ int cr = ENC_CODERANGE(str);
+ if (cr == ENC_CODERANGE_UNKNOWN) {
+ /* Leave unknown. */
+ }
+ else if (len > RSTRING_LEN(str)) {
+ if (ENC_CODERANGE_CLEAN_P(cr)) {
+ /* Update the coderange regarding the extended part. */
+ const char *const prev_end = RSTRING_END(str);
+ const char *const new_end = RSTRING_PTR(str) + len;
+ rb_encoding *enc = rb_enc_get(str);
+ rb_str_coderange_scan_restartable(prev_end, new_end, enc, &cr);
+ ENC_CODERANGE_SET(str, cr);
+ }
+ else if (cr == ENC_CODERANGE_BROKEN) {
+ /* May be valid now, by appended part. */
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_UNKNOWN);
+ }
+ }
+ else if (len < RSTRING_LEN(str)) {
+ if (cr != ENC_CODERANGE_7BIT) {
+ /* An ASCII-only string stays ASCII-only after truncation. A
+ * valid or broken string may become either, so leave it unknown. */
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_UNKNOWN);
+ }
+ }
+
STR_SET_LEN(str, len);
TERM_FILL(&RSTRING_PTR(str)[len], termlen);
}
diff --git a/test/-ext-/string/test_set_len.rb b/test/-ext-/string/test_set_len.rb
index 67ba961194..e3eff75d9b 100644
--- a/test/-ext-/string/test_set_len.rb
+++ b/test/-ext-/string/test_set_len.rb
@@ -34,4 +34,33 @@ class Test_StrSetLen < Test::Unit::TestCase
assert_equal 128, Bug::String.capacity(str)
assert_equal 127, str.set_len(127).bytesize, bug12757
end
+
+ def test_coderange_after_append # [Bug #19902] growing the length over appended bytes must update the coderange
+ u = -"\u3042" # a single non-ASCII character
+ str = Bug::String.new(encoding: Encoding::UTF_8)
+ bsize = u.bytesize
+ str.append(u) # copies the bytes in without changing the length
+ assert_equal 0, str.bytesize
+ str.set_len(bsize) # extend the length over the appended bytes
+ assert_equal bsize, str.bytesize
+ assert_predicate str, :valid_encoding?
+ assert_not_predicate str, :ascii_only?
+ assert_equal u, str
+ end
+
+ def test_coderange_after_trunc # [Bug #19902] truncate mid-character, then restore the missing byte
+ u = -"\u3042" # a single non-ASCII character
+ bsize = u.bytesize
+ str = Bug::String.new(u)
+ str.set_len(bsize - 1) # drop the last byte of the character
+ assert_equal bsize - 1, str.bytesize
+ assert_not_predicate str, :valid_encoding?
+ assert_not_predicate str, :ascii_only?
+ str.append(u.byteslice(-1)) # put the missing byte back just past the end
+ str.set_len(bsize) # extend the length over the restored byte
+ assert_equal bsize, str.bytesize
+ assert_predicate str, :valid_encoding?
+ assert_not_predicate str, :ascii_only?
+ assert_equal u, str
+ end
end