summary | refs | log | tree | commit | diff
path: root/string.c
diff options
context:
space:
mode:
author: Nobuyoshi Nakada <nobu@ruby-lang.org> 2023-09-26 13:25:44 +0900
committer: Nobuyoshi Nakada <nobu@ruby-lang.org> 2023-09-26 15:35:40 +0900
commit: 6b66b5fdedb2c9a9ee48e290d57ca7f8d55e01a2 (patch)
tree: a534e2aacfc78f270444742f4837ea629b281d27 /string.c
parent: f0d827860783afd34e12450dd310e50917d396e3 (diff)
[Bug #19902] Update the coderange regarding the changed region
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 27
1 files changed, 27 insertions, 0 deletions
diff --git a/string.c b/string.c
index deeed4a12a..18d03dc72e 100644
--- a/string.c
+++ b/string.c
@@ -2985,6 +2985,33 @@ rb_str_set_len(VALUE str, long len)
if (len > (capa = (long)str_capacity(str, termlen)) || len < 0) {
rb_bug("probable buffer overflow: %ld for %ld", len, capa);
}
+
+ int cr = ENC_CODERANGE(str);
+ if (cr == ENC_CODERANGE_UNKNOWN) {
+ /* Leave unknown. */
+ }
+ else if (len > RSTRING_LEN(str)) {
+ if (ENC_CODERANGE_CLEAN_P(cr)) {
+ /* Update the coderange regarding the extended part. */
+ const char *const prev_end = RSTRING_END(str);
+ const char *const new_end = RSTRING_PTR(str) + len;
+ rb_encoding *enc = rb_enc_get(str);
+ rb_str_coderange_scan_restartable(prev_end, new_end, enc, &cr);
+ ENC_CODERANGE_SET(str, cr);
+ }
+ else if (cr == ENC_CODERANGE_BROKEN) {
+ /* May be valid now, by appended part. */
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_UNKNOWN);
+ }
+ }
+ else if (len < RSTRING_LEN(str)) {
+ if (cr != ENC_CODERANGE_7BIT) {
+ /* An ASCII-only string stays ASCII-only after truncation. A
+ * valid or broken string may become either one, so leave it unknown. */
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_UNKNOWN);
+ }
+ }
+
STR_SET_LEN(str, len);
TERM_FILL(&RSTRING_PTR(str)[len], termlen);
}