summaryrefslogtreecommitdiff
path: root/string.c
diff options
context:
space:
mode:
authorJean Boussier <byroot@ruby-lang.org>2023-11-08 14:05:52 +0100
committerJean Boussier <jean.boussier@gmail.com>2023-11-09 12:38:10 +0100
commitea1b1ea1aa98bc9488564ef18aa4032aa1cb5536 (patch)
tree8e0428d9ddf0cf765bc06538adda3b7a98ccb0be /string.c
parent0a7e620a36a74c4fc604f9068fb839658678f96c (diff)
String#force_encoding: don't clear coderange if encoding is unchanged
Some code out there blindly calls `force_encoding` without checking what the original encoding was, which needlessly clears the coderange. If the String is big, this can be a rather costly mistake. For instance, the `rack-utf8_sanitizer` gem does this on request bodies.
Diffstat (limited to 'string.c')
-rw-r--r--string.c18
1 files changed, 17 insertions, 1 deletions
diff --git a/string.c b/string.c
index b3004624dd..41641c67e8 100644
--- a/string.c
+++ b/string.c
@@ -10843,7 +10843,23 @@ static VALUE
rb_str_force_encoding(VALUE str, VALUE enc)
{
str_modifiable(str);
- rb_enc_associate(str, rb_to_encoding(enc));
+
+ rb_encoding *encoding = rb_to_encoding(enc);
+ int idx = rb_enc_to_index(encoding);
+
+ // If the encoding is unchanged, we do nothing.
+ if (ENCODING_GET(str) == idx) {
+ return str;
+ }
+
+ rb_enc_associate_index(str, idx);
+
+ // If the coderange was 7bit and the new encoding is ASCII-compatible
+ // we can keep the coderange.
+ if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT && encoding && rb_enc_asciicompat(encoding)) {
+ return str;
+ }
+
ENC_CODERANGE_CLEAR(str);
return str;
}