diff options
author | Jean Boussier <byroot@ruby-lang.org> | 2023-11-08 14:05:52 +0100 |
---|---|---|
committer | Jean Boussier <jean.boussier@gmail.com> | 2023-11-09 12:38:10 +0100 |
commit | ea1b1ea1aa98bc9488564ef18aa4032aa1cb5536 (patch) | |
tree | 8e0428d9ddf0cf765bc06538adda3b7a98ccb0be /string.c | |
parent | 0a7e620a36a74c4fc604f9068fb839658678f96c (diff) |
String#force_encoding: don't clear coderange if encoding is unchanged
Some code out there blindly calls `force_encoding` without checking
what the original encoding was, which clears the coderange uselessly.
If the String is big, it can be a rather costly mistake.
For instance the `rack-utf8_sanitizer` gem does this on request
bodies.
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 18 |
1 files changed, 17 insertions, 1 deletions
```diff
@@ -10843,7 +10843,23 @@ static VALUE
 rb_str_force_encoding(VALUE str, VALUE enc)
 {
     str_modifiable(str);
-    rb_enc_associate(str, rb_to_encoding(enc));
+
+    rb_encoding *encoding = rb_to_encoding(enc);
+    int idx = rb_enc_to_index(encoding);
+
+    // If the encoding is unchanged, we do nothing.
+    if (ENCODING_GET(str) == idx) {
+        return str;
+    }
+
+    rb_enc_associate_index(str, idx);
+
+    // If the coderange was 7bit and the new encoding is ASCII-compatible
+    // we can keep the coderange.
+    if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT && encoding && rb_enc_asciicompat(encoding)) {
+        return str;
+    }
+
     ENC_CODERANGE_CLEAR(str);
     return str;
 }
```