diff options
author | Kazuki Yamaguchi <k@rhe.jp> | 2022-09-26 16:39:53 +0900 |
---|---|---|
committer | Kazuki Yamaguchi <k@rhe.jp> | 2022-09-26 16:44:46 +0900 |
commit | 5b0396473bbcd70756a09d887fb7436d6cd72dce (patch) | |
tree | bf33032d7dad0c028d9b1781bc6018e2f7e99e04 /string.c | |
parent | 1c14e406d3c4a4c660f66f0d1c642d1ed2aabed2 (diff) |
Fix coderange calculation in String#b
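Leave the new coderange unknown if the original encoding is not
ASCII-compatible. A string in a non-ASCII-compatible encoding whose
coderange is valid or broken can still end up ASCII-only once it is
reinterpreted as binary, so the receiver's coderange does not determine
the result's coderange.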
Fixes 9a8f6e392fbd ("Cheaply derive code range for String#b return
value", 2022-07-25).
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 30 |
1 files changed, 16 insertions, 14 deletions
@@ -10771,20 +10771,22 @@ rb_str_b(VALUE str) } str_replace_shared_without_enc(str2, str); - // BINARY strings can never be broken; they're either 7-bit ASCII or VALID. - // If we know the receiver's code range then we know the result's code range. - int cr = ENC_CODERANGE(str); - switch (cr) { - case ENC_CODERANGE_7BIT: - ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT); - break; - case ENC_CODERANGE_BROKEN: - case ENC_CODERANGE_VALID: - ENC_CODERANGE_SET(str2, ENC_CODERANGE_VALID); - break; - default: - ENC_CODERANGE_CLEAR(str2); - break; + if (rb_enc_asciicompat(STR_ENC_GET(str))) { + // BINARY strings can never be broken; they're either 7-bit ASCII or VALID. + // If we know the receiver's code range then we know the result's code range. + int cr = ENC_CODERANGE(str); + switch (cr) { + case ENC_CODERANGE_7BIT: + ENC_CODERANGE_SET(str2, ENC_CODERANGE_7BIT); + break; + case ENC_CODERANGE_BROKEN: + case ENC_CODERANGE_VALID: + ENC_CODERANGE_SET(str2, ENC_CODERANGE_VALID); + break; + default: + ENC_CODERANGE_CLEAR(str2); + break; + } } return str2; |