diff options
| author | John Hawthorn <john@hawthorn.email> | 2024-11-09 01:00:13 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-11-09 01:00:13 -0800 |
| commit | a51a6bf6926241704593b9439e91c06ee6f3ee61 (patch) | |
| tree | 5edab0ae8c7dec7b271a4f9c0465689f31d41d86 | |
| parent | 75015d4c1f6965b5e85e96fb309f1f2129f933c0 (diff) | |
[Bug #20883] Fix coderange for sprintf on binary strings (#12040)
Fix update_coderange for binary strings
Although a binary (aka ASCII-8BIT) string will never have a broken
coderange, it still has to differentiate between "valid" and "7bit".
On Ruby 3.4/trunk this problem is masked because we now clear the
coderange more aggressively in rb_str_resize, and we happened to always
be shrinking this string, but we should not assume that.
On Ruby 3.3 this created strings where `ascii_only?` was true in cases
where it shouldn't be, as well as causing other problems.
Fixes [Bug #20883]
Co-authored-by: Daniel Colson <danieljamescolson@gmail.com>
Co-authored-by: Matthew Draper <matthew@trebex.net>
| -rw-r--r-- | sprintf.c | 3 | ||||
| -rw-r--r-- | test/ruby/test_sprintf.rb | 8 |
2 files changed, 9 insertions, 2 deletions
@@ -247,8 +247,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt)
 }
 
 #define update_coderange(partial) do { \
-    if (coderange != ENC_CODERANGE_BROKEN && scanned < blen \
-        && rb_enc_to_index(enc) /* != ENCINDEX_ASCII_8BIT */) { \
+    if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { \
         int cr = coderange; \
         scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr); \
         ENC_CODERANGE_SET(result, \
diff --git a/test/ruby/test_sprintf.rb b/test/ruby/test_sprintf.rb
index c453ecd350..9b972dcbaa 100644
--- a/test/ruby/test_sprintf.rb
+++ b/test/ruby/test_sprintf.rb
@@ -543,4 +543,12 @@ class TestSprintf < Test::Unit::TestCase
       sprintf("%*s", RbConfig::LIMITS["INT_MIN"], "")
     end
   end
+
+  def test_binary_format_coderange
+    1.upto(500) do |i|
+      str = sprintf("%*s".b, i, "\xe2".b)
+      refute_predicate str, :ascii_only?
+      assert_equal i, str.bytesize
+    end
+  end
 end
