diff options
| author | Vinicius Stock <vinicius.stock@shopify.com> | 2024-10-08 10:47:08 -0400 |
|---|---|---|
| committer | git <svn-admin@ruby-lang.org> | 2024-10-09 14:07:10 +0000 |
| commit | e50754fcfaeb80bef93f043c13895ce386ddb18c (patch) | |
| tree | de915ae686ac19718d18fe7910a5a378292b53f0 | |
| parent | 615a0872167e274d720d7d6bc3fe9a0f34bb44cf (diff) | |
[ruby/prism] Avoid breaking code units offset on binary encoding
https://github.com/ruby/prism/commit/25a4cf6794
Co-authored-by: Kevin Newton <kddnewton@users.noreply.github.com>
| -rw-r--r-- | lib/prism/parse_result.rb | 2 | ||||
| -rw-r--r-- | test/prism/ruby/location_test.rb | 19 |
2 files changed, 20 insertions, 1 deletions
```diff
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index ae026b42ac..aea5dee9fa 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -90,7 +90,7 @@ module Prism
     # concept of code units that differs from the number of characters in other
     # encodings, it is not captured here.
     def code_units_offset(byte_offset, encoding)
-      byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
+      byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
 
       if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
         byteslice.bytesize / 2
diff --git a/test/prism/ruby/location_test.rb b/test/prism/ruby/location_test.rb
index fc80a5b875..e360a0db72 100644
--- a/test/prism/ruby/location_test.rb
+++ b/test/prism/ruby/location_test.rb
@@ -140,6 +140,25 @@ module Prism
       assert_equal 7, location.end_code_units_column(Encoding::UTF_32LE)
     end
 
+    def test_code_units_handles_binary_encoding_with_multibyte_characters
+      # If the encoding is set to binary and the source contains multibyte
+      # characters, we avoid breaking the code unit offsets, but they will
+      # still be incorrect.
+
+      program = Prism.parse(<<~RUBY).value
+        # -*- encoding: binary -*-
+
+        😀 + 😀
+      RUBY
+
+      # first 😀
+      location = program.statements.body.first.receiver.location
+
+      assert_equal 4, location.end_code_units_column(Encoding::UTF_8)
+      assert_equal 4, location.end_code_units_column(Encoding::UTF_16LE)
+      assert_equal 4, location.end_code_units_column(Encoding::UTF_32LE)
+    end
+
     def test_chop
       location = Prism.parse("foo").value.location
 
```
