summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Vinicius Stock <vinicius.stock@shopify.com> 2024-10-08 10:47:08 -0400
committer: git <svn-admin@ruby-lang.org> 2024-10-09 14:07:10 +0000
commit: e50754fcfaeb80bef93f043c13895ce386ddb18c (patch)
tree: de915ae686ac19718d18fe7910a5a378292b53f0
parent: 615a0872167e274d720d7d6bc3fe9a0f34bb44cf (diff)
[ruby/prism] Avoid breaking code units offset on binary encoding
https://github.com/ruby/prism/commit/25a4cf6794

Co-authored-by: Kevin Newton <kddnewton@users.noreply.github.com>
-rw-r--r-- lib/prism/parse_result.rb 2
-rw-r--r-- test/prism/ruby/location_test.rb 19
2 files changed, 20 insertions, 1 deletions
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index ae026b42ac..aea5dee9fa 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -90,7 +90,7 @@ module Prism
# concept of code units that differs from the number of characters in other
# encodings, it is not captured here.
def code_units_offset(byte_offset, encoding)
- byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
+ byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
byteslice.bytesize / 2
diff --git a/test/prism/ruby/location_test.rb b/test/prism/ruby/location_test.rb
index fc80a5b875..e360a0db72 100644
--- a/test/prism/ruby/location_test.rb
+++ b/test/prism/ruby/location_test.rb
@@ -140,6 +140,25 @@ module Prism
assert_equal 7, location.end_code_units_column(Encoding::UTF_32LE)
end
+ def test_code_units_handles_binary_encoding_with_multibyte_characters
+ # If the encoding is set to binary and the source contains multibyte
+ # characters, we avoid breaking the code unit offsets, but they will
+ # still be incorrect.
+
+ program = Prism.parse(<<~RUBY).value
+ # -*- encoding: binary -*-
+
+ 😀 + 😀
+ RUBY
+
+ # first 😀
+ location = program.statements.body.first.receiver.location
+
+ assert_equal 4, location.end_code_units_column(Encoding::UTF_8)
+ assert_equal 4, location.end_code_units_column(Encoding::UTF_16LE)
+ assert_equal 4, location.end_code_units_column(Encoding::UTF_32LE)
+ end
+
def test_chop
location = Prism.parse("foo").value.location