[ruby/prism] Avoid breaking code units offset on binary encoding

https://github.com/ruby/prism/commit/25a4cf6794

Co-authored-by: Kevin Newton <kddnewton@users.noreply.github.com>
author: Vinicius Stock <vinicius.stock@shopify.com> 2024-10-08 10:47:08 -0400
committer: git <svn-admin@ruby-lang.org> 2024-10-09 14:07:10 +0000
commit: e50754fcfaeb80bef93f043c13895ce386ddb18c (patch)
tree: de915ae686ac19718d18fe7910a5a378292b53f0 /test/prism/ruby
parent: 615a0872167e274d720d7d6bc3fe9a0f34bb44cf (diff)
1 files changed, 19 insertions, 0 deletions
diff --git a/test/prism/ruby/location_test.rb b/test/prism/ruby/location_test.rb
index fc80a5b875..e360a0db72 100644
--- a/test/prism/ruby/location_test.rb
+++ b/test/prism/ruby/location_test.rb
@@ -140,6 +140,25 @@ module Prism
       assert_equal 7, location.end_code_units_column(Encoding::UTF_32LE)
     end
 
+    def test_code_units_handles_binary_encoding_with_multibyte_characters
+      # With a binary encoding and multibyte characters in the source, the
+      # code unit offsets will be incorrect, but computing them must not break.
+      # Every target encoding below reports the same column (4) for the emoji.
+
+      program = Prism.parse(<<~RUBY).value
+        # -*- encoding: binary -*-
+
+        😀 + 😀
+      RUBY
+
+      # receiver of the first statement, i.e. the first 😀
+      location = program.statements.body.first.receiver.location
+
+      assert_equal 4, location.end_code_units_column(Encoding::UTF_8)
+      assert_equal 4, location.end_code_units_column(Encoding::UTF_16LE)
+      assert_equal 4, location.end_code_units_column(Encoding::UTF_32LE)
+    end
+
     def test_chop
       location = Prism.parse("foo").value.location
author	Vinicius Stock <vinicius.stock@shopify.com>	2024-10-08 10:47:08 -0400
committer	git <svn-admin@ruby-lang.org>	2024-10-09 14:07:10 +0000
commit	e50754fcfaeb80bef93f043c13895ce386ddb18c (patch)
tree	de915ae686ac19718d18fe7910a5a378292b53f0 /test/prism/ruby
parent	615a0872167e274d720d7d6bc3fe9a0f34bb44cf (diff)