[ruby/prism] Fix a token incompatibility for `Prism::Translation::Parser::Lexer`

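This PR fixes a token incompatibility between the Parser gem and `Prism::Translation::Parser` for the `heredocs_leading_whitespace.txt` test fixture. The lexer now records each heredoc's identifier when it encounters a `HEREDOC_START` token and emits that identifier as the value of the corresponding `tSTRING_END` token, instead of the raw terminator text with its trailing newline removed. https://github.com/ruby/prism/commit/7d45fb1eed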
author: Koichi ITO <koic.ito@gmail.com> 2024-03-16 00:30:01 +0900
committer: git <svn-admin@ruby-lang.org> 2024-03-15 18:07:59 +0000
commit: c9da8d67fdb9fab82f76d583239f5b9761f60350 (patch)
tree: cdf016ad2dd3366939325f1e7c55b78e1715e4b8
parent: aceee71c35e0b387691836e756b4e008efd84cf1 (diff)
4 files changed, 36 insertions, 21 deletions
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index 7febca449e..9e5d27ef29 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -217,6 +217,8 @@ module Prism
           index = 0
           length = lexed.length
 
+          heredoc_identifier_stack = []
+
           while index < length
             token, state = lexed[index]
             index += 1
@@ -275,6 +277,9 @@ module Prism
             when :tSPACE
               value = nil
             when :tSTRING_BEG
+              if token.type == :HEREDOC_START
+                heredoc_identifier_stack.push(value.match(/<<[-~]?["']?(?<heredoc_identifier>.*?)["']?\z/)[:heredoc_identifier])
+              end
               if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
                 next_location = token.location.join(next_token.location)
                 type = :tSTRING
@@ -322,7 +327,7 @@ module Prism
             when :tSTRING_END
               if token.type == :HEREDOC_END && value.end_with?("\n")
                 newline_length = value.end_with?("\r\n") ? 2 : 1
-                value = value.sub(/\r?\n\z/, '')
+                value = heredoc_identifier_stack.pop
                 location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
               elsif token.type == :REGEXP_END
                 value = value[0]
diff --git a/test/prism/fixtures/heredocs_leading_whitespace.txt b/test/prism/fixtures/heredocs_leading_whitespace.txt
index e786f08774..8f19836943 100644
--- a/test/prism/fixtures/heredocs_leading_whitespace.txt
+++ b/test/prism/fixtures/heredocs_leading_whitespace.txt
@@ -3,6 +3,11 @@ a
 b
      FOO
 
+<<-"  FOO"
+a
+b
+     FOO
+
 <<-'  FOO'
 a
 b
diff --git a/test/prism/parser_test.rb b/test/prism/parser_test.rb
index 4a4a414c29..237a4397ca 100644
--- a/test/prism/parser_test.rb
+++ b/test/prism/parser_test.rb
@@ -73,7 +73,6 @@ module Prism
     skip_tokens = [
       "comments.txt",
       "heredoc_with_comment.txt",
-      "heredocs_leading_whitespace.txt",
       "indented_file_end.txt",
       "strings.txt",
       "xstring_with_backslash.txt"
diff --git a/test/prism/snapshots/heredocs_leading_whitespace.txt b/test/prism/snapshots/heredocs_leading_whitespace.txt
index 06116821ca..5412f7d290 100644
--- a/test/prism/snapshots/heredocs_leading_whitespace.txt
+++ b/test/prism/snapshots/heredocs_leading_whitespace.txt
@@ -1,8 +1,8 @@
-@ ProgramNode (location: (1,0)-(16,10))
+@ ProgramNode (location: (1,0)-(21,10))
 ├── locals: []
 └── statements:
-    @ StatementsNode (location: (1,0)-(16,10))
-    └── body: (length: 4)
+    @ StatementsNode (location: (1,0)-(21,10))
+    └── body: (length: 5)
         ├── @ StringNode (location: (1,0)-(1,10))
         │   ├── flags: ∅
         │   ├── opening_loc: (1,0)-(1,10) = "<<-'  FOO'"
@@ -11,39 +11,45 @@
         │   └── unescaped: "a\nb\n"
         ├── @ StringNode (location: (6,0)-(6,10))
         │   ├── flags: ∅
-        │   ├── opening_loc: (6,0)-(6,10) = "<<-'  FOO'"
+        │   ├── opening_loc: (6,0)-(6,10) = "<<-\"  FOO\""
         │   ├── content_loc: (7,0)-(9,0) = "a\nb\n"
-        │   ├── closing_loc: (9,0)-(10,0) = "  FOO\n"
+        │   ├── closing_loc: (9,0)-(10,0) = "     FOO\n"
         │   └── unescaped: "a\nb\n"
-        ├── @ InterpolatedStringNode (location: (11,0)-(11,10))
-        │   ├── opening_loc: (11,0)-(11,10) = "<<~'  FOO'"
+        ├── @ StringNode (location: (11,0)-(11,10))
+        │   ├── flags: ∅
+        │   ├── opening_loc: (11,0)-(11,10) = "<<-'  FOO'"
+        │   ├── content_loc: (12,0)-(14,0) = "a\nb\n"
+        │   ├── closing_loc: (14,0)-(15,0) = "  FOO\n"
+        │   └── unescaped: "a\nb\n"
+        ├── @ InterpolatedStringNode (location: (16,0)-(16,10))
+        │   ├── opening_loc: (16,0)-(16,10) = "<<~'  FOO'"
         │   ├── parts: (length: 2)
-        │   │   ├── @ StringNode (location: (12,0)-(13,0))
+        │   │   ├── @ StringNode (location: (17,0)-(18,0))
         │   │   │   ├── flags: ∅
         │   │   │   ├── opening_loc: ∅
-        │   │   │   ├── content_loc: (12,0)-(13,0) = "a\n"
+        │   │   │   ├── content_loc: (17,0)-(18,0) = "a\n"
         │   │   │   ├── closing_loc: ∅
         │   │   │   └── unescaped: "a\n"
-        │   │   └── @ StringNode (location: (13,0)-(14,0))
+        │   │   └── @ StringNode (location: (18,0)-(19,0))
         │   │       ├── flags: ∅
         │   │       ├── opening_loc: ∅
-        │   │       ├── content_loc: (13,0)-(14,0) = "b\n"
+        │   │       ├── content_loc: (18,0)-(19,0) = "b\n"
         │   │       ├── closing_loc: ∅
         │   │       └── unescaped: "b\n"
-        │   └── closing_loc: (14,0)-(15,0) = "     FOO\n"
-        └── @ InterpolatedStringNode (location: (16,0)-(16,10))
-            ├── opening_loc: (16,0)-(16,10) = "<<~'  FOO'"
+        │   └── closing_loc: (19,0)-(20,0) = "     FOO\n"
+        └── @ InterpolatedStringNode (location: (21,0)-(21,10))
+            ├── opening_loc: (21,0)-(21,10) = "<<~'  FOO'"
             ├── parts: (length: 2)
-            │   ├── @ StringNode (location: (17,0)-(18,0))
+            │   ├── @ StringNode (location: (22,0)-(23,0))
             │   │   ├── flags: ∅
             │   │   ├── opening_loc: ∅
-            │   │   ├── content_loc: (17,0)-(18,0) = "a\n"
+            │   │   ├── content_loc: (22,0)-(23,0) = "a\n"
             │   │   ├── closing_loc: ∅
             │   │   └── unescaped: "a\n"
-            │   └── @ StringNode (location: (18,0)-(19,0))
+            │   └── @ StringNode (location: (23,0)-(24,0))
             │       ├── flags: ∅
             │       ├── opening_loc: ∅
-            │       ├── content_loc: (18,0)-(19,0) = "b\n"
+            │       ├── content_loc: (23,0)-(24,0) = "b\n"
             │       ├── closing_loc: ∅
             │       └── unescaped: "b\n"
-            └── closing_loc: (19,0)-(20,0) = "  FOO\n"
+            └── closing_loc: (24,0)-(25,0) = "  FOO\n"
author	Koichi ITO <koic.ito@gmail.com>	2024-03-16 00:30:01 +0900
committer	git <svn-admin@ruby-lang.org>	2024-03-15 18:07:59 +0000
commit	c9da8d67fdb9fab82f76d583239f5b9761f60350 (patch)
tree	cdf016ad2dd3366939325f1e7c55b78e1715e4b8
parent	aceee71c35e0b387691836e756b4e008efd84cf1 (diff)