[ruby/prism] Fix token incompatibility for `Prism::Translation::Parser::Lexer`

This PR fixes token incompatibility for `Prism::Translation::Parser::Lexer` when using a backquoted heredoc identifier: ```ruby <<-` FOO` a b FOO ``` ## Parser gem (Expected) Returns `tXSTRING_BEG` as the first token: ```console $ bundle exec ruby -Ilib -rparser/ruby33 -ve \ 'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Parser::Ruby33.new.tokenize(buf)' ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22] [s(:xstr, s(:str, "a\n"), s(:str, "b\n")), [], [[:tXSTRING_BEG, ["<<`", #<Parser::Source::Range example.rb 0...10>]], [:tSTRING_CONTENT, ["a\n", #<Parser::Source::Range example.rb 11...13>]], [:tSTRING_CONTENT, ["b\n", #<Parser::Source::Range example.rb 13...15>]], [:tSTRING_END, [" FOO", #<Parser::Source::Range example.rb 15...23>]], [:tNL, [nil, #<Parser::Source::Range example.rb 10...11>]]]] ``` ## `Prism::Translation::Parser` (Actual) Previously, the tokens differed from those returned by the Parser gem: the first token was `tSTRING_BEG` with value `<<"` instead of `tXSTRING_BEG`, and the value of the `tSTRING_END` token still included the surrounding backquotes. 
```console $ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \ 'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Prism::Translation::Parser33.new.tokenize(buf)' ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22] [s(:xstr, s(:str, "a\n"), s(:str, "b\n")), [], [[:tSTRING_BEG, ["<<\"", #<Parser::Source::Range example.rb 0...10>]], [:tSTRING_CONTENT, ["a\n", #<Parser::Source::Range example.rb 11...13>]], [:tSTRING_CONTENT, ["b\n", #<Parser::Source::Range example.rb 13...15>]], [:tSTRING_END, ["` FOO`", #<Parser::Source::Range example.rb 15...23>]], [:tNL, [nil, #<Parser::Source::Range example.rb 10...11>]]]] ``` After this correction, the AST and tokens are the same as those returned by the Parser gem: ```console $ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \ 'buf = Parser::Source::Buffer.new("example.rb"); buf.source = File.read("example.rb"); p Prism::Translation::Parser33.new.tokenize(buf)' ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22] [s(:xstr, s(:str, "a\n"), s(:str, "b\n")), [], [[:tXSTRING_BEG, ["<<`", #<Parser::Source::Range example.rb 0...10>]], [:tSTRING_CONTENT, ["a\n", #<Parser::Source::Range example.rb 11...13>]], [:tSTRING_CONTENT, ["b\n", #<Parser::Source::Range example.rb 13...15>]], [:tSTRING_END, [" FOO", #<Parser::Source::Range example.rb 15...23>]], [:tNL, [nil, #<Parser::Source::Range example.rb 10...11>]]]] ``` https://github.com/ruby/prism/commit/308f8d85a1
author: Koichi ITO <koic.ito@gmail.com> 2024-03-16 15:21:36 +0900
committer: git <svn-admin@ruby-lang.org> 2024-03-16 17:55:38 +0000
commit: 3605d6076dab516c7b483a8be6038d5b6da1845a (patch)
tree: e0dc7e6b318cc126dccdd4d8b4f032a845735e51
parent: 815c7e197cfa193fc83844f2b988e26d0a56464d (diff)
3 files changed, 38 insertions, 22 deletions
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index cb23fe8ac0..9d7caae0ba 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -278,7 +278,7 @@ module Prism
               value = nil
             when :tSTRING_BEG
               if token.type == :HEREDOC_START
-                heredoc_identifier_stack.push(value.match(/<<[-~]?["']?(?<heredoc_identifier>.*?)["']?\z/)[:heredoc_identifier])
+                heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
               end
               if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
                 next_location = token.location.join(next_token.location)
@@ -294,7 +294,12 @@ module Prism
                 index += 2
               elsif value.start_with?("<<")
                 quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
-                value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
+                if quote == "`"
+                  type = :tXSTRING_BEG
+                  value = "<<`"
+                else
+                  value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
+                end
               end
             when :tSTRING_CONTENT
               unless (lines = token.value.lines).one?
diff --git a/test/prism/fixtures/heredocs_leading_whitespace.txt b/test/prism/fixtures/heredocs_leading_whitespace.txt
index 8f19836943..660ecb4543 100644
--- a/test/prism/fixtures/heredocs_leading_whitespace.txt
+++ b/test/prism/fixtures/heredocs_leading_whitespace.txt
@@ -8,6 +8,11 @@ a
 b
      FOO
 
+<<-`  FOO`
+a
+b
+     FOO
+
 <<-'  FOO'
 a
 b
diff --git a/test/prism/snapshots/heredocs_leading_whitespace.txt b/test/prism/snapshots/heredocs_leading_whitespace.txt
index 5412f7d290..332dfa2986 100644
--- a/test/prism/snapshots/heredocs_leading_whitespace.txt
+++ b/test/prism/snapshots/heredocs_leading_whitespace.txt
@@ -1,8 +1,8 @@
-@ ProgramNode (location: (1,0)-(21,10))
+@ ProgramNode (location: (1,0)-(26,10))
 ├── locals: []
 └── statements:
-    @ StatementsNode (location: (1,0)-(21,10))
-    └── body: (length: 5)
+    @ StatementsNode (location: (1,0)-(26,10))
+    └── body: (length: 6)
         ├── @ StringNode (location: (1,0)-(1,10))
         │   ├── flags: ∅
         │   ├── opening_loc: (1,0)-(1,10) = "<<-'  FOO'"
@@ -15,41 +15,47 @@
         │   ├── content_loc: (7,0)-(9,0) = "a\nb\n"
         │   ├── closing_loc: (9,0)-(10,0) = "     FOO\n"
         │   └── unescaped: "a\nb\n"
-        ├── @ StringNode (location: (11,0)-(11,10))
+        ├── @ XStringNode (location: (11,0)-(11,10))
         │   ├── flags: ∅
-        │   ├── opening_loc: (11,0)-(11,10) = "<<-'  FOO'"
+        │   ├── opening_loc: (11,0)-(11,10) = "<<-`  FOO`"
         │   ├── content_loc: (12,0)-(14,0) = "a\nb\n"
-        │   ├── closing_loc: (14,0)-(15,0) = "  FOO\n"
+        │   ├── closing_loc: (14,0)-(15,0) = "     FOO\n"
         │   └── unescaped: "a\nb\n"
-        ├── @ InterpolatedStringNode (location: (16,0)-(16,10))
-        │   ├── opening_loc: (16,0)-(16,10) = "<<~'  FOO'"
+        ├── @ StringNode (location: (16,0)-(16,10))
+        │   ├── flags: ∅
+        │   ├── opening_loc: (16,0)-(16,10) = "<<-'  FOO'"
+        │   ├── content_loc: (17,0)-(19,0) = "a\nb\n"
+        │   ├── closing_loc: (19,0)-(20,0) = "  FOO\n"
+        │   └── unescaped: "a\nb\n"
+        ├── @ InterpolatedStringNode (location: (21,0)-(21,10))
+        │   ├── opening_loc: (21,0)-(21,10) = "<<~'  FOO'"
         │   ├── parts: (length: 2)
-        │   │   ├── @ StringNode (location: (17,0)-(18,0))
+        │   │   ├── @ StringNode (location: (22,0)-(23,0))
         │   │   │   ├── flags: ∅
         │   │   │   ├── opening_loc: ∅
-        │   │   │   ├── content_loc: (17,0)-(18,0) = "a\n"
+        │   │   │   ├── content_loc: (22,0)-(23,0) = "a\n"
         │   │   │   ├── closing_loc: ∅
         │   │   │   └── unescaped: "a\n"
-        │   │   └── @ StringNode (location: (18,0)-(19,0))
+        │   │   └── @ StringNode (location: (23,0)-(24,0))
         │   │       ├── flags: ∅
         │   │       ├── opening_loc: ∅
-        │   │       ├── content_loc: (18,0)-(19,0) = "b\n"
+        │   │       ├── content_loc: (23,0)-(24,0) = "b\n"
         │   │       ├── closing_loc: ∅
         │   │       └── unescaped: "b\n"
-        │   └── closing_loc: (19,0)-(20,0) = "     FOO\n"
-        └── @ InterpolatedStringNode (location: (21,0)-(21,10))
-            ├── opening_loc: (21,0)-(21,10) = "<<~'  FOO'"
+        │   └── closing_loc: (24,0)-(25,0) = "     FOO\n"
+        └── @ InterpolatedStringNode (location: (26,0)-(26,10))
+            ├── opening_loc: (26,0)-(26,10) = "<<~'  FOO'"
             ├── parts: (length: 2)
-            │   ├── @ StringNode (location: (22,0)-(23,0))
+            │   ├── @ StringNode (location: (27,0)-(28,0))
             │   │   ├── flags: ∅
             │   │   ├── opening_loc: ∅
-            │   │   ├── content_loc: (22,0)-(23,0) = "a\n"
+            │   │   ├── content_loc: (27,0)-(28,0) = "a\n"
             │   │   ├── closing_loc: ∅
             │   │   └── unescaped: "a\n"
-            │   └── @ StringNode (location: (23,0)-(24,0))
+            │   └── @ StringNode (location: (28,0)-(29,0))
             │       ├── flags: ∅
             │       ├── opening_loc: ∅
-            │       ├── content_loc: (23,0)-(24,0) = "b\n"
+            │       ├── content_loc: (28,0)-(29,0) = "b\n"
             │       ├── closing_loc: ∅
             │       └── unescaped: "b\n"
-            └── closing_loc: (24,0)-(25,0) = "  FOO\n"
+            └── closing_loc: (29,0)-(30,0) = "  FOO\n"
author	Koichi ITO <koic.ito@gmail.com>	2024-03-16 15:21:36 +0900
committer	git <svn-admin@ruby-lang.org>	2024-03-16 17:55:38 +0000
commit	3605d6076dab516c7b483a8be6038d5b6da1845a (patch)
tree	e0dc7e6b318cc126dccdd4d8b4f032a845735e51
parent	815c7e197cfa193fc83844f2b988e26d0a56464d (diff)