From bf1ac3f4af16edb613b6795a4af253e9d551bd2c Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Fri, 13 Feb 2026 10:49:25 +0100
Subject: [ruby/prism] Fix lexing for unterminated strings/heredocs etc.
When we hit EOF and still have lex modes left, it means some content was unterminated.
Heredocs specifically have logic that needs to happen when the body finished lexing.
If we don't reset the mode back to how it was before, it will not continue lexing at the correct place.
Followup to https://github.com/ruby/prism/pull/3918.
We can't call into `parser_lex` since it resets token locations.
https://github.com/ruby/prism/commit/27c24fdc0d
---
.../errors/unterminated_heredoc_and_embexpr.txt | 11 +++++
.../errors/unterminated_heredoc_and_embexpr_2.txt | 9 ++++
test/prism/errors_test.rb | 22 +++++++++
test/prism/lex_test.rb | 53 ++++++++++++++++++++--
4 files changed, 92 insertions(+), 3 deletions(-)
create mode 100644 test/prism/errors/unterminated_heredoc_and_embexpr.txt
create mode 100644 test/prism/errors/unterminated_heredoc_and_embexpr_2.txt
(limited to 'test')
diff --git a/test/prism/errors/unterminated_heredoc_and_embexpr.txt b/test/prism/errors/unterminated_heredoc_and_embexpr.txt
new file mode 100644
index 0000000000..bed7fcd24e
--- /dev/null
+++ b/test/prism/errors/unterminated_heredoc_and_embexpr.txt
@@ -0,0 +1,11 @@
+<= "3.3"
- def test_lex_compare
- prism = Prism.lex_compat(File.read(__FILE__), version: "current").value
- ripper = Ripper.lex(File.read(__FILE__))
+ def test_lex_compat
+ source = "foo bar"
+ prism = Prism.lex_compat(source, version: "current").value
+ ripper = Ripper.lex(source)
assert_equal(ripper, prism)
end
end
+
+ def test_lex_interpolation_unterminated
+ assert_equal(
+ %i[STRING_BEGIN EMBEXPR_BEGIN EOF],
+ token_types('"#{')
+ )
+
+ assert_equal(
+ %i[STRING_BEGIN EMBEXPR_BEGIN IGNORED_NEWLINE EOF],
+ token_types('"#{' + "\n")
+ )
+ end
+
+ def test_lex_interpolation_unterminated_with_content
+ # FIXME: Emits EOL twice.
+ assert_equal(
+ %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT EOF EOF],
+ token_types('"#{C')
+ )
+
+ assert_equal(
+ %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT NEWLINE EOF],
+ token_types('"#{C' + "\n")
+ )
+ end
+
+ def test_lex_heredoc_unterminated
+ code = <<~'RUBY'.strip
+ <