5 files changed, 107 insertions, 4 deletions
diff --git a/prism/prism.c b/prism/prism.c
index 34e5d38b0a..ca0f2a55aa 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -9783,6 +9783,12 @@ parser_lex(pm_parser_t *parser) {
     unsigned int semantic_token_seen = parser->semantic_token_seen;
     parser->semantic_token_seen = true;
 
+    // We'll jump to this label when we are about to encounter an EOF.
+    // If we still have lex_modes on the stack, we pop them so that cleanup
+    // can happen. For example, we should still continue parsing after a heredoc
+    // identifier, even if the heredoc body was syntactically invalid.
+    switch_lex_modes:
+
     switch (parser->lex_modes.current->mode) {
         case PM_LEX_DEFAULT:
         case PM_LEX_EMBEXPR:
@@ -9856,6 +9862,14 @@ parser_lex(pm_parser_t *parser) {
             // We'll check if we're at the end of the file. If we are, then we
             // need to return the EOF token.
             if (parser->current.end >= parser->end) {
+                // We may be missing closing tokens. We should pop modes one by one
+                // to do the appropriate cleanup like moving next_start for heredocs.
+                // Only when no mode is remaining will we actually emit the EOF token.
+                if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
+                    lex_mode_pop(parser);
+                    goto switch_lex_modes;
+                }
+
                 // If we hit EOF, but the EOF came immediately after a newline,
                 // set the start of the token to the newline.  This way any EOF
                 // errors will be reported as happening on that line rather than
@@ -15433,7 +15447,7 @@ parse_string_part(pm_parser_t *parser, uint16_t depth) {
             pm_token_t opening = parser->previous;
             pm_statements_node_t *statements = NULL;
 
-            if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
+            if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
                 pm_accepts_block_stack_push(parser, true);
                 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
                 pm_accepts_block_stack_pop(parser);
diff --git a/test/prism/errors/unterminated_heredoc_and_embexpr.txt b/test/prism/errors/unterminated_heredoc_and_embexpr.txt
new file mode 100644
index 0000000000..bed7fcd24e
--- /dev/null
+++ b/test/prism/errors/unterminated_heredoc_and_embexpr.txt
@@ -0,0 +1,11 @@
+<<A+B
+  ^ unterminated heredoc; can't find string "A" anywhere before EOF
+   ^ unexpected '+', ignoring it
+  ^ unterminated heredoc; can't find string "A" anywhere before EOF
+#{C
+   ^ unexpected heredoc ending; expected an argument
+   ^ unexpected heredoc ending, expecting end-of-input
+   ^ unexpected heredoc ending, ignoring it
+   ^ unexpected end-of-input, assuming it is closing the parent top level context
+^ expected a `}` to close the embedded expression
+
diff --git a/test/prism/errors/unterminated_heredoc_and_embexpr_2.txt b/test/prism/errors/unterminated_heredoc_and_embexpr_2.txt
new file mode 100644
index 0000000000..a03ff1d212
--- /dev/null
+++ b/test/prism/errors/unterminated_heredoc_and_embexpr_2.txt
@@ -0,0 +1,9 @@
+<<A+B
+  ^ unterminated heredoc; can't find string "A" anywhere before EOF
+#{C + "#{"}
+           ^ unterminated string meets end of file
+           ^ unexpected end-of-input, assuming it is closing the parent top level context
+           ^ expected a `}` to close the embedded expression
+      ^ unterminated string; expected a closing delimiter for the interpolated string
+           ^ expected a `}` to close the embedded expression
+
diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb
index b30a0f304d..27610e89d3 100644
--- a/test/prism/errors_test.rb
+++ b/test/prism/errors_test.rb
@@ -87,6 +87,28 @@ module Prism
       assert_nil(statement.end_keyword)
     end
 
+    def test_unclosed_interpolation
+      statement = Prism.parse_statement("\"\#{")
+      assert_equal('"', statement.opening)
+      assert_nil(statement.closing)
+
+      assert_equal(1, statement.parts.count)
+      assert_equal('#{', statement.parts[0].opening)
+      assert_equal("", statement.parts[0].closing)
+      assert_nil(statement.parts[0].statements)
+    end
+
+    def test_unclosed_heredoc_and_interpolation
+      statement = Prism.parse_statement("<<D\n\#{")
+      assert_equal("<<D", statement.opening)
+      assert_nil(statement.closing)
+
+      assert_equal(1, statement.parts.count)
+      assert_equal('#{', statement.parts[0].opening)
+      assert_equal("", statement.parts[0].closing)
+      assert_nil(statement.parts[0].statements)
+    end
+
     private
 
     def assert_errors(filepath, version)
diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb
index 9a9f203c28..8ea7ce7e9b 100644
--- a/test/prism/lex_test.rb
+++ b/test/prism/lex_test.rb
@@ -48,11 +48,58 @@ module Prism
     end
 
     if RUBY_VERSION >= "3.3"
-      def test_lex_compare
-        prism = Prism.lex_compat(File.read(__FILE__), version: "current").value
-        ripper = Ripper.lex(File.read(__FILE__))
+      def test_lex_compat
+        source = "foo bar"
+        prism = Prism.lex_compat(source, version: "current").value
+        ripper = Ripper.lex(source)
         assert_equal(ripper, prism)
       end
     end
+
+    def test_lex_interpolation_unterminated
+      assert_equal(
+        %i[STRING_BEGIN EMBEXPR_BEGIN EOF],
+        token_types('"#{')
+      )
+
+      assert_equal(
+        %i[STRING_BEGIN EMBEXPR_BEGIN IGNORED_NEWLINE EOF],
+        token_types('"#{' + "\n")
+      )
+    end
+
+    def test_lex_interpolation_unterminated_with_content
+      # FIXME: Emits EOF twice.
+      assert_equal(
+        %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT EOF EOF],
+        token_types('"#{C')
+      )
+
+      assert_equal(
+        %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT NEWLINE EOF],
+        token_types('"#{C' + "\n")
+      )
+    end
+
+    def test_lex_heredoc_unterminated
+      code = <<~'RUBY'.strip
+        <<A+B
+        #{C
+      RUBY
+
+      assert_equal(
+        %i[HEREDOC_START EMBEXPR_BEGIN CONSTANT HEREDOC_END PLUS CONSTANT NEWLINE EOF],
+        token_types(code)
+      )
+
+      assert_equal(
+        %i[HEREDOC_START EMBEXPR_BEGIN CONSTANT NEWLINE HEREDOC_END PLUS CONSTANT NEWLINE EOF],
+        token_types(code + "\n")
+      )
+    end
+
+    def token_types(code)
+      Prism.lex(code).value.map { |token, _state| token.type }
+    end
   end
 end