[ruby/prism] Better guard against syntax invalid code in ripper lex translator

Closes https://github.com/ruby/prism/pull/3899

Also improves compatibility by dropping the last token only if it is actually EOF.

https://github.com/ruby/prism/commit/128ab52be9
author: Earlopain <14981592+Earlopain@users.noreply.github.com> 2026-02-03 12:10:01 +0100
committer: git <svn-admin@ruby-lang.org> 2026-02-03 12:33:30 +0000
commit: 4bf1cb087bc8ff065b6226037329d7464fa3e96c (patch)
tree: 0b04083563c702b588ddd20faa5ebd2e1bffd477
parent: cf22fe7f0857f0c980d9966047579b65c7798126 (diff)
2 files changed, 16 insertions, 4 deletions
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index 4c516a9de0..5b685716cc 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -758,8 +758,9 @@ module Prism
         end
       end
 
-      # Drop the EOF token from the list
-      tokens = tokens[0...-1]
+      # Drop the EOF token from the list. The EOF token may not be
+      # present if the source was syntactically invalid.
+      tokens = tokens[0...-1] if tokens.dig(-1, 1) == :on_eof
 
       # We sort by location because Ripper.lex sorts.
       tokens.sort_by! do |token|
@@ -804,7 +805,7 @@ module Prism
             next_whitespace_index += 1
             first_whitespace = sp_value[0...continuation_index]
             continuation = sp_value[continuation_index...next_whitespace_index]
-            second_whitespace = sp_value[next_whitespace_index..]
+            second_whitespace = sp_value[next_whitespace_index..] || ""
 
             new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty?
             new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state]
@@ -819,7 +820,7 @@ module Prism
         prev_token_end = start_offset + token[2].bytesize
       end
 
-      unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
+      if !data_loc && eof_token # no trailing :on_sp with __END__ as it is always preceded by :on_nl
         end_offset = eof_token.location.end_offset
         if prev_token_end < end_offset
           new_tokens << [
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index 52a5ad7ef4..758505ac2a 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -140,6 +140,17 @@ module Prism
       assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
     end
 
+
+    # On syntactically invalid code, the output doesn't always match up.
+    # In these cases we just want to make sure that it doesn't raise.
+    def test_lex_invalid_syntax
+      assert_nothing_raised do
+        Translation::Ripper.lex('scan/\p{alpha}/')
+      end
+
+      assert_equal(Ripper.lex('if;)'), Translation::Ripper.lex('if;)'))
+    end
+
     def test_tokenize
       source = "foo;1;BAZ"
       assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source))
author	Earlopain <14981592+Earlopain@users.noreply.github.com>	2026-02-03 12:10:01 +0100
committer	git <svn-admin@ruby-lang.org>	2026-02-03 12:33:30 +0000
commit	4bf1cb087bc8ff065b6226037329d7464fa3e96c (patch)
tree	0b04083563c702b588ddd20faa5ebd2e1bffd477
parent	cf22fe7f0857f0c980d9966047579b65c7798126 (diff)