summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Earlopain <14981592+Earlopain@users.noreply.github.com> 2026-02-03 12:10:01 +0100
committer: git <svn-admin@ruby-lang.org> 2026-02-03 12:33:30 +0000
commit: 4bf1cb087bc8ff065b6226037329d7464fa3e96c (patch)
tree: 0b04083563c702b588ddd20faa5ebd2e1bffd477
parent: cf22fe7f0857f0c980d9966047579b65c7798126 (diff)
[ruby/prism] Better guard against syntax invalid code in ripper lex translator
Closes https://github.com/ruby/prism/pull/3899
Also better compatibility by only dropping the last token if it is actually EOF.
https://github.com/ruby/prism/commit/128ab52be9
-rw-r--r-- lib/prism/lex_compat.rb 9
-rw-r--r-- test/prism/ruby/ripper_test.rb 11
2 files changed, 16 insertions, 4 deletions
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index 4c516a9de0..5b685716cc 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -758,8 +758,9 @@ module Prism
end
end
- # Drop the EOF token from the list
- tokens = tokens[0...-1]
+ # Drop the EOF token from the list. The EOF token may not be
+ # present if the source was syntax invalid
+ tokens = tokens[0...-1] if tokens.dig(-1, 1) == :on_eof
# We sort by location because Ripper.lex sorts.
tokens.sort_by! do |token|
@@ -804,7 +805,7 @@ module Prism
next_whitespace_index += 1
first_whitespace = sp_value[0...continuation_index]
continuation = sp_value[continuation_index...next_whitespace_index]
- second_whitespace = sp_value[next_whitespace_index..]
+ second_whitespace = sp_value[next_whitespace_index..] || ""
new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty?
new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state]
@@ -819,7 +820,7 @@ module Prism
prev_token_end = start_offset + token[2].bytesize
end
- unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
+ if !data_loc && eof_token # no trailing :on_sp with __END__ as it is always preceded by :on_nl
end_offset = eof_token.location.end_offset
if prev_token_end < end_offset
new_tokens << [
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index 52a5ad7ef4..758505ac2a 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -140,6 +140,17 @@ module Prism
assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
end
+
+ # On syntax invalid code the output doesn't always match up
+ # In these cases we just want to make sure that it doesn't raise.
+ def test_lex_invalid_syntax
+ assert_nothing_raised do
+ Translation::Ripper.lex('scan/\p{alpha}/')
+ end
+
+ assert_equal(Ripper.lex('if;)'), Translation::Ripper.lex('if;)'))
+ end
+
def test_tokenize
source = "foo;1;BAZ"
assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source))