diff options
| author | Earlopain <14981592+Earlopain@users.noreply.github.com> | 2026-02-03 12:10:01 +0100 |
|---|---|---|
| committer | git <svn-admin@ruby-lang.org> | 2026-02-03 12:33:30 +0000 |
| commit | 4bf1cb087bc8ff065b6226037329d7464fa3e96c (patch) | |
| tree | 0b04083563c702b588ddd20faa5ebd2e1bffd477 | |
| parent | cf22fe7f0857f0c980d9966047579b65c7798126 (diff) | |
[ruby/prism] Better guard against syntax invalid code in ripper lex translator
Closes https://github.com/ruby/prism/pull/3899
Also improves compatibility by dropping
the last token only if it is actually EOF
https://github.com/ruby/prism/commit/128ab52be9
| -rw-r--r-- | lib/prism/lex_compat.rb | 9 | ||||
| -rw-r--r-- | test/prism/ruby/ripper_test.rb | 11 |
2 files changed, 16 insertions, 4 deletions
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 4c516a9de0..5b685716cc 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -758,8 +758,9 @@ module Prism end end - # Drop the EOF token from the list - tokens = tokens[0...-1] + # Drop the EOF token from the list. The EOF token may not be + # present if the source was syntax invalid + tokens = tokens[0...-1] if tokens.dig(-1, 1) == :on_eof # We sort by location because Ripper.lex sorts. tokens.sort_by! do |token| @@ -804,7 +805,7 @@ module Prism next_whitespace_index += 1 first_whitespace = sp_value[0...continuation_index] continuation = sp_value[continuation_index...next_whitespace_index] - second_whitespace = sp_value[next_whitespace_index..] + second_whitespace = sp_value[next_whitespace_index..] || "" new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty? new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state] @@ -819,7 +820,7 @@ module Prism prev_token_end = start_offset + token[2].bytesize end - unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl + if !data_loc && eof_token # no trailing :on_sp with __END__ as it is always preceded by :on_nl end_offset = eof_token.location.end_offset if prev_token_end < end_offset new_tokens << [ diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index 52a5ad7ef4..758505ac2a 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -140,6 +140,17 @@ module Prism assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) } end + + # On syntax invalid code the output doesn't always match up + # In these cases we just want to make sure that it doesn't raise. 
+ def test_lex_invalid_syntax + assert_nothing_raised do + Translation::Ripper.lex('scan/\p{alpha}/') + end + + assert_equal(Ripper.lex('if;)'), Translation::Ripper.lex('if;)')) + end + def test_tokenize source = "foo;1;BAZ" assert_equal(Ripper.tokenize(source), Translation::Ripper.tokenize(source)) |
