| field | value | date |
|---|---|---|
| author | Hiroshi SHIBATA <hsbt@ruby-lang.org> | 2026-01-20 19:10:16 +0900 |
| committer | Hiroshi SHIBATA <hsbt@ruby-lang.org> | 2026-01-20 19:10:16 +0900 |
| commit | 58f1127b51cf4fbb1f334f8701a041f40701dca2 (patch) | |
| tree | c30ea24a0651f6e63fdaeea27e57621ca3c19e6c | |
| parent | 35a7b5159f39de2cac848c072674e5350cc41aa4 (diff) | |
Revert "[ruby/prism] Add Ripper :on_sp events for Prism.lex_compat and Prism::Translation::Ripper"
This reverts commit 35a7b5159f39de2cac848c072674e5350cc41aa4.
This broke syntax_suggest.
https://github.com/ruby/ruby/actions/runs/21167011751/job/60874111912
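As background, a minimal sketch of the compatibility gap this revert restores, assuming the prism gem is installed (the token lists in the comments are illustrative, not captured output): `Ripper.lex` reports whitespace as `:on_sp` tokens, while `Prism.lex_compat` once again omits them, so callers comparing the two streams have to drop `:on_sp` on the Ripper side.

```ruby
require "ripper"
require "prism"

source = "1 + 2\n"

# Ripper reports the spaces between tokens as :on_sp events.
ripper_events = Ripper.lex(source).map { |token| token[1] }
# illustrative: [:on_int, :on_sp, :on_op, :on_sp, :on_int, :on_nl]

# With the revert applied, Prism.lex_compat omits :on_sp again, so the two
# streams only line up once :on_sp is filtered out of Ripper's output
# (which is what lib/prism/lex_ripper.rb and the test helper below do).
prism_events = Prism.lex_compat(source).value.map { |token| token[1] }
# illustrative: [:on_int, :on_op, :on_int, :on_nl]

p ripper_events
p prism_events
```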
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | lib/prism.rb | 8 |
| -rw-r--r-- | lib/prism/lex_compat.rb | 101 |
| -rw-r--r-- | lib/prism/lex_ripper.rb | 2 |
| -rw-r--r-- | test/prism/fixtures/bom_leading_space.txt | 1 |
| -rw-r--r-- | test/prism/fixtures/bom_spaces.txt | 1 |
| -rw-r--r-- | test/prism/ruby/ripper_test.rb | 12 |
6 files changed, 19 insertions, 106 deletions
diff --git a/lib/prism.rb b/lib/prism.rb
index dab3420377..d809557fce 100644
--- a/lib/prism.rb
+++ b/lib/prism.rb
@@ -61,7 +61,8 @@ module Prism
   #   Prism::lex_compat(source, **options) -> LexCompat::Result
   #
   # Returns a parse result whose value is an array of tokens that closely
-  # resembles the return value of Ripper::lex.
+  # resembles the return value of Ripper::lex. The main difference is that the
+  # `:on_sp` token is not emitted.
   #
   # For supported options, see Prism::parse.
   def self.lex_compat(source, **options)
@@ -71,8 +72,9 @@ module Prism
   # :call-seq:
   #   Prism::lex_ripper(source) -> Array
   #
-  # This wraps the result of Ripper.lex. It produces almost exactly the
-  # same tokens. Raises SyntaxError if the syntax in source is invalid.
+  # This lexes with the Ripper lex. It drops any space events but otherwise
+  # returns the same tokens. Raises SyntaxError if the syntax in source is
+  # invalid.
   def self.lex_ripper(source)
     LexRipper.new(source).result # steep:ignore
   end
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index 597e63c73e..f7b9a0effc 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -226,7 +226,7 @@ module Prism
     end
 
     # Tokens where state should be ignored
-    # used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
+    # used for :on_comment, :on_heredoc_end, :on_embexpr_end
     class IgnoreStateToken < Token
       def ==(other) # :nodoc:
         self[0...-1] == other[0...-1]
@@ -611,10 +611,10 @@ module Prism
     BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
     private_constant :BOM_FLUSHED
 
-    attr_reader :options
+    attr_reader :source, :options
 
-    def initialize(code, **options)
-      @code = code
+    def initialize(source, **options)
+      @source = source
       @options = options
     end
 
@@ -624,14 +624,12 @@ module Prism
       state = :default
       heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]
 
-      result = Prism.lex(@code, **options)
-      source = result.source
+      result = Prism.lex(source, **options)
       result_value = result.value
       previous_state = nil #: State?
       last_heredoc_end = nil #: Integer?
-      eof_token = nil
 
-      bom = source.slice(0, 3) == "\xEF\xBB\xBF"
+      bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
 
       result_value.each_with_index do |(token, lex_state), index|
         lineno = token.location.start_line
@@ -743,7 +741,6 @@ module Prism
 
           Token.new([[lineno, column], event, value, lex_state])
         when :on_eof
-          eof_token = token
           previous_token = result_value[index - 1][0]
 
           # If we're at the end of the file and the previous token was a
@@ -766,7 +763,7 @@ module Prism
                end_offset += 3
              end
 
-            tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state])
+            tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
           end
         end
 
@@ -860,89 +857,7 @@ module Prism
       # We sort by location to compare against Ripper's output
       tokens.sort_by!(&:location)
 
-      # Add :on_sp tokens
-      tokens = add_on_sp_tokens(tokens, source, result.data_loc, bom, eof_token)
-
-      Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source)
-    end
-
-    def add_on_sp_tokens(tokens, source, data_loc, bom, eof_token)
-      new_tokens = []
-
-      prev_token_state = Translation::Ripper::Lexer::State.cached(Translation::Ripper::EXPR_BEG)
-      prev_token_end = bom ? 3 : 0
-
-      tokens.each do |token|
-        line, column = token.location
-        start_offset = source.line_to_byte_offset(line) + column
-        # Ripper reports columns on line 1 without counting the BOM, so we adjust to get the real offset
-        start_offset += 3 if line == 1 && bom
-
-        if start_offset > prev_token_end
-          sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
-          sp_line = source.line(prev_token_end)
-          sp_column = source.column(prev_token_end)
-          # Ripper reports columns on line 1 without counting the BOM
-          sp_column -= 3 if sp_line == 1 && bom
-          continuation_index = sp_value.byteindex("\\")
-
-          # ripper emits up to three :on_sp tokens when line continuations are used
-          if continuation_index
-            next_whitespace_index = continuation_index + 1
-            next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r"
-            next_whitespace_index += 1
-            first_whitespace = sp_value[0...continuation_index]
-            continuation = sp_value[continuation_index...next_whitespace_index]
-            second_whitespace = sp_value[next_whitespace_index..]
-
-            new_tokens << IgnoreStateToken.new([
-              [sp_line, sp_column],
-              :on_sp,
-              first_whitespace,
-              prev_token_state
-            ]) unless first_whitespace.empty?
-
-            new_tokens << IgnoreStateToken.new([
-              [sp_line, sp_column + continuation_index],
-              :on_sp,
-              continuation,
-              prev_token_state
-            ])
-
-            new_tokens << IgnoreStateToken.new([
-              [sp_line + 1, 0],
-              :on_sp,
-              second_whitespace,
-              prev_token_state
-            ]) unless second_whitespace.empty?
-          else
-            new_tokens << IgnoreStateToken.new([
-              [sp_line, sp_column],
-              :on_sp,
-              sp_value,
-              prev_token_state
-            ])
-          end
-        end
-
-        new_tokens << token
-        prev_token_state = token.state
-        prev_token_end = start_offset + token.value.bytesize
-      end
-
-      unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
-        end_offset = eof_token.location.end_offset
-        if prev_token_end < end_offset
-          new_tokens << IgnoreStateToken.new([
-            [source.line(prev_token_end), source.column(prev_token_end)],
-            :on_sp,
-            source.slice(prev_token_end, end_offset - prev_token_end),
-            prev_token_state
-          ])
-        end
-      end
-
-      new_tokens
+      Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.for(source))
     end
   end
 
diff --git a/lib/prism/lex_ripper.rb b/lib/prism/lex_ripper.rb
index 2054cf55ac..4b5c3b77fd 100644
--- a/lib/prism/lex_ripper.rb
+++ b/lib/prism/lex_ripper.rb
@@ -19,6 +19,8 @@ module Prism
 
       lex(source).each do |token|
         case token[1]
+        when :on_sp
+          # skip
        when :on_tstring_content
          if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
            previous[2] << token[2]
diff --git a/test/prism/fixtures/bom_leading_space.txt b/test/prism/fixtures/bom_leading_space.txt
deleted file mode 100644
index 48d3ee50ea..0000000000
--- a/test/prism/fixtures/bom_leading_space.txt
+++ /dev/null
@@ -1 +0,0 @@
- p (42)
diff --git a/test/prism/fixtures/bom_spaces.txt b/test/prism/fixtures/bom_spaces.txt
deleted file mode 100644
index c18ad4c21a..0000000000
--- a/test/prism/fixtures/bom_spaces.txt
+++ /dev/null
@@ -1 +0,0 @@
-p ( 42 )
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index 280abd94ea..2a0504c19f 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -39,8 +39,6 @@ module Prism
 
     # Skip these tests that we haven't implemented yet.
     omitted_sexp_raw = [
-      "bom_leading_space.txt",
-      "bom_spaces.txt",
       "dos_endings.txt",
       "heredocs_with_fake_newlines.txt",
       "heredocs_with_ignored_newlines.txt",
@@ -94,7 +92,7 @@ module Prism
       assert_equal(expected, lexer.parse[0].to_a)
       assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a)
 
-      assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
+      assert_equal(%i[on_int on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
       assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
     end
 
@@ -123,17 +121,15 @@ module Prism
     def assert_ripper_lex(source)
       prism = Translation::Ripper.lex(source)
       ripper = Ripper.lex(source)
-
-      # Prism emits tokens by their order in the code, not in parse order
-      ripper.sort_by! { |elem| elem[0] }
+      ripper.reject! { |elem| elem[1] == :on_sp } # Prism doesn't emit on_sp
+      ripper.sort_by! { |elem| elem[0] } # Prism emits tokens by their order in the code, not in parse order
 
       [prism.size, ripper.size].max.times do |i|
         expected = ripper[i]
         actual = prism[i]
-
         # Since tokens related to heredocs are not emitted in the same order,
         # the state also doesn't line up.
-        if expected && actual && expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
+        if expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
           expected[3] = actual[3] = nil
         end
 
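The removed `add_on_sp_tokens` helper existed largely to mirror how Ripper splits the whitespace around a line continuation into up to three separate `:on_sp` tokens (per the comment in the removed code). A small probe of that Ripper behavior, with illustrative output that may vary by Ruby version:

```ruby
require "ripper"

# "1 +" followed by a line continuation, then "2" on the next line.
source = "1 + \\\n  2\n"

Ripper.lex(source).each do |(pos, event, value, _state)|
  p [pos, event, value]
end
# Illustrative output -- the space before the backslash, the "\\\n" itself,
# and the indentation on the continued line each arrive as their own :on_sp:
#   [[1, 0], :on_int, "1"]
#   [[1, 1], :on_sp, " "]
#   [[1, 2], :on_op, "+"]
#   [[1, 3], :on_sp, " "]
#   [[1, 4], :on_sp, "\\\n"]
#   [[2, 0], :on_sp, "  "]
#   [[2, 2], :on_int, "2"]
#   [[2, 3], :on_nl, "\n"]
```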
