diff options
| author | Earlopain <14981592+Earlopain@users.noreply.github.com> | 2026-01-08 13:47:35 +0100 |
|---|---|---|
| committer | git <svn-admin@ruby-lang.org> | 2026-01-08 18:35:26 +0000 |
| commit | 16863f2ec1c8cefd852965e58acfcfd61b0194b9 (patch) | |
| tree | dd0e27e2ee83a81c6b0f5779c8415dc803e95370 /lib | |
| parent | fc66de3e6b5e28c017c3cffac77a66d680d679a4 (diff) | |
[ruby/prism] Decouple ripper translator from ripper library
Ripper exposes Ripper::Lexer::State in its output, which is a bit of a problem. To make this work, I basically copy-pasted the implementation.
I'm unsure if that is acceptable and added a test to make sure that these values never go out of sync.
I don't imagine them changing often; prism maps them 1:1 for its own usage.
This also fixed the shim by accident. `Ripper.lex` went to `Translation::Ripper.lex` when it should have been the original. Removing the need for the original resolves that issue.
https://github.com/ruby/prism/commit/2c0bea076d
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/prism/lex_compat.rb | 86 |
1 files changed, 69 insertions, 17 deletions
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 48ac768b03..ebfb19e56d 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -2,7 +2,6 @@ # :markup: markdown require "delegate" -require "ripper" module Prism # This class is responsible for lexing the source using prism and then @@ -199,6 +198,58 @@ module Prism "__END__": :on___end__ }.freeze + # Pretty much a 1:1 copy of Ripper::Lexer::State. We list all the available states + # to reimplement to_s without using Ripper. + class State + # Ripper-internal bitflags. + ALL = %i[ + BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM + ].map.with_index.to_h { |name, i| [2 ** i, name] } + ALL[0] = :NONE + ALL.freeze + ALL.each { |value, name| const_set(name, value) } + + # :stopdoc: + + attr_reader :to_int, :to_s + + def initialize(i) + @to_int = i + @to_s = state_name(i) + freeze + end + + def [](index) + case index + when 0, :to_int + @to_int + when 1, :to_s + @to_s + else + nil + end + end + + alias to_i to_int + alias inspect to_s + def pretty_print(q) q.text(to_s) end + def ==(i) super or to_int == i end + def &(i) self.class.new(to_int & i) end + def |(i) self.class.new(to_int | i) end + def allbits?(i) to_int.allbits?(i) end + def anybits?(i) to_int.anybits?(i) end + def nobits?(i) to_int.nobits?(i) end + + # :startdoc: + + private + + # Convert the state flags into the format exposed by ripper. + def state_name(bits) + ALL.filter_map { |flag, name| name if bits & flag != 0 }.join("|") + end + end + # When we produce tokens, we produce the same arrays that Ripper does. # However, we add a couple of convenience methods onto them to make them a # little easier to work with. We delegate all other methods to the array. 
@@ -249,8 +300,8 @@ module Prism class IdentToken < Token def ==(other) # :nodoc: (self[0...-1] == other[0...-1]) && ( - (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) || - (other[3] & Ripper::EXPR_ARG_ANY != 0) + (other[3] == State::LABEL | State::END) || + (other[3] & (State::ARG | State::CMDARG) != 0) ) end end @@ -261,8 +312,8 @@ module Prism def ==(other) # :nodoc: return false unless self[0...-1] == other[0...-1] - if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED - other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0 + if self[3] == State::ARG | State::LABELED + other[3] & State::ARG | State::LABELED != 0 else self[3] == other[3] end @@ -280,8 +331,8 @@ module Prism class ParamToken < Token def ==(other) # :nodoc: (self[0...-1] == other[0...-1]) && ( - (other[3] == Ripper::EXPR_END) || - (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL) + (other[3] == State::END) || + (other[3] == State::END | State::LABEL) ) end end @@ -615,6 +666,11 @@ module Prism private_constant :Heredoc + # In previous versions of Ruby, Ripper wouldn't flush the bom before the + # first token, so we had to have a hack in place to account for that. + BOM_FLUSHED = RUBY_VERSION >= "3.3.0" + private_constant :BOM_FLUSHED + attr_reader :source, :options def initialize(source, **options) @@ -630,13 +686,9 @@ module Prism result = Prism.lex(source, **options) result_value = result.value - previous_state = nil #: Ripper::Lexer::State? + previous_state = nil #: State? last_heredoc_end = nil #: Integer? - # In previous versions of Ruby, Ripper wouldn't flush the bom before the - # first token, so we had to have a hack in place to account for that. This - # checks for that behavior. 
- bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0 bom = source.byteslice(0..2) == "\xEF\xBB\xBF" result_value.each_with_index do |(token, lex_state), index| @@ -651,7 +703,7 @@ module Prism if bom && lineno == 1 column -= 3 - if index == 0 && column == 0 && !bom_flushed + if index == 0 && column == 0 && !BOM_FLUSHED flushed = case token.type when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE, @@ -675,7 +727,7 @@ module Prism event = RIPPER.fetch(token.type) value = token.value - lex_state = Ripper::Lexer::State.new(lex_state) + lex_state = State.new(lex_state) token = case event @@ -689,7 +741,7 @@ module Prism last_heredoc_end = token.location.end_offset IgnoreStateToken.new([[lineno, column], event, value, lex_state]) when :on_ident - if lex_state == Ripper::EXPR_END + if lex_state == State::END # If we have an identifier that follows a method name like: # # def foo bar @@ -699,7 +751,7 @@ module Prism # yet. We do this more accurately, so we need to allow comparing # against both END and END|LABEL. ParamToken.new([[lineno, column], event, value, lex_state]) - elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL + elsif lex_state == State::END | State::LABEL # In the event that we're comparing identifiers, we're going to # allow a little divergence. Ripper doesn't account for local # variables introduced through named captures in regexes, and we @@ -739,7 +791,7 @@ module Prism counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0 end - Ripper::Lexer::State.new(result_value[current_index][1]) + State.new(result_value[current_index][1]) else previous_state end |
