summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorEarlopain <14981592+Earlopain@users.noreply.github.com>2026-01-08 13:47:35 +0100
committergit <svn-admin@ruby-lang.org>2026-01-08 18:35:26 +0000
commit16863f2ec1c8cefd852965e58acfcfd61b0194b9 (patch)
treedd0e27e2ee83a81c6b0f5779c8415dc803e95370 /lib
parentfc66de3e6b5e28c017c3cffac77a66d680d679a4 (diff)
[ruby/prism] Decouple ripper translator from ripper library
Ripper exposes Ripper::Lexer:State in its output, which is a bit of a problem. To make this work, I basically copy-pasted the implementation. I'm unsure if that is acceptable and added a test to make sure that these values never go out of sync. I don't imagine them changing often, prism maps them 1:1 for its own usage. This also fixed the shim by accident. `Ripper.lex` went to `Translation::Ripper.lex` when it should have been the original. Removing the need for the original resolves that issue. https://github.com/ruby/prism/commit/2c0bea076d
Diffstat (limited to 'lib')
-rw-r--r--lib/prism/lex_compat.rb86
1 files changed, 69 insertions, 17 deletions
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index 48ac768b03..ebfb19e56d 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -2,7 +2,6 @@
# :markup: markdown
require "delegate"
-require "ripper"
module Prism
# This class is responsible for lexing the source using prism and then
@@ -199,6 +198,58 @@ module Prism
"__END__": :on___end__
}.freeze
+ # Pretty much a 1:1 copy of Ripper::Lexer::State. We list all the available states
+ # to reimplement to_s without using Ripper.
+ class State
+ # Ripper-internal bitflags.
+ ALL = %i[
+ BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM
+ ].map.with_index.to_h { |name, i| [2 ** i, name] }
+ ALL[0] = :NONE
+ ALL.freeze
+ ALL.each { |value, name| const_set(name, value) }
+
+ # :stopdoc:
+
+ attr_reader :to_int, :to_s
+
+ def initialize(i)
+ @to_int = i
+ @to_s = state_name(i)
+ freeze
+ end
+
+ def [](index)
+ case index
+ when 0, :to_int
+ @to_int
+ when 1, :to_s
+ @to_s
+ else
+ nil
+ end
+ end
+
+ alias to_i to_int
+ alias inspect to_s
+ def pretty_print(q) q.text(to_s) end
+ def ==(i) super or to_int == i end
+ def &(i) self.class.new(to_int & i) end
+ def |(i) self.class.new(to_int | i) end
+ def allbits?(i) to_int.allbits?(i) end
+ def anybits?(i) to_int.anybits?(i) end
+ def nobits?(i) to_int.nobits?(i) end
+
+ # :startdoc:
+
+ private
+
+ # Convert the state flags into the format exposed by ripper.
+ def state_name(bits)
+ ALL.filter_map { |flag, name| name if bits & flag != 0 }.join("|")
+ end
+ end
+
# When we produce tokens, we produce the same arrays that Ripper does.
# However, we add a couple of convenience methods onto them to make them a
# little easier to work with. We delegate all other methods to the array.
@@ -249,8 +300,8 @@ module Prism
class IdentToken < Token
def ==(other) # :nodoc:
(self[0...-1] == other[0...-1]) && (
- (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) ||
- (other[3] & Ripper::EXPR_ARG_ANY != 0)
+ (other[3] == State::LABEL | State::END) ||
+ (other[3] & (State::ARG | State::CMDARG) != 0)
)
end
end
@@ -261,8 +312,8 @@ module Prism
def ==(other) # :nodoc:
return false unless self[0...-1] == other[0...-1]
- if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED
- other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0
+ if self[3] == State::ARG | State::LABELED
+ other[3] & State::ARG | State::LABELED != 0
else
self[3] == other[3]
end
@@ -280,8 +331,8 @@ module Prism
class ParamToken < Token
def ==(other) # :nodoc:
(self[0...-1] == other[0...-1]) && (
- (other[3] == Ripper::EXPR_END) ||
- (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
+ (other[3] == State::END) ||
+ (other[3] == State::END | State::LABEL)
)
end
end
@@ -615,6 +666,11 @@ module Prism
private_constant :Heredoc
+ # In previous versions of Ruby, Ripper wouldn't flush the bom before the
+ # first token, so we had to have a hack in place to account for that.
+ BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
+ private_constant :BOM_FLUSHED
+
attr_reader :source, :options
def initialize(source, **options)
@@ -630,13 +686,9 @@ module Prism
result = Prism.lex(source, **options)
result_value = result.value
- previous_state = nil #: Ripper::Lexer::State?
+ previous_state = nil #: State?
last_heredoc_end = nil #: Integer?
- # In previous versions of Ruby, Ripper wouldn't flush the bom before the
- # first token, so we had to have a hack in place to account for that. This
- # checks for that behavior.
- bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
result_value.each_with_index do |(token, lex_state), index|
@@ -651,7 +703,7 @@ module Prism
if bom && lineno == 1
column -= 3
- if index == 0 && column == 0 && !bom_flushed
+ if index == 0 && column == 0 && !BOM_FLUSHED
flushed =
case token.type
when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
@@ -675,7 +727,7 @@ module Prism
event = RIPPER.fetch(token.type)
value = token.value
- lex_state = Ripper::Lexer::State.new(lex_state)
+ lex_state = State.new(lex_state)
token =
case event
@@ -689,7 +741,7 @@ module Prism
last_heredoc_end = token.location.end_offset
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
when :on_ident
- if lex_state == Ripper::EXPR_END
+ if lex_state == State::END
# If we have an identifier that follows a method name like:
#
# def foo bar
@@ -699,7 +751,7 @@ module Prism
# yet. We do this more accurately, so we need to allow comparing
# against both END and END|LABEL.
ParamToken.new([[lineno, column], event, value, lex_state])
- elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
+ elsif lex_state == State::END | State::LABEL
# In the event that we're comparing identifiers, we're going to
# allow a little divergence. Ripper doesn't account for local
# variables introduced through named captures in regexes, and we
@@ -739,7 +791,7 @@ module Prism
counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
end
- Ripper::Lexer::State.new(result_value[current_index][1])
+ State.new(result_value[current_index][1])
else
previous_state
end