[ruby/prism] Decouple ripper translator from ripper library

Ripper exposes Ripper::Lexer::State in its output, which is a bit of a problem. To make this work, I basically copy-pasted the implementation. I'm unsure if that is acceptable, so I added a test to make sure that these values never go out of sync. I don't imagine them changing often; prism maps them 1:1 for its own usage. This also fixed the shim by accident: `Ripper.lex` went to `Translation::Ripper.lex` when it should have been the original. Removing the need for the original resolves that issue. https://github.com/ruby/prism/commit/2c0bea076d
author: Earlopain <14981592+Earlopain@users.noreply.github.com> 2026-01-08 13:47:35 +0100
committer: git <svn-admin@ruby-lang.org> 2026-01-08 18:35:26 +0000
commit: 16863f2ec1c8cefd852965e58acfcfd61b0194b9 (patch)
tree: dd0e27e2ee83a81c6b0f5779c8415dc803e95370 /lib
parent: fc66de3e6b5e28c017c3cffac77a66d680d679a4 (diff)
1 files changed, 69 insertions, 17 deletions
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index 48ac768b03..ebfb19e56d 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -2,7 +2,6 @@
 # :markup: markdown
 
 require "delegate"
-require "ripper"
 
 module Prism
   # This class is responsible for lexing the source using prism and then
@@ -199,6 +198,58 @@ module Prism
       "__END__": :on___end__
     }.freeze
 
+    # Pretty much a 1:1 copy of Ripper::Lexer::State. We list all the available states
+    # to reimplement to_s without using Ripper.
+    class State
+      # Ripper-internal bitflags, keyed by value. NONE (0) means no flag is set.
+      ALL = %i[
+        BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM
+      ].map.with_index.to_h { |name, i| [2 ** i, name] }
+      ALL[0] = :NONE
+      ALL.freeze
+      ALL.each { |value, name| const_set(name, value) }
+
+      # :stopdoc:
+
+      attr_reader :to_int, :to_s
+
+      def initialize(i)
+        @to_int = i
+        @to_s = state_name(i)
+        freeze
+      end
+
+      def [](index)
+        case index
+        when 0, :to_int
+          @to_int
+        when 1, :to_s
+          @to_s
+        end
+      end
+
+      alias to_i to_int
+      alias inspect to_s
+      def pretty_print(q) q.text(to_s) end
+      def ==(i) super or to_int == i end
+      def &(i) self.class.new(to_int & i) end
+      def |(i) self.class.new(to_int | i) end
+      def allbits?(i) to_int.allbits?(i) end
+      def anybits?(i) to_int.anybits?(i) end
+      def nobits?(i) to_int.nobits?(i) end
+
+      # :startdoc:
+
+      private
+
+      # Convert the state flags into the format exposed by ripper: set flag names
+      # joined by "|", or "NONE" when no known flag is set.
+      def state_name(bits)
+        names = ALL.filter_map { |flag, name| name if bits & flag != 0 }
+        names.empty? ? "NONE" : names.join("|")
+      end
+    end
+
     # When we produce tokens, we produce the same arrays that Ripper does.
     # However, we add a couple of convenience methods onto them to make them a
     # little easier to work with. We delegate all other methods to the array.
@@ -249,8 +300,8 @@ module Prism
     class IdentToken < Token
       def ==(other) # :nodoc:
         (self[0...-1] == other[0...-1]) && (
-          (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) ||
-          (other[3] & Ripper::EXPR_ARG_ANY != 0)
+          (other[3] == State::LABEL | State::END) ||
+          (other[3] & (State::ARG | State::CMDARG) != 0)
         )
       end
     end
@@ -261,8 +312,8 @@ module Prism
       def ==(other) # :nodoc:
         return false unless self[0...-1] == other[0...-1]
 
-        if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED
-          other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0
+        if self[3] == State::ARG | State::LABELED
+          other[3] & State::ARG | State::LABELED != 0
         else
           self[3] == other[3]
         end
@@ -280,8 +331,8 @@ module Prism
     class ParamToken < Token
       def ==(other) # :nodoc:
         (self[0...-1] == other[0...-1]) && (
-          (other[3] == Ripper::EXPR_END) ||
-          (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
+          (other[3] == State::END) ||
+          (other[3] == State::END | State::LABEL)
         )
       end
     end
@@ -615,6 +666,11 @@ module Prism
 
     private_constant :Heredoc
 
+    # In previous versions of Ruby, Ripper wouldn't flush the bom before the
+    # first token, so we had to have a hack in place to account for that.
+    BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
+    private_constant :BOM_FLUSHED
+
     attr_reader :source, :options
 
     def initialize(source, **options)
@@ -630,13 +686,9 @@ module Prism
 
       result = Prism.lex(source, **options)
       result_value = result.value
-      previous_state = nil #: Ripper::Lexer::State?
+      previous_state = nil #: State?
       last_heredoc_end = nil #: Integer?
 
-      # In previous versions of Ruby, Ripper wouldn't flush the bom before the
-      # first token, so we had to have a hack in place to account for that. This
-      # checks for that behavior.
-      bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
       bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
 
       result_value.each_with_index do |(token, lex_state), index|
@@ -651,7 +703,7 @@ module Prism
         if bom && lineno == 1
           column -= 3
 
-          if index == 0 && column == 0 && !bom_flushed
+          if index == 0 && column == 0 && !BOM_FLUSHED
             flushed =
               case token.type
               when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
@@ -675,7 +727,7 @@ module Prism
 
         event = RIPPER.fetch(token.type)
         value = token.value
-        lex_state = Ripper::Lexer::State.new(lex_state)
+        lex_state = State.new(lex_state)
 
         token =
           case event
@@ -689,7 +741,7 @@ module Prism
             last_heredoc_end = token.location.end_offset
             IgnoreStateToken.new([[lineno, column], event, value, lex_state])
           when :on_ident
-            if lex_state == Ripper::EXPR_END
+            if lex_state == State::END
               # If we have an identifier that follows a method name like:
               #
               #     def foo bar
@@ -699,7 +751,7 @@ module Prism
               # yet. We do this more accurately, so we need to allow comparing
               # against both END and END|LABEL.
               ParamToken.new([[lineno, column], event, value, lex_state])
-            elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
+            elsif lex_state == State::END | State::LABEL
               # In the event that we're comparing identifiers, we're going to
               # allow a little divergence. Ripper doesn't account for local
               # variables introduced through named captures in regexes, and we
@@ -739,7 +791,7 @@ module Prism
                   counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
                 end
 
-                Ripper::Lexer::State.new(result_value[current_index][1])
+                State.new(result_value[current_index][1])
               else
                 previous_state
               end
author	Earlopain <14981592+Earlopain@users.noreply.github.com>	2026-01-08 13:47:35 +0100
committer	git <svn-admin@ruby-lang.org>	2026-01-08 18:35:26 +0000
commit	16863f2ec1c8cefd852965e58acfcfd61b0194b9 (patch)
tree	dd0e27e2ee83a81c6b0f5779c8415dc803e95370 /lib
parent	fc66de3e6b5e28c017c3cffac77a66d680d679a4 (diff)