[ruby/prism] Remove tokens from lex compat

Instead of having custom classes, use arrays and track which tokens we should ignore the state for in the test. https://github.com/ruby/prism/commit/a333b56ada
author: Kevin Newton <kddnewton@gmail.com> 2026-01-28 09:12:51 -0500
committer: git <svn-admin@ruby-lang.org> 2026-01-28 14:58:55 +0000
commit: 01ace0655ed84708f0afdcc74fb779e680bfc4e0 (patch)
tree: a63b6f791e2b56213a81c16ea282dfcd71e1f1f3
parent: 3b22e32fa50c2c18663be87dad4d11a266954773 (diff)
3 files changed, 46 insertions, 120 deletions
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index c23adda241..4c516a9de0 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -196,57 +196,6 @@ module Prism
       "__END__": :on___end__
     }.freeze
 
-    # When we produce tokens, we produce the same arrays that Ripper does.
-    # However, we add a couple of convenience methods onto them to make them a
-    # little easier to work with. We delegate all other methods to the array.
-    class Token < BasicObject
-      # Create a new token object with the given ripper-compatible array.
-      def initialize(array)
-        @array = array
-      end
-
-      # The location of the token in the source.
-      def location
-        @array[0]
-      end
-
-      # The type of the token.
-      def event
-        @array[1]
-      end
-
-      # The slice of the source that this token represents.
-      def value
-        @array[2]
-      end
-
-      # The state of the lexer when this token was produced.
-      def state
-        @array[3]
-      end
-
-      # We want to pretend that this is just an Array.
-      def ==(other) # :nodoc:
-        @array == other
-      end
-
-      def respond_to_missing?(name, include_private = false) # :nodoc:
-        @array.respond_to?(name, include_private)
-      end
-
-      def method_missing(name, ...) # :nodoc:
-        @array.send(name, ...)
-      end
-    end
-
-    # Tokens where state should be ignored
-    # used for :on_sp, :on_comment, :on_heredoc_end, :on_embexpr_end
-    class IgnoreStateToken < Token
-      def ==(other) # :nodoc:
-        self[0...-1] == other[0...-1]
-      end
-    end
-
     # A heredoc in this case is a list of tokens that belong to the body of the
     # heredoc that should be appended onto the list of tokens when the heredoc
     # closes.
@@ -290,7 +239,7 @@ module Prism
           embexpr_balance = 0
 
           tokens.each_with_object([]) do |token, results| #$ Array[Token]
-            case token.event
+            case token[1]
             when :on_embexpr_beg
               embexpr_balance += 1
               results << token
@@ -305,9 +254,9 @@ module Prism
                 if split
                   # Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind
                   # to keep the delimiter in the result.
-                  token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
+                  token[2].split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
                     column = 0 if index > 0
-                    results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
+                    results << [[lineno, column], :on_tstring_content, value, token[3]]
                     lineno += value.count("\n")
                   end
                 else
@@ -350,7 +299,7 @@ module Prism
         # whitespace on plain string content tokens. This allows us to later
         # remove that amount of whitespace from the beginning of each line.
         def <<(token)
-          case token.event
+          case token[1]
           when :on_embexpr_beg, :on_heredoc_beg
             @embexpr_balance += 1
             @dedent = 0 if @dedent_next && @ended_on_newline
@@ -358,7 +307,7 @@ module Prism
             @embexpr_balance -= 1
           when :on_tstring_content
             if embexpr_balance == 0
-              line = token.value
+              line = token[2]
 
               if dedent_next && !(line.strip.empty? && line.end_with?("\n"))
                 leading = line[/\A(\s*)\n?/, 1]
@@ -381,7 +330,7 @@ module Prism
             end
           end
 
-          @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0
+          @dedent_next = token[1] == :on_tstring_content && embexpr_balance == 0
           @ended_on_newline = false
           tokens << token
         end
@@ -394,7 +343,7 @@ module Prism
             embexpr_balance = 0
 
             tokens.each do |token|
-              case token.event
+              case token[1]
               when :on_embexpr_beg, :on_heredoc_beg
                 embexpr_balance += 1
                 results << token
@@ -406,9 +355,9 @@ module Prism
                   lineno = token[0][0]
                   column = token[0][1]
 
-                  token.value.split(/(?<=\n)/).each_with_index do |value, index|
+                  token[2].split(/(?<=\n)/).each_with_index do |value, index|
                     column = 0 if index > 0
-                    results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
+                    results << [[lineno, column], :on_tstring_content, value, token[3]]
                     lineno += 1
                   end
                 else
@@ -436,15 +385,15 @@ module Prism
               results << token
               index += 1
 
-              case token.event
+              case token[1]
               when :on_embexpr_beg, :on_heredoc_beg
                 embexpr_balance += 1
               when :on_embexpr_end, :on_heredoc_end
                 embexpr_balance -= 1
               when :on_tstring_content
                 if embexpr_balance == 0
-                  while index < max_index && tokens[index].event == :on_tstring_content && !token.value.match?(/\\\r?\n\z/)
-                    token.value << tokens[index].value
+                  while index < max_index && tokens[index][1] == :on_tstring_content && !token[2].match?(/\\\r?\n\z/)
+                    token[2] << tokens[index][2]
                     index += 1
                   end
                 end
@@ -467,7 +416,7 @@ module Prism
             # whitespace calculation we performed above. This is because
             # checking if the subsequent token needs to be dedented is common to
             # both the dedent calculation and the ignored_sp insertion.
-            case token.event
+            case token[1]
             when :on_embexpr_beg
               embexpr_balance += 1
               results << token
@@ -479,7 +428,7 @@ module Prism
                 # Here we're going to split the string on newlines, but maintain
                 # the newlines in the resulting array. We'll do that with a look
                 # behind assertion.
-                splits = token.value.split(/(?<=\n)/)
+                splits = token[2].split(/(?<=\n)/)
                 index = 0
 
                 while index < splits.length
@@ -536,12 +485,12 @@ module Prism
                       ignored = deleted_chars.join
                       line.delete_prefix!(ignored)
 
-                      results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]])
+                      results << [[lineno, 0], :on_ignored_sp, ignored, token[3]]
                       column = ignored.length
                     end
                   end
 
-                  results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty?
+                  results << [[lineno, column], token[1], line, token[3]] unless line.empty?
                   index += 1
                 end
               else
@@ -552,7 +501,7 @@ module Prism
             end
 
             dedent_next =
-              ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) &&
+              ((token[1] == :on_tstring_content) || (token[1] == :on_heredoc_end)) &&
               embexpr_balance == 0
           end
 
@@ -563,11 +512,11 @@ module Prism
       # Here we will split between the two types of heredocs and return the
       # object that will store their tokens.
       def self.build(opening)
-        case opening.value[2]
+        case opening[2][2]
         when "~"
           DedentingHeredoc.new
         when "-"
-          DashHeredoc.new(opening.value[3] != "'")
+          DashHeredoc.new(opening[2][3] != "'")
         else
           PlainHeredoc.new
         end
@@ -647,16 +596,16 @@ module Prism
             # Ripper doesn't include the rest of the token in the event, so we need to
             # trim it down to just the content on the first line.
             value = value[0..value.index("\n")]
-            Token.new([[lineno, column], event, value, lex_state])
+            [[lineno, column], event, value, lex_state]
           when :on_comment
-            IgnoreStateToken.new([[lineno, column], event, value, lex_state])
+            [[lineno, column], event, value, lex_state]
           when :on_heredoc_end
             # Heredoc end tokens can be emitted in an odd order, so we don't
             # want to bother comparing the state on them.
             last_heredoc_end = token.location.end_offset
-            IgnoreStateToken.new([[lineno, column], event, value, lex_state])
+            [[lineno, column], event, value, lex_state]
           when :on_embexpr_end
-            IgnoreStateToken.new([[lineno, column], event, value, lex_state])
+            [[lineno, column], event, value, lex_state]
           when :on_words_sep
             # Ripper emits one token each per line.
             value.each_line.with_index do |line, index|
@@ -664,7 +613,7 @@ module Prism
                 lineno += 1
                 column = 0
               end
-              tokens << Token.new([[lineno, column], event, line, lex_state])
+              tokens << [[lineno, column], event, line, lex_state]
             end
             tokens.pop
           when :on_regexp_end
@@ -696,7 +645,7 @@ module Prism
                 previous_state
               end
 
-            Token.new([[lineno, column], event, value, lex_state])
+            [[lineno, column], event, value, lex_state]
           when :on_eof
             eof_token = token
             previous_token = result_value[index - 1][0]
@@ -721,13 +670,13 @@ module Prism
                   end_offset += 3
                 end
 
-                tokens << Token.new([[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state])
+                tokens << [[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state]
               end
             end
 
-            Token.new([[lineno, column], event, value, lex_state])
+            [[lineno, column], event, value, lex_state]
           else
-            Token.new([[lineno, column], event, value, lex_state])
+            [[lineno, column], event, value, lex_state]
           end
 
         previous_state = lex_state
@@ -813,9 +762,8 @@ module Prism
       tokens = tokens[0...-1]
 
       # We sort by location because Ripper.lex sorts.
-      # Manually implemented instead of `sort_by!(&:location)` for performance.
       tokens.sort_by! do |token|
-        line, column = token.location
+        line, column = token[0]
         source.byte_offset(line, column)
       end
 
@@ -834,7 +782,7 @@ module Prism
       prev_token_end = bom ? 3 : 0
 
       tokens.each do |token|
-        line, column = token.location
+        line, column = token[0]
         start_offset = source.byte_offset(line, column)
 
         # Ripper reports columns on line 1 without counting the BOM, so we
@@ -858,50 +806,28 @@ module Prism
             continuation = sp_value[continuation_index...next_whitespace_index]
             second_whitespace = sp_value[next_whitespace_index..]
 
-            new_tokens << IgnoreStateToken.new([
-              [sp_line, sp_column],
-              :on_sp,
-              first_whitespace,
-              prev_token_state
-            ]) unless first_whitespace.empty?
-
-            new_tokens << IgnoreStateToken.new([
-              [sp_line, sp_column + continuation_index],
-              :on_sp,
-              continuation,
-              prev_token_state
-            ])
-
-            new_tokens << IgnoreStateToken.new([
-              [sp_line + 1, 0],
-              :on_sp,
-              second_whitespace,
-              prev_token_state
-            ]) unless second_whitespace.empty?
+            new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty?
+            new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state]
+            new_tokens << [[sp_line + 1, 0], :on_sp, second_whitespace, prev_token_state] unless second_whitespace.empty?
           else
-            new_tokens << IgnoreStateToken.new([
-              [sp_line, sp_column],
-              :on_sp,
-              sp_value,
-              prev_token_state
-            ])
+            new_tokens << [[sp_line, sp_column], :on_sp, sp_value, prev_token_state]
           end
         end
 
         new_tokens << token
-        prev_token_state = token.state
-        prev_token_end = start_offset + token.value.bytesize
+        prev_token_state = token[3]
+        prev_token_end = start_offset + token[2].bytesize
       end
 
       unless data_loc # no trailing :on_sp with __END__ as it is always preceded by :on_nl
         end_offset = eof_token.location.end_offset
         if prev_token_end < end_offset
-          new_tokens << IgnoreStateToken.new([
+          new_tokens << [
             [source.line(prev_token_end), source.column(prev_token_end)],
             :on_sp,
             source.slice(prev_token_end, end_offset - prev_token_end),
             prev_token_state
-          ])
+          ]
         end
       end
 
diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index ccce226d7d..054ad88ce3 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -88,7 +88,7 @@ module Prism
       #      # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]
       #
       def self.tokenize(...)
-        lex(...).map(&:value)
+        lex(...).map { |token| token[2] }
       end
 
       # This contains a table of all of the parser events and their
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index c8d259135f..a89a9503b9 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -136,7 +136,7 @@ module Prism
       assert_equal(expected, lexer.parse[0].to_a)
       assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a)
 
-      assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
+      assert_equal(%i[on_int on_sp on_op], Translation::Ripper::Lexer.new("1 +").lex.map { |token| token[1] })
       assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
     end
 
@@ -169,13 +169,13 @@ module Prism
       # Prism emits tokens by their order in the code, not in parse order
       ripper.sort_by! { |elem| elem[0] }
 
-      [prism.size, ripper.size].max.times do |i|
-        expected = ripper[i]
-        actual = prism[i]
+      [prism.size, ripper.size].max.times do |index|
+        expected = ripper[index]
+        actual = prism[index]
 
-        # Since tokens related to heredocs are not emitted in the same order,
-        # the state also doesn't line up.
-        if expected && actual && expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end
+        # There are some tokens that have slightly different state that do not
+        # effect the parse tree, so they may not match.
+        if expected && actual && expected[1] == actual[1] && %i[on_comment on_heredoc_end on_embexpr_end on_sp].include?(expected[1])
           expected[3] = actual[3] = nil
         end
author	Kevin Newton <kddnewton@gmail.com>	2026-01-28 09:12:51 -0500
committer	git <svn-admin@ruby-lang.org>	2026-01-28 14:58:55 +0000
commit	01ace0655ed84708f0afdcc74fb779e680bfc4e0 (patch)
tree	a63b6f791e2b56213a81c16ea282dfcd71e1f1f3
parent	3b22e32fa50c2c18663be87dad4d11a266954773 (diff)