13 files changed, 271 insertions, 530 deletions
diff --git a/lib/syntax_suggest/api.rb b/lib/syntax_suggest/api.rb
index 46c9c8adac..5054efa888 100644
--- a/lib/syntax_suggest/api.rb
+++ b/lib/syntax_suggest/api.rb
@@ -7,25 +7,8 @@ require "stringio"
 require "pathname"
 require "timeout"
 
-# We need Ripper loaded for `Prism.lex_compat` even if we're using Prism
-# for lexing and parsing
-require "ripper"
-
 # Prism is the new parser, replacing Ripper
-#
-# We need to "dual boot" both for now because syntax_suggest
-# supports older rubies that do not ship with syntax suggest.
-#
-# We also need the ability to control loading of this library
-# so we can test that both modes work correctly in CI.
-if (value = ENV["SYNTAX_SUGGEST_DISABLE_PRISM"])
-  warn "Skipping loading prism due to SYNTAX_SUGGEST_DISABLE_PRISM=#{value}"
-else
-  begin
-    require "prism"
-  rescue LoadError
-  end
-end
+require "prism"
 
 module SyntaxSuggest
   # Used to indicate a default value that cannot
@@ -35,14 +18,6 @@ module SyntaxSuggest
   class Error < StandardError; end
   TIMEOUT_DEFAULT = ENV.fetch("SYNTAX_SUGGEST_TIMEOUT", 1).to_i
 
-  # SyntaxSuggest.use_prism_parser? [Private]
-  #
-  # Tells us if the prism parser is available for use
-  # or if we should fallback to `Ripper`
-  def self.use_prism_parser?
-    defined?(Prism)
-  end
-
   # SyntaxSuggest.handle_error [Public]
   #
   # Takes a `SyntaxError` exception, uses the
@@ -146,30 +121,17 @@ module SyntaxSuggest
   def self.valid_without?(without_lines:, code_lines:)
     lines = code_lines - Array(without_lines).flatten
 
-    if lines.empty?
-      true
-    else
-      valid?(lines)
-    end
+    lines.empty? || valid?(lines)
   end
 
   # SyntaxSuggest.invalid? [Private]
   #
   # Opposite of `SyntaxSuggest.valid?`
-  if defined?(Prism)
-    def self.invalid?(source)
-      source = source.join if source.is_a?(Array)
-      source = source.to_s
+  def self.invalid?(source)
+    source = source.join if source.is_a?(Array)
+    source = source.to_s
 
-      Prism.parse(source).failure?
-    end
-  else
-    def self.invalid?(source)
-      source = source.join if source.is_a?(Array)
-      source = source.to_s
-
-      Ripper.new(source).tap(&:parse).error?
-    end
+    Prism.parse(source).failure?
   end
 
   # SyntaxSuggest.valid? [Private]
@@ -223,7 +185,6 @@ require_relative "explain_syntax"
 require_relative "clean_document"
 
 # Helpers
-require_relative "lex_all"
 require_relative "code_line"
 require_relative "code_block"
 require_relative "block_expand"
@@ -235,3 +196,5 @@ require_relative "priority_engulf_queue"
 require_relative "pathname_from_message"
 require_relative "display_invalid_blocks"
 require_relative "parse_blocks_from_indent_line"
+require_relative "visitor"
+require_relative "token"
diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb
index ba307af46e..94c68d8ad4 100644
--- a/lib/syntax_suggest/clean_document.rb
+++ b/lib/syntax_suggest/clean_document.rb
@@ -66,27 +66,9 @@ module SyntaxSuggest
   #
   # All of these problems are fixed by joining the whole heredoc into a single
   # line.
-  #
-  # ## Comments and whitespace
-  #
-  # Comments can throw off the way the lexer tells us that the line
-  # logically belongs with the next line. This is valid ruby but
-  # results in a different lex output than before:
-  #
-  #     1 User.
-  #     2   where(name: "schneems").
-  #     3   # Comment here
-  #     4   first
-  #
-  # To handle this we can replace comment lines with empty lines
-  # and then re-lex the source. This removal and re-lexing preserves
-  # line index and document size, but generates an easier to work with
-  # document.
-  #
   class CleanDocument
     def initialize(source:)
-      lines = clean_sweep(source: source)
-      @document = CodeLine.from_source(lines.join, lines: lines)
+      @document = CodeLine.from_source(source)
     end
 
     # Call all of the document "cleaners"
@@ -110,62 +92,6 @@ module SyntaxSuggest
       @document.join
     end
 
-    # Remove comments
-    #
-    # replace with empty newlines
-    #
-    #     source = <<~'EOM'
-    #       # Comment 1
-    #       puts "hello"
-    #       # Comment 2
-    #       puts "world"
-    #     EOM
-    #
-    #     lines = CleanDocument.new(source: source).lines
-    #     expect(lines[0].to_s).to eq("\n")
-    #     expect(lines[1].to_s).to eq("puts "hello")
-    #     expect(lines[2].to_s).to eq("\n")
-    #     expect(lines[3].to_s).to eq("puts "world")
-    #
-    # Important: This must be done before lexing.
-    #
-    # After this change is made, we lex the document because
-    # removing comments can change how the doc is parsed.
-    #
-    # For example:
-    #
-    #     values = LexAll.new(source: <<~EOM))
-    #       User.
-    #         # comment
-    #         where(name: 'schneems')
-    #     EOM
-    #     expect(
-    #       values.count {|v| v.type == :on_ignored_nl}
-    #     ).to eq(1)
-    #
-    # After the comment is removed:
-    #
-    #     values = LexAll.new(source: <<~EOM))
-    #       User.
-    #
-    #         where(name: 'schneems')
-    #     EOM
-    #     expect(
-    #      values.count {|v| v.type == :on_ignored_nl}
-    #    ).to eq(2)
-    #
-    def clean_sweep(source:)
-      # Match comments, but not HEREDOC strings with #{variable} interpolation
-      # https://rubular.com/r/HPwtW9OYxKUHXQ
-      source.lines.map do |line|
-        if line.match?(/^\s*#([^{].*|)$/)
-          $/
-        else
-          line
-        end
-      end
-    end
-
     # Smushes all heredoc lines into one line
     #
     #     source = <<~'EOM'
@@ -182,11 +108,11 @@ module SyntaxSuggest
       start_index_stack = []
       heredoc_beg_end_index = []
       lines.each do |line|
-        line.lex.each do |lex_value|
-          case lex_value.type
-          when :on_heredoc_beg
+        line.tokens.each do |token|
+          case token.type
+          when :HEREDOC_START
             start_index_stack << line.index
-          when :on_heredoc_end
+          when :HEREDOC_END
             start_index = start_index_stack.pop
             end_index = line.index
             heredoc_beg_end_index << [start_index, end_index]
@@ -212,20 +138,10 @@ module SyntaxSuggest
     #     expect(lines[0].to_s).to eq(source)
     #     expect(lines[1].to_s).to eq("")
     #
-    # The one known case this doesn't handle is:
-    #
-    #     Ripper.lex <<~EOM
-    #       a &&
-    #        b ||
-    #        c
-    #     EOM
-    #
-    # For some reason this introduces `on_ignore_newline` but with BEG type
-    #
     def join_consecutive!
-      consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
+      consecutive_groups = @document.select(&:consecutive?).map do |code_line|
         take_while_including(code_line.index..) do |line|
-          line.ignore_newline_not_beg?
+          line.consecutive?
         end
       end
 
@@ -273,16 +189,17 @@ module SyntaxSuggest
 
         # Join group into the first line
         @document[line.index] = CodeLine.new(
-          lex: lines.map(&:lex).flatten,
+          tokens: lines.map(&:tokens).flatten,
           line: lines.join,
-          index: line.index
+          index: line.index,
+          consecutive: false
         )
 
         # Hide the rest of the lines
         lines[1..].each do |line|
           # The above lines already have newlines in them, if add more
           # then there will be double newline, use an empty line instead
-          @document[line.index] = CodeLine.new(line: "", index: line.index, lex: [])
+          @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [], consecutive: false)
         end
       end
       self
diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb
index 58197e95d0..7fb1aae26a 100644
--- a/lib/syntax_suggest/code_line.rb
+++ b/lib/syntax_suggest/code_line.rb
@@ -26,23 +26,57 @@ module SyntaxSuggest
 
     # Returns an array of CodeLine objects
     # from the source string
-    def self.from_source(source, lines: nil)
-      lines ||= source.lines
-      lex_array_for_line = LexAll.new(source: source, source_lines: lines).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex }
-      lines.map.with_index do |line, index|
+    def self.from_source(source)
+      source = source.dup # private mutable copy: clean_comments! edits it in place, never the caller's string
+      parse_result = Prism.parse_lex(source)
+      ast, tokens = parse_result.value
+
+      clean_comments!(source, parse_result.comments)
+
+      visitor = Visitor.new
+      visitor.visit(ast)
+      tokens.sort_by!.with_index { |(token, _state), index| [token.location.start_line, index] } # index tie-break: sort_by! is not stable
+
+      prev_token = nil
+      tokens.map! do |token, _state|
+        prev_token = Token.new(token, prev_token, visitor)
+      end
+
+      tokens_for_line = tokens.each_with_object(Hash.new { |h, k| h[k] = [] }) { |token, hash| hash[token.line] << token }
+      source.lines.map.with_index do |line, index|
         CodeLine.new(
           line: line,
           index: index,
-          lex: lex_array_for_line[index + 1]
+          tokens: tokens_for_line[index + 1],
+          consecutive: visitor.consecutive_lines.include?(index + 1)
         )
       end
     end
 
-    attr_reader :line, :index, :lex, :line_number, :indent
-    def initialize(line:, index:, lex:)
-      @lex = lex
+    # Remove comments that appear on their own in source. They will never be the cause
+    # of syntax errors and are just visual noise. Example:
+    #
+    #   source = +<<~RUBY
+    #     # Comment-only line
+    #     foo # Inline comment
+    #   RUBY
+    #   CodeLine.clean_comments!(source, Prism.parse(source).comments)
+    #   source # => "\nfoo # Inline comment\n"
+    def self.clean_comments!(source, comments)
+      # Splice back to front (Prism sorts comments by location) so earlier offsets stay valid.
+      comments.reverse_each do |comment|
+        next if comment.trailing?
+        start, length = comment.location.start_offset, comment.location.length
+        source.bytesplice(start, length, "\n" * source.byteslice(start, length).count("\n")) # keep line count (=begin/=end)
+      end
+    end
+
+    attr_reader :line, :index, :tokens, :line_number, :indent
+    def initialize(line:, index:, tokens:, consecutive:)
+      @tokens = tokens
       @line = line
       @index = index
+      @consecutive = consecutive
       @original = line
       @line_number = @index + 1
       strip_line = line.dup
@@ -151,92 +185,36 @@ module SyntaxSuggest
       index <=> other.index
     end
 
-    # [Not stable API]
-    #
-    # Lines that have a `on_ignored_nl` type token and NOT
-    # a `BEG` type seem to be a good proxy for the ability
-    # to join multiple lines into one.
-    #
-    # This predicate method is used to determine when those
-    # two criteria have been met.
-    #
-    # The one known case this doesn't handle is:
-    #
-    #     Ripper.lex <<~EOM
-    #       a &&
-    #        b ||
-    #        c
-    #     EOM
-    #
-    # For some reason this introduces `on_ignore_newline` but with BEG type
-    def ignore_newline_not_beg?
-      @ignore_newline_not_beg
+    # Can this line be logically joined together
+    # with the following line? Determined by walking
+    # the AST
+    def consecutive?
+      @consecutive
     end
 
-    # Determines if the given line has a trailing slash
+    # Determines if the given line has a trailing slash.
+    # Simply check if the line contains a backslash after
+    # the content of the last token.
     #
     #     lines = CodeLine.from_source(<<~EOM)
     #       it "foo" \
     #     EOM
     #     expect(lines.first.trailing_slash?).to eq(true)
     #
-    if SyntaxSuggest.use_prism_parser?
-      def trailing_slash?
-        last = @lex.last
-        last&.type == :on_tstring_end
-      end
-    else
-      def trailing_slash?
-        last = @lex.last
-        return false unless last
-        return false unless last.type == :on_sp
-
-        last.token == TRAILING_SLASH
-      end
+    def trailing_slash?
+      return unless (last = @tokens.last)
+      @line.byteindex(TRAILING_SLASH, last.location.end_column) != nil
     end
 
-    # Endless method detection
-    #
-    # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab
-    # Detecting a "oneliner" seems to need a state machine.
-    # This can be done by looking mostly at the "state" (last value):
-    #
-    #   ENDFN -> BEG (token = '=' ) -> END
-    #
     private def set_kw_end
-      oneliner_count = 0
-      in_oneliner_def = nil
-
       kw_count = 0
       end_count = 0
 
-      @ignore_newline_not_beg = false
-      @lex.each do |lex|
-        kw_count += 1 if lex.is_kw?
-        end_count += 1 if lex.is_end?
-
-        if lex.type == :on_ignored_nl
-          @ignore_newline_not_beg = !lex.expr_beg?
-        end
-
-        if in_oneliner_def.nil?
-          in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN)
-        elsif lex.state.allbits?(Ripper::EXPR_ENDFN)
-          # Continue
-        elsif lex.state.allbits?(Ripper::EXPR_BEG)
-          in_oneliner_def = :BODY if lex.token == "="
-        elsif lex.state.allbits?(Ripper::EXPR_END)
-          # We found an endless method, count it
-          oneliner_count += 1 if in_oneliner_def == :BODY
-
-          in_oneliner_def = nil
-        else
-          in_oneliner_def = nil
-        end
+      @tokens.each do |token|
+        kw_count += 1 if token.is_kw?
+        end_count += 1 if token.is_end?
       end
 
-      kw_count -= oneliner_count
-
       @is_kw = (kw_count - end_count) > 0
       @is_end = (end_count - kw_count) > 0
     end
diff --git a/lib/syntax_suggest/core_ext.rb b/lib/syntax_suggest/core_ext.rb
index 94f57ba605..ffbc922eed 100644
--- a/lib/syntax_suggest/core_ext.rb
+++ b/lib/syntax_suggest/core_ext.rb
@@ -1,96 +1,47 @@
 # frozen_string_literal: true
 
-# Ruby 3.2+ has a cleaner way to hook into Ruby that doesn't use `require`
-if SyntaxError.method_defined?(:detailed_message)
-  module SyntaxSuggest
-    # SyntaxSuggest.module_for_detailed_message [Private]
-    #
-    # Used to monkeypatch SyntaxError via Module.prepend
-    def self.module_for_detailed_message
-      Module.new {
-        def detailed_message(highlight: true, syntax_suggest: true, **kwargs)
-          return super unless syntax_suggest
-
-          require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
-
-          message = super
-
-          if path
-            file = Pathname.new(path)
-            io = SyntaxSuggest::MiniStringIO.new
-
-            SyntaxSuggest.call(
-              io: io,
-              source: file.read,
-              filename: file,
-              terminal: highlight
-            )
-            annotation = io.string
-
-            annotation += "\n" unless annotation.end_with?("\n")
-
-            annotation + message
-          else
-            message
-          end
-        rescue => e
-          if ENV["SYNTAX_SUGGEST_DEBUG"]
-            $stderr.warn(e.message)
-            $stderr.warn(e.backtrace)
-          end
-
-          # Ignore internal errors
+module SyntaxSuggest
+  # SyntaxSuggest.module_for_detailed_message [Private]
+  #
+  # Used to monkeypatch SyntaxError via Module.prepend
+  def self.module_for_detailed_message
+    Module.new {
+      def detailed_message(highlight: true, syntax_suggest: true, **kwargs)
+        return super unless syntax_suggest
+
+        require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
+
+        message = super
+
+        if path
+          file = Pathname.new(path)
+          io = SyntaxSuggest::MiniStringIO.new
+
+          SyntaxSuggest.call(
+            io: io,
+            source: file.read,
+            filename: file,
+            terminal: highlight
+          )
+          annotation = io.string
+
+          annotation += "\n" unless annotation.end_with?("\n")
+
+          annotation + message
+        else
           message
         end
-      }
-    end
-  end
-
-  SyntaxError.prepend(SyntaxSuggest.module_for_detailed_message)
-else
-  autoload :Pathname, "pathname"
-
-  #--
-  # Monkey patch kernel to ensure that all `require` calls call the same
-  # method
-  #++
-  module Kernel
-    # :stopdoc:
-
-    module_function
-
-    alias_method :syntax_suggest_original_require, :require
-    alias_method :syntax_suggest_original_require_relative, :require_relative
-    alias_method :syntax_suggest_original_load, :load
-
-    def load(file, wrap = false)
-      syntax_suggest_original_load(file)
-    rescue SyntaxError => e
-      require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
-
-      SyntaxSuggest.handle_error(e)
-    end
-
-    def require(file)
-      syntax_suggest_original_require(file)
-    rescue SyntaxError => e
-      require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
-
-      SyntaxSuggest.handle_error(e)
-    end
+      rescue => e
+        if ENV["SYNTAX_SUGGEST_DEBUG"]
+          $stderr.warn(e.message)
+          $stderr.warn(e.backtrace)
+        end
 
-    def require_relative(file)
-      if Pathname.new(file).absolute?
-        syntax_suggest_original_require file
-      else
-        relative_from = caller_locations(1..1).first
-        relative_from_path = relative_from.absolute_path || relative_from.path
-        syntax_suggest_original_require File.expand_path("../#{file}", relative_from_path)
+        # Ignore internal errors
+        message
       end
-    rescue SyntaxError => e
-      require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
-
-      SyntaxSuggest.handle_error(e)
-    end
+    }
   end
 end
+
+SyntaxError.prepend(SyntaxSuggest.module_for_detailed_message)
diff --git a/lib/syntax_suggest/explain_syntax.rb b/lib/syntax_suggest/explain_syntax.rb
index 0d80c4d869..d7f5262ddb 100644
--- a/lib/syntax_suggest/explain_syntax.rb
+++ b/lib/syntax_suggest/explain_syntax.rb
@@ -1,19 +1,11 @@
 # frozen_string_literal: true
 
-require_relative "left_right_lex_count"
-
-if !SyntaxSuggest.use_prism_parser?
-  require_relative "ripper_errors"
-end
+require_relative "left_right_token_count"
 
 module SyntaxSuggest
   class GetParseErrors
     def self.errors(source)
-      if SyntaxSuggest.use_prism_parser?
-        Prism.parse(source).errors.map(&:message)
-      else
-        RipperErrors.new(source).call.errors
-      end
+      Prism.parse(source).errors.map(&:message)
     end
   end
 
@@ -53,14 +45,14 @@ module SyntaxSuggest
 
     def initialize(code_lines:)
       @code_lines = code_lines
-      @left_right = LeftRightLexCount.new
+      @left_right = LeftRightTokenCount.new
       @missing = nil
     end
 
     def call
       @code_lines.each do |line|
-        line.lex.each do |lex|
-          @left_right.count_lex(lex)
+        line.tokens.each do |token|
+          @left_right.count_token(token)
         end
       end
 
diff --git a/lib/syntax_suggest/left_right_lex_count.rb b/lib/syntax_suggest/left_right_token_count.rb
index 6fcae7482b..e0562ba9cd 100644
--- a/lib/syntax_suggest/left_right_lex_count.rb
+++ b/lib/syntax_suggest/left_right_token_count.rb
@@ -9,19 +9,19 @@ module SyntaxSuggest
   #
   # Example:
   #
-  #   left_right = LeftRightLexCount.new
+  #   left_right = LeftRightTokenCount.new
   #   left_right.count_kw
   #   left_right.missing.first
   #   # => "end"
   #
-  #   left_right = LeftRightLexCount.new
+  #   left_right = LeftRightTokenCount.new
   #   source = "{ a: b, c: d" # Note missing '}'
-  #   LexAll.new(source: source).each do |lex|
-  #     left_right.count_lex(lex)
+  #   CodeLine.from_source(source).flat_map(&:tokens).each do |token|
+  #     left_right.count_token(token)
   #   end
   #   left_right.missing.first
   #   # => "}"
-  class LeftRightLexCount
+  class LeftRightTokenCount
     def initialize
       @kw_count = 0
       @end_count = 0
@@ -49,52 +49,46 @@ module SyntaxSuggest
     #
     # Example:
     #
-    #   left_right = LeftRightLexCount.new
-    #   left_right.count_lex(LexValue.new(1, :on_lbrace, "{", Ripper::EXPR_BEG))
+    #   token = CodeLine.from_source("{").first.tokens.first
+    #   left_right = LeftRightTokenCount.new
+    #   left_right.count_token(token)
     #   left_right.count_for_char("{")
     #   # => 1
     #   left_right.count_for_char("}")
     #   # => 0
-    def count_lex(lex)
-      case lex.type
-      when :on_tstring_content
+    def count_token(token)
+      case token.type
+      when :STRING_CONTENT
         # ^^^
         # Means it's a string or a symbol `"{"` rather than being
         # part of a data structure (like a hash) `{ a: b }`
         # ignore it.
-      when :on_words_beg, :on_symbos_beg, :on_qwords_beg,
-           :on_qsymbols_beg, :on_regexp_beg, :on_tstring_beg
+      when :PERCENT_UPPER_W, :PERCENT_UPPER_I, :PERCENT_LOWER_W,
+           :PERCENT_LOWER_I, :REGEXP_BEGIN, :STRING_BEGIN
         # ^^^
         # Handle shorthand syntaxes like `%Q{ i am a string }`
         #
         # The start token will be the full thing `%Q{` but we
         # need to count it as if it's a `{`. Any token
         # can be used
-        char = lex.token[-1]
+        char = token.value[-1]
         @count_for_char[char] += 1 if @count_for_char.key?(char)
-      when :on_embexpr_beg
+      when :EMBEXPR_BEGIN
         # ^^^
         # Embedded string expressions like `"#{foo} <-embed"`
         # are parsed with chars:
         #
-        # `#{` as :on_embexpr_beg
-        #  `}` as :on_embexpr_end
-        #
-        # We cannot ignore both :on_emb_expr_beg and :on_embexpr_end
-        # because sometimes the lexer thinks something is an embed
-        # string end, when it is not like `lol = }` (no clue why).
+        # `#{` as :EMBEXPR_BEGIN
+        #  `}` as :EMBEXPR_END
         #
         # When we see `#{` count it as a `{` or we will
         # have a mis-match count.
         #
-        case lex.token
-        when "\#{"
-          @count_for_char["{"] += 1
-        end
+        @count_for_char["{"] += 1
       else
-        @end_count += 1 if lex.is_end?
-        @kw_count += 1 if lex.is_kw?
-        @count_for_char[lex.token] += 1 if @count_for_char.key?(lex.token)
+        @end_count += 1 if token.is_end?
+        @kw_count += 1 if token.is_kw?
+        @count_for_char[token.value] += 1 if @count_for_char.key?(token.value)
       end
     end
 
diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb
deleted file mode 100644
index c16fbb52d3..0000000000
--- a/lib/syntax_suggest/lex_all.rb
+++ /dev/null
@@ -1,74 +0,0 @@
-# frozen_string_literal: true
-
-module SyntaxSuggest
-  # Ripper.lex is not guaranteed to lex the entire source document
-  #
-  # This class guarantees the whole document is lex-ed by iteratively
-  # lexing the document where ripper stopped.
-  #
-  # Prism likely doesn't have the same problem. Once ripper support is removed
-  # we can likely reduce the complexity here if not remove the whole concept.
-  #
-  # Example usage:
-  #
-  #   lex = LexAll.new(source: source)
-  #   lex.each do |value|
-  #     puts value.line
-  #   end
-  class LexAll
-    include Enumerable
-
-    def initialize(source:, source_lines: nil)
-      @lex = self.class.lex(source, 1)
-      lineno = @lex.last[0][0] + 1
-      source_lines ||= source.lines
-      last_lineno = source_lines.length
-
-      until lineno >= last_lineno
-        lines = source_lines[lineno..]
-
-        @lex.concat(
-          self.class.lex(lines.join, lineno + 1)
-        )
-
-        lineno = @lex.last[0].first + 1
-      end
-
-      last_lex = nil
-      @lex.map! { |elem|
-        last_lex = LexValue.new(elem[0].first, elem[1], elem[2], elem[3], last_lex)
-      }
-    end
-
-    if SyntaxSuggest.use_prism_parser?
-      def self.lex(source, line_number)
-        Prism.lex_compat(source, line: line_number).value.sort_by { |values| values[0] }
-      end
-    else
-      def self.lex(source, line_number)
-        Ripper::Lexer.new(source, "-", line_number).parse.sort_by(&:pos)
-      end
-    end
-
-    def to_a
-      @lex
-    end
-
-    def each
-      return @lex.each unless block_given?
-      @lex.each do |x|
-        yield x
-      end
-    end
-
-    def [](index)
-      @lex[index]
-    end
-
-    def last
-      @lex.last
-    end
-  end
-end
-
-require_relative "lex_value"
diff --git a/lib/syntax_suggest/lex_value.rb b/lib/syntax_suggest/lex_value.rb
deleted file mode 100644
index b46a332772..0000000000
--- a/lib/syntax_suggest/lex_value.rb
+++ /dev/null
@@ -1,70 +0,0 @@
-# frozen_string_literal: true
-
-module SyntaxSuggest
-  # Value object for accessing lex values
-  #
-  # This lex:
-  #
-  #   [1, 0], :on_ident, "describe", CMDARG
-  #
-  # Would translate into:
-  #
-  #  lex.line # => 1
-  #  lex.type # => :on_indent
-  #  lex.token # => "describe"
-  class LexValue
-    attr_reader :line, :type, :token, :state
-
-    def initialize(line, type, token, state, last_lex = nil)
-      @line = line
-      @type = type
-      @token = token
-      @state = state
-
-      set_kw_end(last_lex)
-    end
-
-    private def set_kw_end(last_lex)
-      @is_end = false
-      @is_kw = false
-      return if type != :on_kw
-
-      return if last_lex && last_lex.fname? # https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953
-
-      case token
-      when "if", "unless", "while", "until"
-        # Only count if/unless when it's not a "trailing" if/unless
-        # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375
-        @is_kw = true unless expr_label?
-      when "def", "case", "for", "begin", "class", "module", "do"
-        @is_kw = true
-      when "end"
-        @is_end = true
-      end
-    end
-
-    def fname?
-      state.allbits?(Ripper::EXPR_FNAME)
-    end
-
-    def ignore_newline?
-      type == :on_ignored_nl
-    end
-
-    def is_end?
-      @is_end
-    end
-
-    def is_kw?
-      @is_kw
-    end
-
-    def expr_beg?
-      state.anybits?(Ripper::EXPR_BEG)
-    end
-
-    def expr_label?
-      state.allbits?(Ripper::EXPR_LABEL)
-    end
-  end
-end
diff --git a/lib/syntax_suggest/ripper_errors.rb b/lib/syntax_suggest/ripper_errors.rb
deleted file mode 100644
index 4e2bc90948..0000000000
--- a/lib/syntax_suggest/ripper_errors.rb
+++ /dev/null
@@ -1,39 +0,0 @@
-# frozen_string_literal: true
-
-module SyntaxSuggest
-  # Capture parse errors from Ripper
-  #
-  # Prism returns the errors with their messages, but Ripper
-  # does not. To get them we must make a custom subclass.
-  #
-  # Example:
-  #
-  #   puts RipperErrors.new(" def foo").call.errors
-  #   # => ["syntax error, unexpected end-of-input, expecting ';' or '\\n'"]
-  class RipperErrors < Ripper
-    attr_reader :errors
-
-    # Comes from ripper, called
-    # on every parse error, msg
-    # is a string
-    def on_parse_error(msg)
-      @errors ||= []
-      @errors << msg
-    end
-
-    alias_method :on_alias_error, :on_parse_error
-    alias_method :on_assign_error, :on_parse_error
-    alias_method :on_class_name_error, :on_parse_error
-    alias_method :on_param_error, :on_parse_error
-    alias_method :compile_error, :on_parse_error
-
-    def call
-      @run_once ||= begin
-        @errors = []
-        parse
-        true
-      end
-      self
-    end
-  end
-end
diff --git a/lib/syntax_suggest/syntax_suggest.gemspec b/lib/syntax_suggest/syntax_suggest.gemspec
index 756a85bf63..44e458aaad 100644
--- a/lib/syntax_suggest/syntax_suggest.gemspec
+++ b/lib/syntax_suggest/syntax_suggest.gemspec
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
   spec.description = 'When you get an "unexpected end" in your syntax this gem helps you find it'
   spec.homepage = "https://github.com/ruby/syntax_suggest.git"
   spec.license = "MIT"
-  spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0")
+  spec.required_ruby_version = Gem::Requirement.new(">= 3.3.0")
 
   spec.metadata["homepage_uri"] = spec.homepage
   spec.metadata["source_code_uri"] = "https://github.com/ruby/syntax_suggest.git"
diff --git a/lib/syntax_suggest/token.rb b/lib/syntax_suggest/token.rb
new file mode 100644
index 0000000000..fc52639b1f
--- /dev/null
+++ b/lib/syntax_suggest/token.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module SyntaxSuggest
+  # Value object for accessing Prism token values
+  #
+  # This [token, state] pair from the Prism lexer:
+  #
+  #   [IDENTIFIER(1,0)-(1,8)("describe"), 32]
+  #
+  # Would translate into:
+  #
+  #  token.location # => (1,0)-(1,8)
+  #  token.type # => :IDENTIFIER
+  #  token.value # => "describe"
+  class Token
+    attr_reader :location, :type, :value
+
+    KW_TYPES = %i[
+      KEYWORD_IF KEYWORD_UNLESS KEYWORD_WHILE KEYWORD_UNTIL
+      KEYWORD_DEF KEYWORD_CASE KEYWORD_FOR KEYWORD_BEGIN KEYWORD_CLASS KEYWORD_MODULE KEYWORD_DO KEYWORD_DO_LOOP
+    ].to_set.freeze
+    private_constant :KW_TYPES
+
+    def initialize(prism_token, previous_prism_token, visitor)
+      @location = prism_token.location
+      @type = prism_token.type
+      @value = prism_token.value
+
+      # Prism lexes `:module` as SYMBOL_BEGIN, KEYWORD_MODULE
+      # https://github.com/ruby/prism/issues/3940
+      symbol_content = previous_prism_token&.type == :SYMBOL_BEGIN
+      @is_kw = KW_TYPES.include?(@type)
+      @is_kw = false if symbol_content || visitor.endless_def_keyword_offsets.include?(@location.start_offset)
+      @is_end = @type == :KEYWORD_END
+    end
+
+    def line
+      @location.start_line
+    end
+
+    def is_end?
+      @is_end
+    end
+
+    def is_kw?
+      @is_kw
+    end
+  end
+end
diff --git a/lib/syntax_suggest/version.rb b/lib/syntax_suggest/version.rb
index 1aa908f4e5..9114a079f6 100644
--- a/lib/syntax_suggest/version.rb
+++ b/lib/syntax_suggest/version.rb
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module SyntaxSuggest
-  VERSION = "2.0.2"
+  VERSION = "3.0.0"
 end
diff --git a/lib/syntax_suggest/visitor.rb b/lib/syntax_suggest/visitor.rb
new file mode 100644
index 0000000000..6e25f7239c
--- /dev/null
+++ b/lib/syntax_suggest/visitor.rb
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+module SyntaxSuggest
+  # Walks the Prism AST to extract structural info that cannot be reliably determined from tokens
+  # alone.
+  #
+  # Such as the location of lines that must be logically joined so the search algorithm will
+  # treat them as one. Example:
+  #
+  #   source = <<~RUBY
+  #     User                        # 1
+  #       .where(name: "Earlopain") # 2
+  #       .first                    # 3
+  #   RUBY
+  #   ast, _tokens = Prism.parse_lex(source).value
+  #   visitor = Visitor.new
+  #   visitor.visit(ast)
+  #   visitor.consecutive_lines # => Set[2, 1]
+  #
+  # This output means that line 1 and line 2 need to be joined with their next line.
+  #
+  # It also records the location of "endless" method definitions. For example:
+  #
+  #   source = <<~RUBY
+  #     def cube(x)
+  #       x * x * x
+  #     end
+  #     def square(x) = x * x # 1
+  #   RUBY
+  #
+  #   ast, _tokens = Prism.parse_lex(source).value
+  #   visitor = Visitor.new
+  #   visitor.visit(ast)
+  #   visitor.endless_def_keyword_offsets # => Set[28]
+  class Visitor < Prism::Visitor
+    attr_reader :endless_def_keyword_offsets, :consecutive_lines
+
+    def initialize
+      @endless_def_keyword_offsets = Set.new
+      @consecutive_lines = Set.new
+    end
+
+    # Called by Prism::Visitor for every method-call node in the AST
+    # (e.g. `foo.bar`, `foo.bar.baz`).
+    def visit_call_node(node)
+      receiver_loc = node.receiver&.location
+      call_operator_loc = node.call_operator_loc
+      message_loc = node.message_loc
+      if receiver_loc && call_operator_loc && message_loc
+        # dot-leading (dot on the next line)
+        #   foo        # line 1 - consecutive
+        #     .bar     # line 2
+        if receiver_loc.end_line != call_operator_loc.start_line && call_operator_loc.start_line == message_loc.start_line
+          (receiver_loc.end_line..call_operator_loc.start_line - 1).each do |line|
+            @consecutive_lines << line
+          end
+        end
+
+        # dot-trailing (dot on the same line as the receiver)
+        #   foo.       # line 1 - consecutive
+        #     bar      # line 2
+        if receiver_loc.end_line == call_operator_loc.start_line && call_operator_loc.start_line != message_loc.start_line
+          (call_operator_loc.start_line..message_loc.start_line - 1).each do |line|
+            @consecutive_lines << line
+          end
+        end
+      end
+      super
+    end
+
+    # Called by Prism::Visitor for every `def` node in the AST.
+    # Records the keyword start location for endless method definitions
+    # like `def foo = 123`. These are valid without a matching `end`,
+    # so Token must exclude them when deciding if a line is a keyword.
+    def visit_def_node(node)
+      @endless_def_keyword_offsets << node.def_keyword_loc.start_offset if node.equal_loc
+      super
+    end
+  end
+end