1 files changed, 19 insertions, 100 deletions
diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb
index b572189259..94c68d8ad4 100644
--- a/lib/syntax_suggest/clean_document.rb
+++ b/lib/syntax_suggest/clean_document.rb
@@ -10,7 +10,7 @@ module SyntaxSuggest
   #
   # 1. Sanitize/format input source
   # 2. Search for invalid blocks
-  # 3. Format invalid blocks into something meaninful
+  # 3. Format invalid blocks into something meaningful
   #
   # This class handles the first part.
   #
@@ -47,9 +47,9 @@ module SyntaxSuggest
   # ## Heredocs
   #
   # A heredoc is an way of defining a multi-line string. They can cause many
-  # problems. If left as a single line, Ripper would try to parse the contents
+  # problems. If left as a single line, the parser would treat the contents
   # as ruby code rather than as a string. Even without this problem, we still
-  # hit an issue with indentation
+  # hit an issue with indentation:
   #
   #    1 foo = <<~HEREDOC
   #    2  "Be yourself; everyone else is already taken.""
@@ -66,27 +66,9 @@ module SyntaxSuggest
   #
   # All of these problems are fixed by joining the whole heredoc into a single
   # line.
-  #
-  # ## Comments and whitespace
-  #
-  # Comments can throw off the way the lexer tells us that the line
-  # logically belongs with the next line. This is valid ruby but
-  # results in a different lex output than before:
-  #
-  #     1 User.
-  #     2   where(name: "schneems").
-  #     3   # Comment here
-  #     4   first
-  #
-  # To handle this we can replace comment lines with empty lines
-  # and then re-lex the source. This removal and re-lexing preserves
-  # line index and document size, but generates an easier to work with
-  # document.
-  #
   class CleanDocument
     def initialize(source:)
-      lines = clean_sweep(source: source)
-      @document = CodeLine.from_source(lines.join, lines: lines)
+      @document = CodeLine.from_source(source)
     end
 
     # Call all of the document "cleaners"
@@ -110,60 +92,6 @@ module SyntaxSuggest
       @document.join
     end
 
-    # Remove comments and whitespace only lines
-    #
-    # replace with empty newlines
-    #
-    #     source = <<~'EOM'
-    #       # Comment 1
-    #       puts "hello"
-    #       # Comment 2
-    #       puts "world"
-    #     EOM
-    #
-    #     lines = CleanDocument.new(source: source).lines
-    #     expect(lines[0].to_s).to eq("\n")
-    #     expect(lines[1].to_s).to eq("puts "hello")
-    #     expect(lines[2].to_s).to eq("\n")
-    #     expect(lines[3].to_s).to eq("puts "world")
-    #
-    # Important: This must be done before lexing.
-    #
-    # After this change is made, we lex the document because
-    # removing comments can change how the doc is parsed.
-    #
-    # For example:
-    #
-    #     values = LexAll.new(source: <<~EOM))
-    #       User.
-    #         # comment
-    #         where(name: 'schneems')
-    #     EOM
-    #     expect(
-    #       values.count {|v| v.type == :on_ignored_nl}
-    #     ).to eq(1)
-    #
-    # After the comment is removed:
-    #
-    #     values = LexAll.new(source: <<~EOM))
-    #       User.
-    #
-    #         where(name: 'schneems')
-    #     EOM
-    #     expect(
-    #      values.count {|v| v.type == :on_ignored_nl}
-    #    ).to eq(2)
-    #
-    def clean_sweep(source:)
-      source.lines.map do |line|
-        if line.match?(/^\s*(#[^{].*)?$/) # https://rubular.com/r/LLE10D8HKMkJvs
-          $/
-        else
-          line
-        end
-      end
-    end
-
     # Smushes all heredoc lines into one line
     #
     #     source = <<~'EOM'
@@ -180,11 +108,11 @@ module SyntaxSuggest
       start_index_stack = []
       heredoc_beg_end_index = []
       lines.each do |line|
-        line.lex.each do |lex_value|
-          case lex_value.type
-          when :on_heredoc_beg
+        line.tokens.each do |token|
+          case token.type
+          when :HEREDOC_START
             start_index_stack << line.index
-          when :on_heredoc_end
+          when :HEREDOC_END
             start_index = start_index_stack.pop
             end_index = line.index
             heredoc_beg_end_index << [start_index, end_index]
@@ -210,20 +138,10 @@ module SyntaxSuggest
     #     expect(lines[0].to_s).to eq(source)
     #     expect(lines[1].to_s).to eq("")
     #
-    # The one known case this doesn't handle is:
-    #
-    #     Ripper.lex <<~EOM
-    #       a &&
-    #        b ||
-    #        c
-    #     EOM
-    #
-    # For some reason this introduces `on_ignore_newline` but with BEG type
-    #
     def join_consecutive!
-      consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
-        take_while_including(code_line.index..-1) do |line|
-          line.ignore_newline_not_beg?
+      consecutive_groups = @document.select(&:consecutive?).map do |code_line|
+        take_while_including(code_line.index..) do |line|
+          line.consecutive?
         end
       end
 
@@ -243,7 +161,7 @@ module SyntaxSuggest
     #     expect(lines[1].to_s).to eq("")
     def join_trailing_slash!
       trailing_groups = @document.select(&:trailing_slash?).map do |code_line|
-        take_while_including(code_line.index..-1) { |x| x.trailing_slash? }
+        take_while_including(code_line.index..) { |x| x.trailing_slash? }
       end
       join_groups(trailing_groups)
       self
@@ -265,22 +183,23 @@ module SyntaxSuggest
       groups.each do |lines|
         line = lines.first
 
-        # Handle the case of multiple groups in a a row
+        # Handle the case of multiple groups in a row
         # if one is already replaced, move on
         next if @document[line.index].empty?
 
         # Join group into the first line
         @document[line.index] = CodeLine.new(
-          lex: lines.map(&:lex).flatten,
+          tokens: lines.map(&:tokens).flatten,
           line: lines.join,
-          index: line.index
+          index: line.index,
+          consecutive: false
         )
 
         # Hide the rest of the lines
-        lines[1..-1].each do |line|
+        lines[1..].each do |line|
           # The above lines already have newlines in them, if add more
           # then there will be double newline, use an empty line instead
-          @document[line.index] = CodeLine.new(line: "", index: line.index, lex: [])
+          @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [], consecutive: false)
         end
       end
       self
@@ -291,7 +210,7 @@ module SyntaxSuggest
     # Like `take_while` except when it stops
     # iterating, it also returns the line
     # that caused it to stop
-    def take_while_including(range = 0..-1)
+    def take_while_including(range = 0..)
       take_next_and_stop = false
       @document[range].take_while do |line|
         next if take_next_and_stop