diff options
Diffstat (limited to 'lib/syntax_suggest/clean_document.rb')
| -rw-r--r-- | lib/syntax_suggest/clean_document.rb | 119 |
1 files changed, 19 insertions, 100 deletions
diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb index b572189259..94c68d8ad4 100644 --- a/lib/syntax_suggest/clean_document.rb +++ b/lib/syntax_suggest/clean_document.rb @@ -10,7 +10,7 @@ module SyntaxSuggest # # 1. Sanitize/format input source # 2. Search for invalid blocks - # 3. Format invalid blocks into something meaninful + # 3. Format invalid blocks into something meaningful # # This class handles the first part. # @@ -47,9 +47,9 @@ module SyntaxSuggest # ## Heredocs # # A heredoc is an way of defining a multi-line string. They can cause many - # problems. If left as a single line, Ripper would try to parse the contents + # problems. If left as a single line, the parser would try to parse the contents # as ruby code rather than as a string. Even without this problem, we still - # hit an issue with indentation + # hit an issue with indentation: # # 1 foo = <<~HEREDOC # 2 "Be yourself; everyone else is already taken."" @@ -66,27 +66,9 @@ module SyntaxSuggest # # All of these problems are fixed by joining the whole heredoc into a single # line. - # - # ## Comments and whitespace - # - # Comments can throw off the way the lexer tells us that the line - # logically belongs with the next line. This is valid ruby but - # results in a different lex output than before: - # - # 1 User. - # 2 where(name: "schneems"). - # 3 # Comment here - # 4 first - # - # To handle this we can replace comment lines with empty lines - # and then re-lex the source. This removal and re-lexing preserves - # line index and document size, but generates an easier to work with - # document. - # class CleanDocument def initialize(source:) - lines = clean_sweep(source: source) - @document = CodeLine.from_source(lines.join, lines: lines) + @document = CodeLine.from_source(source) end # Call all of the document "cleaners" @@ -110,60 +92,6 @@ module SyntaxSuggest @document.join end - # Remove comments and whitespace only lines - # - # replace with empty newlines - # - # source = <<~'EOM' - # # Comment 1 - # puts "hello" - # # Comment 2 - # puts "world" - # EOM - # - # lines = CleanDocument.new(source: source).lines - # expect(lines[0].to_s).to eq("\n") - # expect(lines[1].to_s).to eq("puts "hello") - # expect(lines[2].to_s).to eq("\n") - # expect(lines[3].to_s).to eq("puts "world") - # - # Important: This must be done before lexing. - # - # After this change is made, we lex the document because - # removing comments can change how the doc is parsed. - # - # For example: - # - # values = LexAll.new(source: <<~EOM)) - # User. - # # comment - # where(name: 'schneems') - # EOM - # expect( - # values.count {|v| v.type == :on_ignored_nl} - # ).to eq(1) - # - # After the comment is removed: - # - # values = LexAll.new(source: <<~EOM)) - # User. - # - # where(name: 'schneems') - # EOM - # expect( - # values.count {|v| v.type == :on_ignored_nl} - # ).to eq(2) - # - def clean_sweep(source:) - source.lines.map do |line| - if line.match?(/^\s*(#[^{].*)?$/) # https://rubular.com/r/LLE10D8HKMkJvs - $/ - else - line - end - end - end - # Smushes all heredoc lines into one line # # source = <<~'EOM' @@ -180,11 +108,11 @@ module SyntaxSuggest start_index_stack = [] heredoc_beg_end_index = [] lines.each do |line| - line.lex.each do |lex_value| - case lex_value.type - when :on_heredoc_beg + line.tokens.each do |token| + case token.type + when :HEREDOC_START start_index_stack << line.index - when :on_heredoc_end + when :HEREDOC_END start_index = start_index_stack.pop end_index = line.index heredoc_beg_end_index << [start_index, end_index] @@ -210,20 +138,10 @@ module SyntaxSuggest # expect(lines[0].to_s).to eq(source) # expect(lines[1].to_s).to eq("") # - # The one known case this doesn't handle is: - # - # Ripper.lex <<~EOM - # a && - # b || - # c - # EOM - # - # For some reason this introduces `on_ignore_newline` but with BEG type - # def join_consecutive! - consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line| - take_while_including(code_line.index..-1) do |line| - line.ignore_newline_not_beg? + consecutive_groups = @document.select(&:consecutive?).map do |code_line| + take_while_including(code_line.index..) do |line| + line.consecutive? end end @@ -243,7 +161,7 @@ module SyntaxSuggest # expect(lines[1].to_s).to eq("") def join_trailing_slash! trailing_groups = @document.select(&:trailing_slash?).map do |code_line| - take_while_including(code_line.index..-1) { |x| x.trailing_slash? } + take_while_including(code_line.index..) { |x| x.trailing_slash? } end join_groups(trailing_groups) self @@ -265,22 +183,23 @@ module SyntaxSuggest groups.each do |lines| line = lines.first - # Handle the case of multiple groups in a a row + # Handle the case of multiple groups in a row # if one is already replaced, move on next if @document[line.index].empty? # Join group into the first line @document[line.index] = CodeLine.new( - lex: lines.map(&:lex).flatten, + tokens: lines.map(&:tokens).flatten, line: lines.join, - index: line.index + index: line.index, + consecutive: false ) # Hide the rest of the lines - lines[1..-1].each do |line| + lines[1..].each do |line| # The above lines already have newlines in them, if add more # then there will be double newline, use an empty line instead - @document[line.index] = CodeLine.new(line: "", index: line.index, lex: []) + @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [], consecutive: false) end end self @@ -291,7 +210,7 @@ module SyntaxSuggest # Like `take_while` except when it stops # iterating, it also returns the line # that caused it to stop - def take_while_including(range = 0..-1) + def take_while_including(range = 0..) take_next_and_stop = false @document[range].take_while do |line| next if take_next_and_stop |
