diff options
Diffstat (limited to 'lib/syntax_suggest/clean_document.rb')
| -rw-r--r-- | lib/syntax_suggest/clean_document.rb | 223 |
1 files changed, 223 insertions, 0 deletions
# frozen_string_literal: true

module SyntaxSuggest
  # Parses and sanitizes source into a lexically aware document
  #
  # Internally the document is represented by an array with each
  # index containing a CodeLine correlating to a line from the source code.
  #
  # There are three main phases in the algorithm:
  #
  # 1. Sanitize/format input source
  # 2. Search for invalid blocks
  # 3. Format invalid blocks into something meaningful
  #
  # This class handles the first part.
  #
  # The reason this class exists is to format input source
  # for better/easier/cleaner exploration.
  #
  # The CodeSearch class operates at the line level so
  # we must be careful to not introduce lines that look
  # valid by themselves, but when removed will trigger syntax errors
  # or strange behavior.
  #
  # ## Join Trailing slashes
  #
  # Code with a trailing slash is logically treated as a single line:
  #
  #   1 it "code can be split" \
  #   2    "across multiple lines" do
  #
  # In this case removing line 2 would add a syntax error. We get around
  # this by internally joining the two lines into a single "line" object
  #
  # ## Logically Consecutive lines
  #
  # Code that can be broken over multiple
  # lines such as method calls are on different lines:
  #
  #   1 User.
  #   2   where(name: "schneems").
  #   3   first
  #
  # Removing line 2 can introduce a syntax error. To fix this, all lines
  # are joined into one.
  #
  # ## Heredocs
  #
  # A heredoc is a way of defining a multi-line string. They can cause many
  # problems. If left as a single line, the parser would try to parse the contents
  # as ruby code rather than as a string. Even without this problem, we still
  # hit an issue with indentation:
  #
  #   1 foo = <<~HEREDOC
  #   2  "Be yourself; everyone else is already taken."
  #   3    ― Oscar Wilde
  #   4      puts "I look like ruby code" # but i'm still a heredoc
  #   5 HEREDOC
  #
  # If we didn't join these lines then our algorithm would think that line 4
  # is separate from the rest, has a higher indentation, then look at it first
  # and remove it.
  #
  # If the code evaluates line 5 by itself it will think line 5 is a constant,
  # remove it, and introduce a syntax error.
  #
  # All of these problems are fixed by joining the whole heredoc into a single
  # line.
  class CleanDocument
    # Builds the document by handing `source` to CodeLine.from_source.
    def initialize(source:)
      @document = CodeLine.from_source(source)
    end

    # Call all of the document "cleaners"
    # and return self
    def call
      join_trailing_slash!
      join_consecutive!
      join_heredoc!

      self
    end

    # Return an array of CodeLines in the
    # document
    def lines
      @document
    end

    # Renders the document back to a string
    def to_s
      @document.join
    end

    # Smushes all heredoc lines into one line
    #
    #   source = <<~'EOM'
    #     foo = <<~HEREDOC
    #        lol
    #        hehehe
    #     HEREDOC
    #   EOM
    #
    #   lines = CleanDocument.new(source: source).join_heredoc!.lines
    #   expect(lines[0].to_s).to eq(source)
    #   expect(lines[1].to_s).to eq("")
    #
    # Returns self.
    def join_heredoc!
      start_index_stack = []
      heredoc_beg_end_index = []
      lines.each do |line|
        line.tokens.each do |token|
          case token.type
          when :HEREDOC_START
            start_index_stack << line.index
          when :HEREDOC_END
            start_index = start_index_stack.pop
            # An end marker with no recorded start has nothing to join.
            # Without this guard the nil start would turn into a beginless
            # range and merge every line from the top of the document.
            next unless start_index

            heredoc_beg_end_index << [start_index, line.index]
          end
        end
      end

      heredoc_groups = heredoc_beg_end_index.map do |start_index, end_index|
        @document[start_index..end_index]
      end

      join_groups(heredoc_groups)
      self
    end

    # Smushes logically "consecutive" lines
    #
    #   source = <<~'EOM'
    #     User.
    #       where(name: 'schneems').
    #       first
    #   EOM
    #
    #   lines = CleanDocument.new(source: source).join_consecutive!.lines
    #   expect(lines[0].to_s).to eq(source)
    #   expect(lines[1].to_s).to eq("")
    #
    # Returns self.
    def join_consecutive!
      consecutive_groups = @document.select(&:consecutive?).map do |code_line|
        take_while_including(code_line.index..) do |line|
          line.consecutive?
        end
      end

      join_groups(consecutive_groups)
      self
    end

    # Join lines with a trailing slash
    #
    #   source = <<~'EOM'
    #     it "code can be split" \
    #        "across multiple lines" do
    #   EOM
    #
    #   lines = CleanDocument.new(source: source).join_trailing_slash!.lines
    #   expect(lines[0].to_s).to eq(source)
    #   expect(lines[1].to_s).to eq("")
    #
    # Returns self.
    def join_trailing_slash!
      trailing_groups = @document.select(&:trailing_slash?).map do |code_line|
        take_while_including(code_line.index..) { |x| x.trailing_slash? }
      end
      join_groups(trailing_groups)
      self
    end

    # Helper method for joining "groups" of lines
    #
    # Input is expected to be type Array<Array<CodeLine>>
    #
    # The outer array holds the various "groups" while the
    # inner array holds code lines.
    #
    # All code lines are "joined" into the first line in
    # their group.
    #
    # To preserve document size, empty lines are placed
    # in the place of the lines that were "joined"
    #
    # Returns self.
    def join_groups(groups)
      groups.each do |group|
        first_line = group.first

        # Handle the case of multiple groups in a row
        # if one is already replaced, move on
        next if @document[first_line.index].empty?

        # Join group into the first line
        @document[first_line.index] = CodeLine.new(
          tokens: group.map(&:tokens).flatten,
          line: group.join,
          index: first_line.index,
          consecutive: false
        )

        # Hide the rest of the lines
        group[1..].each do |joined_line|
          # The above lines already have newlines in them, if add more
          # then there will be double newline, use an empty line instead
          @document[joined_line.index] = CodeLine.new(line: "", index: joined_line.index, tokens: [], consecutive: false)
        end
      end
      self
    end

    # Helper method for grabbing elements from document
    #
    # Like `take_while` except when it stops
    # iterating, it also returns the line
    # that caused it to stop
    #
    # `range` selects the slice of the document to walk (whole document
    # by default). The block is yielded each line and should return a
    # truthy value while iteration should continue.
    def take_while_including(range = 0..)
      take_next_and_stop = false
      @document[range].take_while do |line|
        # The previous line failed the block and has already been kept,
        # so returning nil here ends the take_while.
        next if take_next_and_stop

        take_next_and_stop = !(yield line)
        true
      end
    end
  end
end
