summaryrefslogtreecommitdiff
path: root/lib/syntax_suggest/code_frontier.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/syntax_suggest/code_frontier.rb')
-rw-r--r--lib/syntax_suggest/code_frontier.rb178
1 files changed, 178 insertions, 0 deletions
diff --git a/lib/syntax_suggest/code_frontier.rb b/lib/syntax_suggest/code_frontier.rb
new file mode 100644
index 0000000000..0f870d0df0
--- /dev/null
+++ b/lib/syntax_suggest/code_frontier.rb
@@ -0,0 +1,178 @@
+# frozen_string_literal: true
+
+module SyntaxSuggest
+ # The main function of the frontier is to hold the edges of our search and to
+ # evaluate when we can stop searching.
+
+ # There are three main phases in the algorithm:
+ #
+ # 1. Sanitize/format input source
+ # 2. Search for invalid blocks
+ # 3. Format invalid blocks into something meaninful
+ #
+ # The Code frontier is a critical part of the second step
+ #
+ # ## Knowing where we've been
+ #
+ # Once a code block is generated it is added onto the frontier. Then it will be
+ # sorted by indentation and frontier can be filtered. Large blocks that fully enclose a
+ # smaller block will cause the smaller block to be evicted.
+ #
+ # CodeFrontier#<<(block) # Adds block to frontier
+ # CodeFrontier#pop # Removes block from frontier
+ #
+ # ## Knowing where we can go
+ #
+ # Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line`
+ # when called, this method returns, a line of code with the highest indentation.
+ #
+ # The returned line of code can be used to build a CodeBlock and then that code block
+ # is added back to the frontier. Then, the lines are removed from the
+ # "unvisited" so we don't double-create the same block.
+ #
+ # CodeFrontier#next_indent_line # Shows next line
+ # CodeFrontier#register_indent_block(block) # Removes lines from unvisited
+ #
+ # ## Knowing when to stop
+ #
+ # The frontier knows how to check the entire document for a syntax error. When blocks
+ # are added onto the frontier, they're removed from the document. When all code containing
+ # syntax errors has been added to the frontier, the document will be parsable without a
+ # syntax error and the search can stop.
+ #
+ # CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors
+ #
+ # ## Filtering false positives
+ #
+ # Once the search is completed, the frontier may have multiple blocks that do not contain
+ # the syntax error. To limit the result to the smallest subset of "invalid blocks" call:
+ #
+ # CodeFrontier#detect_invalid_blocks
+ #
+ class CodeFrontier
+ def initialize(code_lines:, unvisited: UnvisitedLines.new(code_lines: code_lines))
+ @code_lines = code_lines
+ @unvisited = unvisited
+ @queue = PriorityEngulfQueue.new
+
+ @check_next = true
+ end
+
+ def count
+ @queue.length
+ end
+
+ # Performance optimization
+ #
+ # Parsing with ripper is expensive
+ # If we know we don't have any blocks with invalid
+ # syntax, then we know we cannot have found
+ # the incorrect syntax yet.
+ #
+ # When an invalid block is added onto the frontier
+ # check document state
+ private def can_skip_check?
+ check_next = @check_next
+ @check_next = false
+
+ if check_next
+ false
+ else
+ true
+ end
+ end
+
+ # Returns true if the document is valid with all lines
+ # removed. By default it checks all blocks in present in
+ # the frontier array, but can be used for arbitrary arrays
+ # of codeblocks as well
+ def holds_all_syntax_errors?(block_array = @queue, can_cache: true)
+ return false if can_cache && can_skip_check?
+
+ without_lines = block_array.to_a.flat_map do |block|
+ block.lines
+ end
+
+ SyntaxSuggest.valid_without?(
+ without_lines: without_lines,
+ code_lines: @code_lines
+ )
+ end
+
+ # Returns a code block with the largest indentation possible
+ def pop
+ @queue.pop
+ end
+
+ def next_indent_line
+ @unvisited.peek
+ end
+
+ def expand?
+ return false if @queue.empty?
+ return true if @unvisited.empty?
+
+ frontier_indent = @queue.peek.current_indent
+ unvisited_indent = next_indent_line.indent
+
+ if ENV["SYNTAX_SUGGEST_DEBUG"]
+ puts "```"
+ puts @queue.peek
+ puts "```"
+ puts " @frontier indent: #{frontier_indent}"
+ puts " @unvisited indent: #{unvisited_indent}"
+ end
+
+ # Expand all blocks before moving to unvisited lines
+ frontier_indent >= unvisited_indent
+ end
+
+ # Keeps track of what lines have been added to blocks and which are not yet
+ # visited.
+ def register_indent_block(block)
+ @unvisited.visit_block(block)
+ self
+ end
+
+ # When one element fully encapsulates another we remove the smaller
+ # block from the frontier. This prevents double expansions and all-around
+ # weird behavior. However this guarantee is quite expensive to maintain
+ def register_engulf_block(block)
+ end
+
+ # Add a block to the frontier
+ #
+ # This method ensures the frontier always remains sorted (in indentation order)
+ # and that each code block's lines are removed from the indentation hash so we
+ # don't re-evaluate the same line multiple times.
+ def <<(block)
+ @unvisited.visit_block(block)
+
+ @queue.push(block)
+
+ @check_next = true if block.invalid?
+
+ self
+ end
+
+ # Example:
+ #
+ # combination([:a, :b, :c, :d])
+ # # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]]
+ def self.combination(array)
+ guesses = []
+ 1.upto(array.length).each do |size|
+ guesses.concat(array.combination(size).to_a)
+ end
+ guesses
+ end
+
+ # Given that we know our syntax error exists somewhere in our frontier, we want to find
+ # the smallest possible set of blocks that contain all the syntax errors
+ def detect_invalid_blocks
+ self.class.combination(@queue.to_a.select(&:invalid?)).detect do |block_array|
+ holds_all_syntax_errors?(block_array, can_cache: false)
+ end || []
+ end
+ end
+end