summaryrefslogtreecommitdiff
path: root/lib/syntax_suggest
diff options
context:
space:
mode:
Diffstat (limited to 'lib/syntax_suggest')
-rw-r--r--lib/syntax_suggest/api.rb23
-rw-r--r--lib/syntax_suggest/around_block_scan.rb290
-rw-r--r--lib/syntax_suggest/block_expand.rb113
-rw-r--r--lib/syntax_suggest/capture/before_after_keyword_ends.rb85
-rw-r--r--lib/syntax_suggest/capture/falling_indent_lines.rb71
-rw-r--r--lib/syntax_suggest/capture_code_context.rb34
-rw-r--r--lib/syntax_suggest/clean_document.rb119
-rw-r--r--lib/syntax_suggest/cli.rb5
-rw-r--r--lib/syntax_suggest/code_block.rb2
-rw-r--r--lib/syntax_suggest/code_frontier.rb4
-rw-r--r--lib/syntax_suggest/code_line.rb133
-rw-r--r--lib/syntax_suggest/code_search.rb4
-rw-r--r--lib/syntax_suggest/core_ext.rb136
-rw-r--r--lib/syntax_suggest/display_code_with_line_numbers.rb6
-rw-r--r--lib/syntax_suggest/display_invalid_blocks.rb3
-rw-r--r--lib/syntax_suggest/explain_syntax.rb22
-rw-r--r--lib/syntax_suggest/left_right_token_count.rb (renamed from lib/syntax_suggest/left_right_lex_count.rb)48
-rw-r--r--lib/syntax_suggest/lex_all.rb55
-rw-r--r--lib/syntax_suggest/lex_value.rb70
-rw-r--r--lib/syntax_suggest/mini_stringio.rb30
-rw-r--r--lib/syntax_suggest/parse_blocks_from_indent_line.rb6
-rw-r--r--lib/syntax_suggest/pathname_from_message.rb4
-rw-r--r--lib/syntax_suggest/ripper_errors.rb36
-rw-r--r--lib/syntax_suggest/scan_history.rb134
-rw-r--r--lib/syntax_suggest/syntax_suggest.gemspec8
-rw-r--r--lib/syntax_suggest/token.rb49
-rw-r--r--lib/syntax_suggest/version.rb2
-rw-r--r--lib/syntax_suggest/visitor.rb80
28 files changed, 910 insertions, 662 deletions
diff --git a/lib/syntax_suggest/api.rb b/lib/syntax_suggest/api.rb
index 5b725e13d7..5054efa888 100644
--- a/lib/syntax_suggest/api.rb
+++ b/lib/syntax_suggest/api.rb
@@ -5,9 +5,11 @@ require_relative "version"
require "tmpdir"
require "stringio"
require "pathname"
-require "ripper"
require "timeout"
+# Prism is the new parser, replacing Ripper
+require "prism"
+
module SyntaxSuggest
# Used to indicate a default value that cannot
# be confused with another input.
@@ -78,7 +80,7 @@ module SyntaxSuggest
code_lines: search.code_lines
).call
rescue Timeout::Error => e
- io.puts "Search timed out SYNTAX_SUGGEST_TIMEOUT=#{timeout}, run with DEBUG=1 for more info"
+ io.puts "Search timed out SYNTAX_SUGGEST_TIMEOUT=#{timeout}, run with SYNTAX_SUGGEST_DEBUG=1 for more info"
io.puts e.backtrace.first(3).join($/)
end
@@ -91,7 +93,9 @@ module SyntaxSuggest
dir = Pathname(dir)
dir.join(time).tap { |path|
path.mkpath
- FileUtils.ln_sf(time, dir.join("last"))
+ alias_dir = dir.join("last")
+ FileUtils.rm_rf(alias_dir) if alias_dir.exist?
+ FileUtils.ln_sf(time, alias_dir)
}
end
@@ -117,11 +121,7 @@ module SyntaxSuggest
def self.valid_without?(without_lines:, code_lines:)
lines = code_lines - Array(without_lines).flatten
- if lines.empty?
- true
- else
- valid?(lines)
- end
+ lines.empty? || valid?(lines)
end
# SyntaxSuggest.invalid? [Private]
@@ -131,7 +131,7 @@ module SyntaxSuggest
source = source.join if source.is_a?(Array)
source = source.to_s
- Ripper.new(source).tap(&:parse).error?
+ Prism.parse(source).failure?
end
# SyntaxSuggest.valid? [Private]
@@ -185,11 +185,10 @@ require_relative "explain_syntax"
require_relative "clean_document"
# Helpers
-require_relative "lex_all"
require_relative "code_line"
require_relative "code_block"
require_relative "block_expand"
-require_relative "ripper_errors"
+require_relative "mini_stringio"
require_relative "priority_queue"
require_relative "unvisited_lines"
require_relative "around_block_scan"
@@ -197,3 +196,5 @@ require_relative "priority_engulf_queue"
require_relative "pathname_from_message"
require_relative "display_invalid_blocks"
require_relative "parse_blocks_from_indent_line"
+require_relative "visitor"
+require_relative "token"
diff --git a/lib/syntax_suggest/around_block_scan.rb b/lib/syntax_suggest/around_block_scan.rb
index 2a57d1b19e..dd9af729c5 100644
--- a/lib/syntax_suggest/around_block_scan.rb
+++ b/lib/syntax_suggest/around_block_scan.rb
@@ -1,5 +1,7 @@
# frozen_string_literal: true
+require_relative "scan_history"
+
module SyntaxSuggest
# This class is useful for exploring contents before and after
# a block
@@ -24,201 +26,207 @@ module SyntaxSuggest
# puts scan.before_index # => 0
# puts scan.after_index # => 3
#
- # Contents can also be filtered using AroundBlockScan#skip
- #
- # To grab the next surrounding indentation use AroundBlockScan#scan_adjacent_indent
class AroundBlockScan
def initialize(code_lines:, block:)
@code_lines = code_lines
- @orig_before_index = block.lines.first.index
- @orig_after_index = block.lines.last.index
@orig_indent = block.current_indent
- @skip_array = []
- @after_array = []
- @before_array = []
- @stop_after_kw = false
- @skip_hidden = false
- @skip_empty = false
+ @stop_after_kw = false
+ @force_add_empty = false
+ @force_add_hidden = false
+ @target_indent = nil
+
+ @scanner = ScanHistory.new(code_lines: code_lines, block: block)
+ end
+
+ # When using this flag, `scan_while` will
+ # bypass the block it's given and always add a
+ # line that responds truthy to `CodeLine#hidden?`
+ #
+ # Lines are hidden when they've been evaluated by
+ # the parser as part of a block and found to contain
+ # valid code.
+ def force_add_hidden
+ @force_add_hidden = true
+ self
end
- def skip(name)
- case name
- when :hidden?
- @skip_hidden = true
- when :empty?
- @skip_empty = true
- else
- raise "Unsupported skip #{name}"
- end
+ # When using this flag, `scan_while` will
+ # bypass the block it's given and always add a
+ # line that responds truthy to `CodeLine#empty?`
+ #
+ # Empty lines contain no code, only whitespace such
+ # as leading spaces or a newline.
+ def force_add_empty
+ @force_add_empty = true
self
end
+ # Tells `scan_while` to look for mismatched keyword/end-s
+ #
+ # When scanning up, if we see more keywords than end-s it will
+ # stop. This might happen when scanning outside of a method body.
+ # The first scan line up would be a keyword and this setting would
+ # trigger a stop.
+ #
+ # When scanning down, stop if there are more end-s than keywords.
def stop_after_kw
@stop_after_kw = true
self
end
+ # Main work method
+ #
+ # The scan_while method takes a block that yields lines above and
+ # below the block. If the yield returns true, the @before_index
+ # or @after_index are modified to include the matched line.
+ #
+ # In addition to yielding individual lines, the internals of this
+ # object give a mini DSL to handle common situations such as
+ # stopping if we've found a keyword/end mis-match in one direction
+ # or the other.
def scan_while
- stop_next = false
-
- kw_count = 0
- end_count = 0
- index = before_lines.reverse_each.take_while do |line|
- next false if stop_next
- next true if @skip_hidden && line.hidden?
- next true if @skip_empty && line.empty?
+ stop_next_up = false
+ stop_next_down = false
- kw_count += 1 if line.is_kw?
- end_count += 1 if line.is_end?
- if @stop_after_kw && kw_count > end_count
- stop_next = true
- end
-
- yield line
- end.last&.index
+ @scanner.scan(
+ up: ->(line, kw_count, end_count) {
+ next false if stop_next_up
+ next true if @force_add_hidden && line.hidden?
+ next true if @force_add_empty && line.empty?
- if index && index < before_index
- @before_index = index
- end
+ if @stop_after_kw && kw_count > end_count
+ stop_next_up = true
+ end
- stop_next = false
- kw_count = 0
- end_count = 0
- index = after_lines.take_while do |line|
- next false if stop_next
- next true if @skip_hidden && line.hidden?
- next true if @skip_empty && line.empty?
+ yield line
+ },
+ down: ->(line, kw_count, end_count) {
+ next false if stop_next_down
+ next true if @force_add_hidden && line.hidden?
+ next true if @force_add_empty && line.empty?
- kw_count += 1 if line.is_kw?
- end_count += 1 if line.is_end?
- if @stop_after_kw && end_count > kw_count
- stop_next = true
- end
+ if @stop_after_kw && end_count > kw_count
+ stop_next_down = true
+ end
- yield line
- end.last&.index
+ yield line
+ }
+ )
- if index && index > after_index
- @after_index = index
- end
self
end
- def capture_neighbor_context
- lines = []
+ # Scanning is intentionally conservative because
+ # we have no way of rolling back an aggressive block (at this time)
+ #
+ # If a block was stopped for some trivial reason, (like an empty line)
+ # but the next line would have caused it to be balanced then we
+ # can check that condition and grab just one more line either up or
+ # down.
+ #
+ # For example, below if we're scanning up, line 2 might cause
+ # the scanning to stop. This is because empty lines might
+ # denote logical breaks where the user intended to chunk code
+ # which is a good place to stop and check validity. Unfortunately
+ # it also means we might have a "dangling" keyword or end.
+ #
+ # 1 def bark
+ # 2
+ # 3 end
+ #
+ # If lines 2 and 3 are in the block, then when this method is
+ # run it would see it is unbalanced, but that acquiring line 1
+ # would make it balanced, so that's what it does.
+ def lookahead_balance_one_line
kw_count = 0
end_count = 0
- before_lines.reverse_each do |line|
- next if line.empty?
- break if line.indent < @orig_indent
- next if line.indent != @orig_indent
-
+ lines.each do |line|
kw_count += 1 if line.is_kw?
end_count += 1 if line.is_end?
- if kw_count != 0 && kw_count == end_count
- lines << line
- break
- end
-
- lines << line
end
- lines.reverse!
-
- kw_count = 0
- end_count = 0
- after_lines.each do |line|
- next if line.empty?
- break if line.indent < @orig_indent
- next if line.indent != @orig_indent
-
- kw_count += 1 if line.is_kw?
- end_count += 1 if line.is_end?
- if kw_count != 0 && kw_count == end_count
- lines << line
- break
+ return self if kw_count == end_count # nothing to balance
+
+ @scanner.commit_if_changed # Rollback point if we don't find anything to optimize
+
+ # Try to eat up empty lines
+ @scanner.scan(
+ up: ->(line, _, _) { line.hidden? || line.empty? },
+ down: ->(line, _, _) { line.hidden? || line.empty? }
+ )
+
+ # More ends than keywords, check if we can balance expanding up
+ next_up = @scanner.next_up
+ next_down = @scanner.next_down
+ case end_count - kw_count
+ when 1
+ if next_up&.is_kw? && next_up.indent >= @target_indent
+ @scanner.scan(
+ up: ->(line, _, _) { line == next_up },
+ down: ->(line, _, _) { false }
+ )
+ @scanner.commit_if_changed
end
-
- lines << line
- end
-
- lines
- end
-
- def on_falling_indent
- last_indent = @orig_indent
- before_lines.reverse_each do |line|
- next if line.empty?
- if line.indent < last_indent
- yield line
- last_indent = line.indent
- end
- end
-
- last_indent = @orig_indent
- after_lines.each do |line|
- next if line.empty?
- if line.indent < last_indent
- yield line
- last_indent = line.indent
+ when -1
+ if next_down&.is_end? && next_down.indent >= @target_indent
+ @scanner.scan(
+ up: ->(line, _, _) { false },
+ down: ->(line, _, _) { line == next_down }
+ )
+ @scanner.commit_if_changed
end
end
- end
-
- def scan_neighbors
- scan_while { |line| line.not_empty? && line.indent >= @orig_indent }
- end
+ # Rollback any uncommitted changes
+ @scanner.stash_changes
- def next_up
- @code_lines[before_index.pred]
+ self
end
- def next_down
- @code_lines[after_index.next]
+ # Finds code lines at the same or greater indentation and adds them
+ # to the block
+ def scan_neighbors_not_empty
+ @target_indent = @orig_indent
+ scan_while { |line| line.not_empty? && line.indent >= @target_indent }
end
+ # Scan blocks based on indentation of next line above/below block
+ #
+ # Determines indentation of the next line above/below the current block.
+ #
+ # Normally this is called when a block has expanded to capture all "neighbors"
+ # at the same (or greater) indentation and needs to expand out. For example
+ # the `def/end` lines surrounding a method.
def scan_adjacent_indent
before_after_indent = []
- before_after_indent << (next_up&.indent || 0)
- before_after_indent << (next_down&.indent || 0)
- indent = before_after_indent.min
- scan_while { |line| line.not_empty? && line.indent >= indent }
+ before_after_indent << (@scanner.next_up&.indent || 0)
+ before_after_indent << (@scanner.next_down&.indent || 0)
- self
- end
+ @target_indent = before_after_indent.min
+ scan_while { |line| line.not_empty? && line.indent >= @target_indent }
- def start_at_next_line
- before_index
- after_index
- @before_index -= 1
- @after_index += 1
self
end
+ # Return the currently matched lines as a `CodeBlock`
+ #
+ # When a `CodeBlock` is created it will gather metadata about
+ # itself, so this is not a free conversion. Avoid allocating
+ # more CodeBlock's than needed
def code_block
CodeBlock.new(lines: lines)
end
+ # Returns the lines matched by the current scan as an
+ # array of CodeLines
def lines
- @code_lines[before_index..after_index]
- end
-
- def before_index
- @before_index ||= @orig_before_index
- end
-
- def after_index
- @after_index ||= @orig_after_index
- end
-
- private def before_lines
- @code_lines[0...before_index] || []
+ @scanner.lines
end
- private def after_lines
- @code_lines[after_index.next..-1] || []
+ # Manageable rspec errors
+ def inspect
+ "#<#{self.class}:0x0000123843lol >"
end
end
end
diff --git a/lib/syntax_suggest/block_expand.rb b/lib/syntax_suggest/block_expand.rb
index 396b2c3a1a..2751ae2a64 100644
--- a/lib/syntax_suggest/block_expand.rb
+++ b/lib/syntax_suggest/block_expand.rb
@@ -35,30 +35,121 @@ module SyntaxSuggest
@code_lines = code_lines
end
+ # Main interface. Expand current indentation, before
+ # expanding to a lower indentation
def call(block)
if (next_block = expand_neighbors(block))
- return next_block
+ next_block
+ else
+ expand_indent(block)
end
-
- expand_indent(block)
end
+ # Expands code to the next lowest indentation
+ #
+ # For example:
+ #
+ # 1 def dog
+ # 2 print "dog"
+ # 3 end
+ #
+ # If a block starts on line 2 then it has captured all its "neighbors" (code at
+ # the same indentation or higher). To continue expanding, this block must capture
+ # lines one and three which are at a different indentation level.
+ #
+ # This method allows fully expanded blocks to decrease their indentation level (so
+ # they can expand to capture more code up and down). It does this conservatively
+ # as there's no undo (currently).
def expand_indent(block)
- AroundBlockScan.new(code_lines: @code_lines, block: block)
- .skip(:hidden?)
+ now = AroundBlockScan.new(code_lines: @code_lines, block: block)
+ .force_add_hidden
.stop_after_kw
.scan_adjacent_indent
- .code_block
+
+ now.lookahead_balance_one_line
+
+ now.code_block
end
+ # A neighbor is code that is at or above the current indent line.
+ #
+ # First we build a block with all neighbors. If we can't go further
+ # then we decrease the indentation threshold and expand via indentation
+ # i.e. `expand_indent`
+ #
+ # Handles two general cases.
+ #
+ # ## Case #1: Check code inside of methods/classes/etc.
+ #
+ # It's important to note, that not everything in a given indentation level can be parsed
+ # as valid code even if it's part of valid code. For example:
+ #
+ # 1 hash = {
+ # 2 name: "richard",
+ # 3 dog: "cinco",
+ # 4 }
+ #
+ # In this case lines 2 and 3 will be neighbors, but they're invalid until `expand_indent`
+ # is called on them.
+ #
+ # When we are adding code within a method or class (at the same indentation level),
+ # use the empty lines to denote the programmer intended logical chunks.
+ # Stop and check each one. For example:
+ #
+ # 1 def dog
+ # 2 print "dog"
+ # 3
+ # 4 hash = {
+ # 5 end
+ #
+ # If we did not stop parsing at empty newlines then the block might mistakenly grab all
+ # the contents (lines 2, 3, and 4) and report them as being problems, instead of only
+ # line 4.
+ #
+ # ## Case #2: Expand/grab other logical blocks
+ #
+ # Once the search algorithm has converted all lines into blocks at a given indentation
+ # it will then `expand_indent`. Once the blocks that generates are expanded as neighbors
+ # we then begin seeing neighbors being other logical blocks i.e. a block's neighbors
+ # may be another method or class (something with keywords/ends).
+ #
+ # For example:
+ #
+ # 1 def bark
+ # 2
+ # 3 end
+ # 4
+ # 5 def sit
+ # 6 end
+ #
+ # In this case if lines 4, 5, and 6 are in a block when it tries to expand neighbors
+ # it will expand up. If it stops after line 2 or 3 it may cause problems since there's a
+ # valid kw/end pair, but the block will be checked without it.
+ #
+ # We try to resolve this edge case with `lookahead_balance_one_line` below.
def expand_neighbors(block)
- expanded_lines = AroundBlockScan.new(code_lines: @code_lines, block: block)
- .skip(:hidden?)
+ now = AroundBlockScan.new(code_lines: @code_lines, block: block)
+
+ # Initial scan
+ now
+ .force_add_hidden
.stop_after_kw
- .scan_neighbors
- .scan_while { |line| line.empty? } # Slurp up empties
+ .scan_neighbors_not_empty
+
+ # Slurp up empties
+ now
+ .scan_while { |line| line.empty? }
+
+ # If next line is kw and it will balance us, take it
+ expanded_lines = now
+ .lookahead_balance_one_line
.lines
+ # Don't allocate a block if it won't be used
+ #
+ # If nothing was taken, return nil to indicate that status
+ # used in `def call` to determine if
+ # we need to expand up/out (`expand_indent`)
if block.lines == expanded_lines
nil
else
@@ -66,7 +157,7 @@ module SyntaxSuggest
end
end
- # Managable rspec errors
+ # Manageable rspec errors
def inspect
"#<SyntaxSuggest::CodeBlock:0x0000123843lol >"
end
diff --git a/lib/syntax_suggest/capture/before_after_keyword_ends.rb b/lib/syntax_suggest/capture/before_after_keyword_ends.rb
new file mode 100644
index 0000000000..f53c57a4d1
--- /dev/null
+++ b/lib/syntax_suggest/capture/before_after_keyword_ends.rb
@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+
+module SyntaxSuggest
+ module Capture
+ # Shows surrounding kw/end pairs
+ #
+ # The purpose of showing these extra pairs is due to cases
+ # of ambiguity when only one visible line is matched.
+ #
+ # For example:
+ #
+ # 1 class Dog
+ # 2 def bark
+ # 4 def eat
+ # 5 end
+ # 6 end
+ #
+ # In this case either line 2 could be missing an `end` or
+ # line 4 was an extra line added by mistake (it happens).
+ #
+ # When we detect the above problem it shows the issue
+ # as only being on line 2
+ #
+ # 2 def bark
+ #
+ # Showing "neighbor" keyword pairs gives extra context:
+ #
+ # 2 def bark
+ # 4 def eat
+ # 5 end
+ #
+ #
+ # Example:
+ #
+ # lines = BeforeAfterKeywordEnds.new(
+ # block: block,
+ # code_lines: code_lines
+ # ).call()
+ #
+ class BeforeAfterKeywordEnds
+ def initialize(code_lines:, block:)
+ @scanner = ScanHistory.new(code_lines: code_lines, block: block)
+ @original_indent = block.current_indent
+ end
+
+ def call
+ lines = []
+
+ @scanner.scan(
+ up: ->(line, kw_count, end_count) {
+ next true if line.empty?
+ break if line.indent < @original_indent
+ next true if line.indent != @original_indent
+
+ # If we're going up and have one complete kw/end pair, stop
+ if kw_count != 0 && kw_count == end_count
+ lines << line
+ break
+ end
+
+ lines << line if line.is_kw? || line.is_end?
+ true
+ },
+ down: ->(line, kw_count, end_count) {
+ next true if line.empty?
+ break if line.indent < @original_indent
+ next true if line.indent != @original_indent
+
+ # If we're going down and have one complete kw/end pair, stop
+ if kw_count != 0 && kw_count == end_count
+ lines << line
+ break
+ end
+
+ lines << line if line.is_kw? || line.is_end?
+ true
+ }
+ )
+ @scanner.stash_changes
+
+ lines
+ end
+ end
+ end
+end
diff --git a/lib/syntax_suggest/capture/falling_indent_lines.rb b/lib/syntax_suggest/capture/falling_indent_lines.rb
new file mode 100644
index 0000000000..1e046b2ba5
--- /dev/null
+++ b/lib/syntax_suggest/capture/falling_indent_lines.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+module SyntaxSuggest
+ module Capture
+ # Shows the context around code provided by "falling" indentation
+ #
+ # If this is the original code lines:
+ #
+ # class OH
+ # def hello
+ # it "foo" do
+ # end
+ # end
+ #
+ # And this is the line that is captured
+ #
+ # it "foo" do
+ #
+ # It will yield its surrounding context:
+ #
+ # class OH
+ # def hello
+ # end
+ # end
+ #
+ # Example:
+ #
+ # FallingIndentLines.new(
+ # block: block,
+ # code_lines: @code_lines
+ # ).call do |line|
+ # @lines_to_output << line
+ # end
+ #
+ class FallingIndentLines
+ def initialize(code_lines:, block:)
+ @lines = nil
+ @scanner = ScanHistory.new(code_lines: code_lines, block: block)
+ @original_indent = block.current_indent
+ end
+
+ def call(&yieldable)
+ last_indent_up = @original_indent
+ last_indent_down = @original_indent
+
+ @scanner.commit_if_changed
+ @scanner.scan(
+ up: ->(line, _, _) {
+ next true if line.empty?
+
+ if line.indent < last_indent_up
+ yieldable.call(line)
+ last_indent_up = line.indent
+ end
+ true
+ },
+ down: ->(line, _, _) {
+ next true if line.empty?
+
+ if line.indent < last_indent_down
+ yieldable.call(line)
+ last_indent_down = line.indent
+ end
+ true
+ }
+ )
+ @scanner.stash_changes
+ end
+ end
+ end
+end
diff --git a/lib/syntax_suggest/capture_code_context.rb b/lib/syntax_suggest/capture_code_context.rb
index c74a366a25..5de9ec09cc 100644
--- a/lib/syntax_suggest/capture_code_context.rb
+++ b/lib/syntax_suggest/capture_code_context.rb
@@ -1,13 +1,21 @@
# frozen_string_literal: true
module SyntaxSuggest
+ module Capture
+ end
+end
+
+require_relative "capture/falling_indent_lines"
+require_relative "capture/before_after_keyword_ends"
+
+module SyntaxSuggest
# Turns a "invalid block(s)" into useful context
#
# There are three main phases in the algorithm:
#
# 1. Sanitize/format input source
# 2. Search for invalid blocks
- # 3. Format invalid blocks into something meaninful
+ # 3. Format invalid blocks into something meaningful
#
# This class handles the third part.
#
@@ -18,7 +26,7 @@ module SyntaxSuggest
# they can't add extra data that's not present.
#
# In the case of known ambiguious cases, this class adds context
- # back to the ambiguitiy so the programmer has full information.
+ # back to the ambiguity so the programmer has full information.
#
# Beyond handling these ambiguities, it also captures surrounding
# code context information:
@@ -55,6 +63,10 @@ module SyntaxSuggest
capture_falling_indent(block)
end
+ sorted_lines
+ end
+
+ def sorted_lines
@lines_to_output.select!(&:not_empty?)
@lines_to_output.uniq!
@lines_to_output.sort!
@@ -76,12 +88,11 @@ module SyntaxSuggest
# end
# end
#
- #
def capture_falling_indent(block)
- AroundBlockScan.new(
+ Capture::FallingIndentLines.new(
block: block,
code_lines: @code_lines
- ).on_falling_indent do |line|
+ ).call do |line|
@lines_to_output << line
end
end
@@ -116,9 +127,10 @@ module SyntaxSuggest
def capture_before_after_kws(block)
return unless block.visible_lines.count == 1
- around_lines = AroundBlockScan.new(code_lines: @code_lines, block: block)
- .start_at_next_line
- .capture_neighbor_context
+ around_lines = Capture::BeforeAfterKeywordEnds.new(
+ code_lines: @code_lines,
+ block: block
+ ).call
around_lines -= block.lines
@@ -137,10 +149,10 @@ module SyntaxSuggest
# puts "woof" # 3
# end # 4
#
- # However due to https://github.com/zombocom/syntax_suggest/issues/32
+ # However due to https://github.com/ruby/syntax_suggest/issues/32
# the problem line will be identified as:
#
- # ❯ class Dog # 1
+ # > class Dog # 1
#
# Because lines 2, 3, and 4 are technically valid code and are expanded
# first, deemed valid, and hidden. We need to un-hide the matching end
@@ -200,7 +212,7 @@ module SyntaxSuggest
#
# the problem line will be identified as:
#
- # ❯ end # 4
+ # > end # 4
#
# This happens because lines 1, 2, and 3 are technically valid code and are expanded
# first, deemed valid, and hidden. We need to un-hide the matching keyword on
diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb
index b572189259..94c68d8ad4 100644
--- a/lib/syntax_suggest/clean_document.rb
+++ b/lib/syntax_suggest/clean_document.rb
@@ -10,7 +10,7 @@ module SyntaxSuggest
#
# 1. Sanitize/format input source
# 2. Search for invalid blocks
- # 3. Format invalid blocks into something meaninful
+ # 3. Format invalid blocks into something meaningful
#
# This class handles the first part.
#
@@ -47,9 +47,9 @@ module SyntaxSuggest
# ## Heredocs
#
# A heredoc is an way of defining a multi-line string. They can cause many
- # problems. If left as a single line, Ripper would try to parse the contents
+ # problems. If left as a single line, the parser would try to parse the contents
# as ruby code rather than as a string. Even without this problem, we still
- # hit an issue with indentation
+ # hit an issue with indentation:
#
# 1 foo = <<~HEREDOC
# 2 "Be yourself; everyone else is already taken.""
@@ -66,27 +66,9 @@ module SyntaxSuggest
#
# All of these problems are fixed by joining the whole heredoc into a single
# line.
- #
- # ## Comments and whitespace
- #
- # Comments can throw off the way the lexer tells us that the line
- # logically belongs with the next line. This is valid ruby but
- # results in a different lex output than before:
- #
- # 1 User.
- # 2 where(name: "schneems").
- # 3 # Comment here
- # 4 first
- #
- # To handle this we can replace comment lines with empty lines
- # and then re-lex the source. This removal and re-lexing preserves
- # line index and document size, but generates an easier to work with
- # document.
- #
class CleanDocument
def initialize(source:)
- lines = clean_sweep(source: source)
- @document = CodeLine.from_source(lines.join, lines: lines)
+ @document = CodeLine.from_source(source)
end
# Call all of the document "cleaners"
@@ -110,60 +92,6 @@ module SyntaxSuggest
@document.join
end
- # Remove comments and whitespace only lines
- #
- # replace with empty newlines
- #
- # source = <<~'EOM'
- # # Comment 1
- # puts "hello"
- # # Comment 2
- # puts "world"
- # EOM
- #
- # lines = CleanDocument.new(source: source).lines
- # expect(lines[0].to_s).to eq("\n")
- # expect(lines[1].to_s).to eq("puts "hello")
- # expect(lines[2].to_s).to eq("\n")
- # expect(lines[3].to_s).to eq("puts "world")
- #
- # Important: This must be done before lexing.
- #
- # After this change is made, we lex the document because
- # removing comments can change how the doc is parsed.
- #
- # For example:
- #
- # values = LexAll.new(source: <<~EOM))
- # User.
- # # comment
- # where(name: 'schneems')
- # EOM
- # expect(
- # values.count {|v| v.type == :on_ignored_nl}
- # ).to eq(1)
- #
- # After the comment is removed:
- #
- # values = LexAll.new(source: <<~EOM))
- # User.
- #
- # where(name: 'schneems')
- # EOM
- # expect(
- # values.count {|v| v.type == :on_ignored_nl}
- # ).to eq(2)
- #
- def clean_sweep(source:)
- source.lines.map do |line|
- if line.match?(/^\s*(#[^{].*)?$/) # https://rubular.com/r/LLE10D8HKMkJvs
- $/
- else
- line
- end
- end
- end
-
# Smushes all heredoc lines into one line
#
# source = <<~'EOM'
@@ -180,11 +108,11 @@ module SyntaxSuggest
start_index_stack = []
heredoc_beg_end_index = []
lines.each do |line|
- line.lex.each do |lex_value|
- case lex_value.type
- when :on_heredoc_beg
+ line.tokens.each do |token|
+ case token.type
+ when :HEREDOC_START
start_index_stack << line.index
- when :on_heredoc_end
+ when :HEREDOC_END
start_index = start_index_stack.pop
end_index = line.index
heredoc_beg_end_index << [start_index, end_index]
@@ -210,20 +138,10 @@ module SyntaxSuggest
# expect(lines[0].to_s).to eq(source)
# expect(lines[1].to_s).to eq("")
#
- # The one known case this doesn't handle is:
- #
- # Ripper.lex <<~EOM
- # a &&
- # b ||
- # c
- # EOM
- #
- # For some reason this introduces `on_ignore_newline` but with BEG type
- #
def join_consecutive!
- consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
- take_while_including(code_line.index..-1) do |line|
- line.ignore_newline_not_beg?
+ consecutive_groups = @document.select(&:consecutive?).map do |code_line|
+ take_while_including(code_line.index..) do |line|
+ line.consecutive?
end
end
@@ -243,7 +161,7 @@ module SyntaxSuggest
# expect(lines[1].to_s).to eq("")
def join_trailing_slash!
trailing_groups = @document.select(&:trailing_slash?).map do |code_line|
- take_while_including(code_line.index..-1) { |x| x.trailing_slash? }
+ take_while_including(code_line.index..) { |x| x.trailing_slash? }
end
join_groups(trailing_groups)
self
@@ -265,22 +183,23 @@ module SyntaxSuggest
groups.each do |lines|
line = lines.first
- # Handle the case of multiple groups in a a row
+ # Handle the case of multiple groups in a row
# if one is already replaced, move on
next if @document[line.index].empty?
# Join group into the first line
@document[line.index] = CodeLine.new(
- lex: lines.map(&:lex).flatten,
+ tokens: lines.map(&:tokens).flatten,
line: lines.join,
- index: line.index
+ index: line.index,
+ consecutive: false
)
# Hide the rest of the lines
- lines[1..-1].each do |line|
+ lines[1..].each do |line|
# The above lines already have newlines in them, if add more
# then there will be double newline, use an empty line instead
- @document[line.index] = CodeLine.new(line: "", index: line.index, lex: [])
+ @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [], consecutive: false)
end
end
self
@@ -291,7 +210,7 @@ module SyntaxSuggest
# Like `take_while` except when it stops
# iterating, it also returns the line
# that caused it to stop
- def take_while_including(range = 0..-1)
+ def take_while_including(range = 0..)
take_next_and_stop = false
@document[range].take_while do |line|
next if take_next_and_stop
diff --git a/lib/syntax_suggest/cli.rb b/lib/syntax_suggest/cli.rb
index b89fa5d013..967f77bf70 100644
--- a/lib/syntax_suggest/cli.rb
+++ b/lib/syntax_suggest/cli.rb
@@ -65,6 +65,7 @@ module SyntaxSuggest
)
if display.document_ok?
+ @io.puts "Syntax OK"
@exit_obj.exit(0)
else
@exit_obj.exit(1)
@@ -91,8 +92,8 @@ module SyntaxSuggest
# ...
- ❯ 10 defdog
- ❯ 15 end
+ > 10 defdog
+ > 15 end
ENV options:
diff --git a/lib/syntax_suggest/code_block.rb b/lib/syntax_suggest/code_block.rb
index 61e7986da4..d842890300 100644
--- a/lib/syntax_suggest/code_block.rb
+++ b/lib/syntax_suggest/code_block.rb
@@ -81,7 +81,7 @@ module SyntaxSuggest
# lines then the result cannot be invalid
#
# That means there's no reason to re-check all
- # lines with ripper (which is expensive).
+ # lines with the parser (which is expensive).
# Benchmark in commit message
@valid = if lines.all? { |l| l.hidden? || l.empty? }
true
diff --git a/lib/syntax_suggest/code_frontier.rb b/lib/syntax_suggest/code_frontier.rb
index 8e93b32514..38d5375ef4 100644
--- a/lib/syntax_suggest/code_frontier.rb
+++ b/lib/syntax_suggest/code_frontier.rb
@@ -8,7 +8,7 @@ module SyntaxSuggest
#
# 1. Sanitize/format input source
# 2. Search for invalid blocks
- # 3. Format invalid blocks into something meaninful
+ # 3. Format invalid blocks into something meaningful
#
# The Code frontier is a critical part of the second step
#
@@ -117,7 +117,7 @@ module SyntaxSuggest
if ENV["SYNTAX_SUGGEST_DEBUG"]
puts "```"
- puts @queue.peek.to_s
+ puts @queue.peek
puts "```"
puts " @frontier indent: #{frontier_indent}"
puts " @unvisited indent: #{unvisited_indent}"
diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb
index dc738ab128..7fb1aae26a 100644
--- a/lib/syntax_suggest/code_line.rb
+++ b/lib/syntax_suggest/code_line.rb
@@ -26,34 +26,66 @@ module SyntaxSuggest
# Returns an array of CodeLine objects
# from the source string
- def self.from_source(source, lines: nil)
- lines ||= source.lines
- lex_array_for_line = LexAll.new(source: source, source_lines: lines).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex }
- lines.map.with_index do |line, index|
+ def self.from_source(source)
+ source = +source
+ parse_result = Prism.parse_lex(source)
+ ast, tokens = parse_result.value
+
+ clean_comments!(source, parse_result.comments)
+
+ visitor = Visitor.new
+ visitor.visit(ast)
+ tokens.sort_by! { |token, _state| token.location.start_line }
+
+ prev_token = nil
+ tokens.map! do |token, _state|
+ prev_token = Token.new(token, prev_token, visitor)
+ end
+
+ tokens_for_line = tokens.each_with_object(Hash.new { |h, k| h[k] = [] }) { |token, hash| hash[token.line] << token }
+ source.lines.map.with_index do |line, index|
CodeLine.new(
line: line,
index: index,
- lex: lex_array_for_line[index + 1]
+ tokens: tokens_for_line[index + 1],
+ consecutive: visitor.consecutive_lines.include?(index + 1)
)
end
end
- attr_reader :line, :index, :lex, :line_number, :indent
- def initialize(line:, index:, lex:)
- @lex = lex
+ # Remove comments that appear on their own in source. They will never be the cause
+ # of syntax errors and are just visual noise. Example:
+ #
+ # source = +<<~RUBY
+ # # Comment-only line
+ # foo # Inline comment
+ # RUBY
+ # CodeLine.clean_comments!(source, Prism.parse(source).comments)
+ # source # => "\nfoo # Inline comment\n"
+ def self.clean_comments!(source, comments)
+ # Iterate backwards since we are modifying the source in place and must preserve
+ # the offsets. Prism comments are sorted by their location in the source.
+ comments.reverse_each do |comment|
+ next if comment.trailing?
+ source.bytesplice(comment.location.start_offset, comment.location.length, "")
+ end
+ end
+
+ attr_reader :line, :index, :tokens, :line_number, :indent
+ def initialize(line:, index:, tokens:, consecutive:)
+ @tokens = tokens
@line = line
@index = index
+ @consecutive = consecutive
@original = line
@line_number = @index + 1
strip_line = line.dup
strip_line.lstrip!
- if strip_line.empty?
- @empty = true
- @indent = 0
+ @indent = if (@empty = strip_line.empty?)
+ line.length - 1 # Newline removed from strip_line is not "whitespace"
else
- @empty = false
- @indent = line.length - strip_line.length
+ line.length - strip_line.length
end
set_kw_end
@@ -153,29 +185,16 @@ module SyntaxSuggest
index <=> other.index
end
- # [Not stable API]
- #
- # Lines that have a `on_ignored_nl` type token and NOT
- # a `BEG` type seem to be a good proxy for the ability
- # to join multiple lines into one.
- #
- # This predicate method is used to determine when those
- # two criteria have been met.
- #
- # The one known case this doesn't handle is:
- #
- # Ripper.lex <<~EOM
- # a &&
- # b ||
- # c
- # EOM
- #
- # For some reason this introduces `on_ignore_newline` but with BEG type
- def ignore_newline_not_beg?
- @ignore_newline_not_beg
+ # Can this line be logically joined together
+ # with the following line? Determined by walking
+ # the AST
+ def consecutive?
+ @consecutive
end
- # Determines if the given line has a trailing slash
+ # Determines if the given line has a trailing slash.
+ # Simply check if the line contains a backslash after
+ # the content of the last token.
#
# lines = CodeLine.from_source(<<~EOM)
# it "foo" \
@@ -183,55 +202,19 @@ module SyntaxSuggest
# expect(lines.first.trailing_slash?).to eq(true)
#
def trailing_slash?
- last = @lex.last
- return false unless last
- return false unless last.type == :on_sp
-
- last.token == TRAILING_SLASH
+ return unless (last = @tokens.last)
+ @line.byteindex(TRAILING_SLASH, last.location.end_column) != nil
end
- # Endless method detection
- #
- # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab
- # Detecting a "oneliner" seems to need a state machine.
- # This can be done by looking mostly at the "state" (last value):
- #
- # ENDFN -> BEG (token = '=' ) -> END
- #
private def set_kw_end
- oneliner_count = 0
- in_oneliner_def = nil
-
kw_count = 0
end_count = 0
- @ignore_newline_not_beg = false
- @lex.each do |lex|
- kw_count += 1 if lex.is_kw?
- end_count += 1 if lex.is_end?
-
- if lex.type == :on_ignored_nl
- @ignore_newline_not_beg = !lex.expr_beg?
- end
-
- if in_oneliner_def.nil?
- in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN)
- elsif lex.state.allbits?(Ripper::EXPR_ENDFN)
- # Continue
- elsif lex.state.allbits?(Ripper::EXPR_BEG)
- in_oneliner_def = :BODY if lex.token == "="
- elsif lex.state.allbits?(Ripper::EXPR_END)
- # We found an endless method, count it
- oneliner_count += 1 if in_oneliner_def == :BODY
-
- in_oneliner_def = nil
- else
- in_oneliner_def = nil
- end
+ @tokens.each do |token|
+ kw_count += 1 if token.is_kw?
+ end_count += 1 if token.is_end?
end
- kw_count -= oneliner_count
-
@is_kw = (kw_count - end_count) > 0
@is_end = (end_count - kw_count) > 0
end
diff --git a/lib/syntax_suggest/code_search.rb b/lib/syntax_suggest/code_search.rb
index 2a86dfea90..7628dcd131 100644
--- a/lib/syntax_suggest/code_search.rb
+++ b/lib/syntax_suggest/code_search.rb
@@ -43,7 +43,7 @@ module SyntaxSuggest
def initialize(source, record_dir: DEFAULT_VALUE)
record_dir = if record_dir == DEFAULT_VALUE
- ENV["SYNTAX_SUGGEST_RECORD_DIR"] || ENV["SYNTAX_SUGGEST_DEBUG"] ? "tmp" : nil
+ (ENV["SYNTAX_SUGGEST_RECORD_DIR"] || ENV["SYNTAX_SUGGEST_DEBUG"]) ? "tmp" : nil
else
record_dir
end
@@ -73,7 +73,7 @@ module SyntaxSuggest
if ENV["SYNTAX_SUGGEST_DEBUG"]
puts "\n\n==== #{filename} ===="
puts "\n```#{block.starts_at}..#{block.ends_at}"
- puts block.to_s
+ puts block
puts "```"
puts " block indent: #{block.current_indent}"
end
diff --git a/lib/syntax_suggest/core_ext.rb b/lib/syntax_suggest/core_ext.rb
index 40f5fe1375..ffbc922eed 100644
--- a/lib/syntax_suggest/core_ext.rb
+++ b/lib/syntax_suggest/core_ext.rb
@@ -1,101 +1,47 @@
# frozen_string_literal: true
-# Ruby 3.2+ has a cleaner way to hook into Ruby that doesn't use `require`
-if SyntaxError.method_defined?(:detailed_message)
- module SyntaxSuggest
- class MiniStringIO
- def initialize(isatty: $stderr.isatty)
- @string = +""
- @isatty = isatty
- end
-
- attr_reader :isatty
- def puts(value = $/, **)
- @string << value
- end
-
- attr_reader :string
- end
- end
-
- SyntaxError.prepend Module.new {
- def detailed_message(highlight: true, syntax_suggest: true, **kwargs)
- return super unless syntax_suggest
-
- require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
-
- message = super
- file = if highlight
- SyntaxSuggest::PathnameFromMessage.new(super(highlight: false, **kwargs)).call.name
- else
- SyntaxSuggest::PathnameFromMessage.new(message).call.name
- end
-
- io = SyntaxSuggest::MiniStringIO.new
-
- if file
- SyntaxSuggest.call(
- io: io,
- source: file.read,
- filename: file,
- terminal: highlight
- )
- annotation = io.string
-
- annotation + message
- else
+module SyntaxSuggest
+ # SyntaxSuggest.module_for_detailed_message [Private]
+ #
+ # Used to monkeypatch SyntaxError via Module.prepend
+ def self.module_for_detailed_message
+ Module.new {
+ def detailed_message(highlight: true, syntax_suggest: true, **kwargs)
+ return super unless syntax_suggest
+
+ require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
+
+ message = super
+
+ if path
+ file = Pathname.new(path)
+ io = SyntaxSuggest::MiniStringIO.new
+
+ SyntaxSuggest.call(
+ io: io,
+ source: file.read,
+ filename: file,
+ terminal: highlight
+ )
+ annotation = io.string
+
+ annotation += "\n" unless annotation.end_with?("\n")
+
+ annotation + message
+ else
+ message
+ end
+ rescue => e
+ if ENV["SYNTAX_SUGGEST_DEBUG"]
+ $stderr.warn(e.message)
+ $stderr.warn(e.backtrace)
+ end
+
+ # Ignore internal errors
message
end
- rescue => e
- if ENV["SYNTAX_SUGGEST_DEBUG"]
- $stderr.warn(e.message)
- $stderr.warn(e.backtrace)
- end
-
- # Ignore internal errors
- message
- end
- }
-else
- autoload :Pathname, "pathname"
-
- # Monkey patch kernel to ensure that all `require` calls call the same
- # method
- module Kernel
- module_function
-
- alias_method :syntax_suggest_original_require, :require
- alias_method :syntax_suggest_original_require_relative, :require_relative
- alias_method :syntax_suggest_original_load, :load
-
- def load(file, wrap = false)
- syntax_suggest_original_load(file)
- rescue SyntaxError => e
- require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
-
- SyntaxSuggest.handle_error(e)
- end
-
- def require(file)
- syntax_suggest_original_require(file)
- rescue SyntaxError => e
- require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
-
- SyntaxSuggest.handle_error(e)
- end
-
- def require_relative(file)
- if Pathname.new(file).absolute?
- syntax_suggest_original_require file
- else
- relative_from = caller_locations(1..1).first
- relative_from_path = relative_from.absolute_path || relative_from.path
- syntax_suggest_original_require File.expand_path("../#{file}", relative_from_path)
- end
- rescue SyntaxError => e
- require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
-
- SyntaxSuggest.handle_error(e)
- end
+ }
end
end
+
+SyntaxError.prepend(SyntaxSuggest.module_for_detailed_message)
diff --git a/lib/syntax_suggest/display_code_with_line_numbers.rb b/lib/syntax_suggest/display_code_with_line_numbers.rb
index 23f4b2d1ee..a18d62e54b 100644
--- a/lib/syntax_suggest/display_code_with_line_numbers.rb
+++ b/lib/syntax_suggest/display_code_with_line_numbers.rb
@@ -14,8 +14,8 @@ module SyntaxSuggest
# # =>
# 1
# 2 def cat
- # ❯ 3 Dir.chdir
- # ❯ 4 end
+ # > 3 Dir.chdir
+ # > 4 end
# 5 end
# 6
class DisplayCodeWithLineNumbers
@@ -50,7 +50,7 @@ module SyntaxSuggest
private def format(contents:, number:, empty:, highlight: false)
string = +""
string << if highlight
- "❯ "
+ "> "
else
" "
end
diff --git a/lib/syntax_suggest/display_invalid_blocks.rb b/lib/syntax_suggest/display_invalid_blocks.rb
index bc1143f4b0..5e79b3a262 100644
--- a/lib/syntax_suggest/display_invalid_blocks.rb
+++ b/lib/syntax_suggest/display_invalid_blocks.rb
@@ -14,7 +14,7 @@ module SyntaxSuggest
@filename = filename
@code_lines = code_lines
- @terminal = terminal == DEFAULT_VALUE ? io.isatty : terminal
+ @terminal = (terminal == DEFAULT_VALUE) ? io.isatty : terminal
end
def document_ok?
@@ -23,7 +23,6 @@ module SyntaxSuggest
def call
if document_ok?
- @io.puts "Syntax OK"
return self
end
diff --git a/lib/syntax_suggest/explain_syntax.rb b/lib/syntax_suggest/explain_syntax.rb
index 142ed2e269..d7f5262ddb 100644
--- a/lib/syntax_suggest/explain_syntax.rb
+++ b/lib/syntax_suggest/explain_syntax.rb
@@ -1,8 +1,14 @@
# frozen_string_literal: true
-require_relative "left_right_lex_count"
+require_relative "left_right_token_count"
module SyntaxSuggest
+ class GetParseErrors
+ def self.errors(source)
+ Prism.parse(source).errors.map(&:message)
+ end
+ end
+
# Explains syntax errors based on their source
#
# example:
@@ -15,8 +21,8 @@ module SyntaxSuggest
# # => "Unmatched keyword, missing `end' ?"
#
# When the error cannot be determined by lexical counting
- # then ripper is run against the input and the raw ripper
- # errors returned.
+ # then the parser is run against the input and the raw
+ # errors are returned.
#
# Example:
#
@@ -39,14 +45,14 @@ module SyntaxSuggest
def initialize(code_lines:)
@code_lines = code_lines
- @left_right = LeftRightLexCount.new
+ @left_right = LeftRightTokenCount.new
@missing = nil
end
def call
@code_lines.each do |line|
- line.lex.each do |lex|
- @left_right.count_lex(lex)
+ line.tokens.each do |token|
+ @left_right.count_token(token)
end
end
@@ -91,10 +97,10 @@ module SyntaxSuggest
# Returns an array of syntax error messages
#
# If no missing pairs are found it falls back
- # on the original ripper error messages
+ # on the original error messages
def errors
if missing.empty?
- return RipperErrors.new(@code_lines.map(&:original).join).call.errors
+ return GetParseErrors.errors(@code_lines.map(&:original).join).uniq
end
missing.map { |miss| why(miss) }
diff --git a/lib/syntax_suggest/left_right_lex_count.rb b/lib/syntax_suggest/left_right_token_count.rb
index 6fcae7482b..e0562ba9cd 100644
--- a/lib/syntax_suggest/left_right_lex_count.rb
+++ b/lib/syntax_suggest/left_right_token_count.rb
@@ -9,19 +9,19 @@ module SyntaxSuggest
#
# Example:
#
- # left_right = LeftRightLexCount.new
+ # left_right = LeftRightTokenCount.new
# left_right.count_kw
# left_right.missing.first
# # => "end"
#
- # left_right = LeftRightLexCount.new
+ # left_right = LeftRightTokenCount.new
# source = "{ a: b, c: d" # Note missing '}'
- # LexAll.new(source: source).each do |lex|
- # left_right.count_lex(lex)
+ # CodeLine.from_source(source).flat_map(&:tokens).each do |token|
+ # left_right.count_token(token)
# end
# left_right.missing.first
# # => "}"
- class LeftRightLexCount
+ class LeftRightTokenCount
def initialize
@kw_count = 0
@end_count = 0
@@ -49,52 +49,46 @@ module SyntaxSuggest
#
# Example:
#
- # left_right = LeftRightLexCount.new
- # left_right.count_lex(LexValue.new(1, :on_lbrace, "{", Ripper::EXPR_BEG))
+ # token = CodeLine.from_source("{").first.tokens.first
+ # left_right = LeftRightTokenCount.new
+ # left_right.count_token(token)
# left_right.count_for_char("{")
# # => 1
# left_right.count_for_char("}")
# # => 0
- def count_lex(lex)
- case lex.type
- when :on_tstring_content
+ def count_token(token)
+ case token.type
+ when :STRING_CONTENT
# ^^^
# Means it's a string or a symbol `"{"` rather than being
# part of a data structure (like a hash) `{ a: b }`
# ignore it.
- when :on_words_beg, :on_symbos_beg, :on_qwords_beg,
- :on_qsymbols_beg, :on_regexp_beg, :on_tstring_beg
+ when :PERCENT_UPPER_W, :PERCENT_UPPER_I, :PERCENT_LOWER_W,
+ :PERCENT_LOWER_I, :REGEXP_BEGIN, :STRING_BEGIN
# ^^^
# Handle shorthand syntaxes like `%Q{ i am a string }`
#
# The start token will be the full thing `%Q{` but we
# need to count it as if it's a `{`. Any token
# can be used
- char = lex.token[-1]
+ char = token.value[-1]
@count_for_char[char] += 1 if @count_for_char.key?(char)
- when :on_embexpr_beg
+ when :EMBEXPR_BEGIN
# ^^^
# Embedded string expressions like `"#{foo} <-embed"`
# are parsed with chars:
#
- # `#{` as :on_embexpr_beg
- # `}` as :on_embexpr_end
- #
- # We cannot ignore both :on_emb_expr_beg and :on_embexpr_end
- # because sometimes the lexer thinks something is an embed
- # string end, when it is not like `lol = }` (no clue why).
+ # `#{` as :EMBEXPR_BEGIN
+ # `}` as :EMBEXPR_END
#
# When we see `#{` count it as a `{` or we will
# have a mis-match count.
#
- case lex.token
- when "\#{"
- @count_for_char["{"] += 1
- end
+ @count_for_char["{"] += 1
else
- @end_count += 1 if lex.is_end?
- @kw_count += 1 if lex.is_kw?
- @count_for_char[lex.token] += 1 if @count_for_char.key?(lex.token)
+ @end_count += 1 if token.is_end?
+ @kw_count += 1 if token.is_kw?
+ @count_for_char[token.value] += 1 if @count_for_char.key?(token.value)
end
end
diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb
deleted file mode 100644
index 132cba9f5d..0000000000
--- a/lib/syntax_suggest/lex_all.rb
+++ /dev/null
@@ -1,55 +0,0 @@
-# frozen_string_literal: true
-
-module SyntaxSuggest
- # Ripper.lex is not guaranteed to lex the entire source document
- #
- # lex = LexAll.new(source: source)
- # lex.each do |value|
- # puts value.line
- # end
- class LexAll
- include Enumerable
-
- def initialize(source:, source_lines: nil)
- @lex = Ripper::Lexer.new(source, "-", 1).parse.sort_by(&:pos)
- lineno = @lex.last.pos.first + 1
- source_lines ||= source.lines
- last_lineno = source_lines.length
-
- until lineno >= last_lineno
- lines = source_lines[lineno..-1]
-
- @lex.concat(
- Ripper::Lexer.new(lines.join, "-", lineno + 1).parse.sort_by(&:pos)
- )
- lineno = @lex.last.pos.first + 1
- end
-
- last_lex = nil
- @lex.map! { |elem|
- last_lex = LexValue.new(elem.pos.first, elem.event, elem.tok, elem.state, last_lex)
- }
- end
-
- def to_a
- @lex
- end
-
- def each
- return @lex.each unless block_given?
- @lex.each do |x|
- yield x
- end
- end
-
- def [](index)
- @lex[index]
- end
-
- def last
- @lex.last
- end
- end
-end
-
-require_relative "lex_value"
diff --git a/lib/syntax_suggest/lex_value.rb b/lib/syntax_suggest/lex_value.rb
deleted file mode 100644
index 008cc105b5..0000000000
--- a/lib/syntax_suggest/lex_value.rb
+++ /dev/null
@@ -1,70 +0,0 @@
-# frozen_string_literal: true
-
-module SyntaxSuggest
- # Value object for accessing lex values
- #
- # This lex:
- #
- # [1, 0], :on_ident, "describe", CMDARG
- #
- # Would translate into:
- #
- # lex.line # => 1
- # lex.type # => :on_indent
- # lex.token # => "describe"
- class LexValue
- attr_reader :line, :type, :token, :state
-
- def initialize(line, type, token, state, last_lex = nil)
- @line = line
- @type = type
- @token = token
- @state = state
-
- set_kw_end(last_lex)
- end
-
- private def set_kw_end(last_lex)
- @is_end = false
- @is_kw = false
- return if type != :on_kw
- #
- return if last_lex && last_lex.fname? # https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953
-
- case token
- when "if", "unless", "while", "until"
- # Only count if/unless when it's not a "trailing" if/unless
- # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375
- @is_kw = true unless expr_label?
- when "def", "case", "for", "begin", "class", "module", "do"
- @is_kw = true
- when "end"
- @is_end = true
- end
- end
-
- def fname?
- state.allbits?(Ripper::EXPR_FNAME)
- end
-
- def ignore_newline?
- type == :on_ignored_nl
- end
-
- def is_end?
- @is_end
- end
-
- def is_kw?
- @is_kw
- end
-
- def expr_beg?
- state.anybits?(Ripper::EXPR_BEG)
- end
-
- def expr_label?
- state.allbits?(Ripper::EXPR_LABEL)
- end
- end
-end
diff --git a/lib/syntax_suggest/mini_stringio.rb b/lib/syntax_suggest/mini_stringio.rb
new file mode 100644
index 0000000000..1a82572eeb
--- /dev/null
+++ b/lib/syntax_suggest/mini_stringio.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module SyntaxSuggest
+ # Mini String IO [Private]
+ #
+ # Acts like a StringIO with reduced API, but without having to require that
+ # class.
+ #
+ # The original codebase emitted directly to $stderr, but now SyntaxError#detailed_message
+ # needs a string output. To accomplish that we kept the original print infrastructure in place and
+ # added this class to accumulate the print output into a string.
+ class MiniStringIO
+ EMPTY_ARG = Object.new
+
+ def initialize(isatty: $stderr.isatty)
+ @string = +""
+ @isatty = isatty
+ end
+
+ attr_reader :isatty
+ def puts(value = EMPTY_ARG, **)
+ if !value.equal?(EMPTY_ARG)
+ @string << value
+ end
+ @string << $/
+ end
+
+ attr_reader :string
+ end
+end
diff --git a/lib/syntax_suggest/parse_blocks_from_indent_line.rb b/lib/syntax_suggest/parse_blocks_from_indent_line.rb
index d1071732fe..39dfca55d2 100644
--- a/lib/syntax_suggest/parse_blocks_from_indent_line.rb
+++ b/lib/syntax_suggest/parse_blocks_from_indent_line.rb
@@ -8,7 +8,7 @@ module SyntaxSuggest
# grabbing one that contains only an "end". In this example:
#
# def dog
- # begonn # mispelled `begin`
+ # begonn # misspelled `begin`
# puts "bark"
# end
# end
@@ -36,8 +36,8 @@ module SyntaxSuggest
# Builds blocks from bottom up
def each_neighbor_block(target_line)
scan = AroundBlockScan.new(code_lines: code_lines, block: CodeBlock.new(lines: target_line))
- .skip(:empty?)
- .skip(:hidden?)
+ .force_add_empty
+ .force_add_hidden
.scan_while { |line| line.indent >= target_line.indent }
neighbors = scan.code_block.lines
diff --git a/lib/syntax_suggest/pathname_from_message.rb b/lib/syntax_suggest/pathname_from_message.rb
index ea1a90856e..ab90227427 100644
--- a/lib/syntax_suggest/pathname_from_message.rb
+++ b/lib/syntax_suggest/pathname_from_message.rb
@@ -4,7 +4,7 @@ module SyntaxSuggest
# Converts a SyntaxError message to a path
#
# Handles the case where the filename has a colon in it
- # such as on a windows file system: https://github.com/zombocom/syntax_suggest/issues/111
+ # such as on a windows file system: https://github.com/ruby/syntax_suggest/issues/111
#
# Example:
#
@@ -13,7 +13,7 @@ module SyntaxSuggest
# # => "/tmp/scratch.rb"
#
class PathnameFromMessage
- EVAL_RE = /^\(eval\):\d+/
+ EVAL_RE = /^\(eval.*\):\d+/
STREAMING_RE = /^-:\d+/
attr_reader :name
diff --git a/lib/syntax_suggest/ripper_errors.rb b/lib/syntax_suggest/ripper_errors.rb
deleted file mode 100644
index 48eb206e48..0000000000
--- a/lib/syntax_suggest/ripper_errors.rb
+++ /dev/null
@@ -1,36 +0,0 @@
-# frozen_string_literal: true
-
-module SyntaxSuggest
- # Capture parse errors from ripper
- #
- # Example:
- #
- # puts RipperErrors.new(" def foo").call.errors
- # # => ["syntax error, unexpected end-of-input, expecting ';' or '\\n'"]
- class RipperErrors < Ripper
- attr_reader :errors
-
- # Comes from ripper, called
- # on every parse error, msg
- # is a string
- def on_parse_error(msg)
- @errors ||= []
- @errors << msg
- end
-
- alias_method :on_alias_error, :on_parse_error
- alias_method :on_assign_error, :on_parse_error
- alias_method :on_class_name_error, :on_parse_error
- alias_method :on_param_error, :on_parse_error
- alias_method :compile_error, :on_parse_error
-
- def call
- @run_once ||= begin
- @errors = []
- parse
- true
- end
- self
- end
- end
-end
diff --git a/lib/syntax_suggest/scan_history.rb b/lib/syntax_suggest/scan_history.rb
new file mode 100644
index 0000000000..dc36e6ba2e
--- /dev/null
+++ b/lib/syntax_suggest/scan_history.rb
@@ -0,0 +1,134 @@
+# frozen_string_literal: true
+
+module SyntaxSuggest
+ # Scans up/down from the given block
+ #
+ # You can try out a change, stash it, or commit it to save for later
+ #
+ # Example:
+ #
+ # scanner = ScanHistory.new(code_lines: code_lines, block: block)
+ # scanner.scan(
+ # up: ->(_, _, _) { true },
+ # down: ->(_, _, _) { true }
+ # )
+ # scanner.changed? # => true
+ # expect(scanner.lines).to eq(code_lines)
+ #
+ # scanner.stash_changes
+ #
+ # expect(scanner.lines).to_not eq(code_lines)
+ class ScanHistory
+ attr_reader :before_index, :after_index
+
+ def initialize(code_lines:, block:)
+ @code_lines = code_lines
+ @history = [block]
+ refresh_index
+ end
+
+ def commit_if_changed
+ if changed?
+ @history << CodeBlock.new(lines: @code_lines[before_index..after_index])
+ end
+
+ self
+ end
+
+ # Discards any changes that have not been committed
+ def stash_changes
+ refresh_index
+ self
+ end
+
+ # Discard changes that have not been committed and revert the last commit
+ #
+ # Cannot revert the first commit
+ def revert_last_commit
+ if @history.length > 1
+ @history.pop
+ refresh_index
+ end
+
+ self
+ end
+
+ def changed?
+ @before_index != current.lines.first.index ||
+ @after_index != current.lines.last.index
+ end
+
+ # Iterates up and down
+ #
+ # Passes line, kw_count, end_count to the `up`/`down` callables for each iteration
+ def scan(up:, down:)
+ kw_count = 0
+ end_count = 0
+
+ up_index = before_lines.reverse_each.take_while do |line|
+ kw_count += 1 if line.is_kw?
+ end_count += 1 if line.is_end?
+ up.call(line, kw_count, end_count)
+ end.last&.index
+
+ kw_count = 0
+ end_count = 0
+
+ down_index = after_lines.each.take_while do |line|
+ kw_count += 1 if line.is_kw?
+ end_count += 1 if line.is_end?
+ down.call(line, kw_count, end_count)
+ end.last&.index
+
+ @before_index = if up_index && up_index < @before_index
+ up_index
+ else
+ @before_index
+ end
+
+ @after_index = if down_index && down_index > @after_index
+ down_index
+ else
+ @after_index
+ end
+
+ self
+ end
+
+ def next_up
+ return nil if @before_index <= 0
+
+ @code_lines[@before_index - 1]
+ end
+
+ def next_down
+ return nil if @after_index >= @code_lines.length
+
+ @code_lines[@after_index + 1]
+ end
+
+ def lines
+ @code_lines[@before_index..@after_index]
+ end
+
+ private def before_lines
+ @code_lines[0...@before_index] || []
+ end
+
+ # Returns an array of all the CodeLines that exist after
+ # the currently scanned block
+ private def after_lines
+ @code_lines[@after_index.next..] || []
+ end
+
+ private def current
+ @history.last
+ end
+
+ private def refresh_index
+ @before_index = current.lines.first.index
+ @after_index = current.lines.last.index
+ self
+ end
+ end
+end
diff --git a/lib/syntax_suggest/syntax_suggest.gemspec b/lib/syntax_suggest/syntax_suggest.gemspec
index acf9be7710..44e458aaad 100644
--- a/lib/syntax_suggest/syntax_suggest.gemspec
+++ b/lib/syntax_suggest/syntax_suggest.gemspec
@@ -14,12 +14,12 @@ Gem::Specification.new do |spec|
spec.summary = "Find syntax errors in your source in a snap"
spec.description = 'When you get an "unexpected end" in your syntax this gem helps you find it'
- spec.homepage = "https://github.com/zombocom/syntax_suggest.git"
+ spec.homepage = "https://github.com/ruby/syntax_suggest.git"
spec.license = "MIT"
- spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
+ spec.required_ruby_version = Gem::Requirement.new(">= 3.3.0")
spec.metadata["homepage_uri"] = spec.homepage
- spec.metadata["source_code_uri"] = "https://github.com/zombocom/syntax_suggest.git"
+ spec.metadata["source_code_uri"] = "https://github.com/ruby/syntax_suggest.git"
# Specify which files should be added to the gem when it is released.
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
@@ -27,6 +27,6 @@ Gem::Specification.new do |spec|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features|assets)/}) }
end
spec.bindir = "exe"
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+ spec.executables = ["syntax_suggest"]
spec.require_paths = ["lib"]
end
diff --git a/lib/syntax_suggest/token.rb b/lib/syntax_suggest/token.rb
new file mode 100644
index 0000000000..fc52639b1f
--- /dev/null
+++ b/lib/syntax_suggest/token.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module SyntaxSuggest
+ # Value object for accessing lex values
+ #
+ # This lex:
+ #
+ # [IDENTIFIER(1,0)-(1,8)("describe"), 32]
+ #
+ # Would translate into:
+ #
+ # lex.location # => (1,0)-(1,8)
+ # lex.type # => :IDENTIFIER
+ # lex.value # => "describe"
+ class Token
+ attr_reader :location, :type, :value
+
+ KW_TYPES = %i[
+ KEYWORD_IF KEYWORD_UNLESS KEYWORD_WHILE KEYWORD_UNTIL
+ KEYWORD_DEF KEYWORD_CASE KEYWORD_FOR KEYWORD_BEGIN KEYWORD_CLASS KEYWORD_MODULE KEYWORD_DO KEYWORD_DO_LOOP
+ ].to_set.freeze
+ private_constant :KW_TYPES
+
+ def initialize(prism_token, previous_prism_token, visitor)
+ @location = prism_token.location
+ @type = prism_token.type
+ @value = prism_token.value
+
+ # Prism lexes `:module` as SYMBOL_BEGIN, KEYWORD_MODULE
+ # https://github.com/ruby/prism/issues/3940
+ symbol_content = previous_prism_token&.type == :SYMBOL_BEGIN
+ @is_kw = KW_TYPES.include?(@type)
+ @is_kw = false if symbol_content || visitor.endless_def_keyword_offsets.include?(@location.start_offset)
+ @is_end = @type == :KEYWORD_END
+ end
+
+ def line
+ @location.start_line
+ end
+
+ def is_end?
+ @is_end
+ end
+
+ def is_kw?
+ @is_kw
+ end
+ end
+end
diff --git a/lib/syntax_suggest/version.rb b/lib/syntax_suggest/version.rb
index a5176dcf2e..9114a079f6 100644
--- a/lib/syntax_suggest/version.rb
+++ b/lib/syntax_suggest/version.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: true
module SyntaxSuggest
- VERSION = "0.0.1"
+ VERSION = "3.0.0"
end
diff --git a/lib/syntax_suggest/visitor.rb b/lib/syntax_suggest/visitor.rb
new file mode 100644
index 0000000000..6e25f7239c
--- /dev/null
+++ b/lib/syntax_suggest/visitor.rb
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+module SyntaxSuggest
+ # Walks the Prism AST to extract structural info that cannot be reliably determined from tokens
+ # alone.
+ #
+ # Such as the location of lines that must be logically joined so the search algorithm will
+ # treat them as one. Example:
+ #
+ # source = <<~RUBY
+ # User # 1
+ # .where(name: "Earlopain") # 2
+ # .first # 3
+ # RUBY
+ # ast, _tokens = Prism.parse_lex(source).value
+ # visitor = Visitor.new
+ # visitor.visit(ast)
+ # visitor.consecutive_lines # => Set[2, 1]
+ #
+ # This output means that line 1 and line 2 need to be joined with their next line.
+ #
+ # And determining the location of "endless" method definitions. For example:
+ #
+ # source = <<~RUBY
+ # def cube(x)
+ # x * x * x
+ # end
+ # def square(x) = x * x # 1
+ # RUBY
+ #
+ # ast, _tokens = Prism.parse_lex(source).value
+ # visitor = Visitor.new
+ # visitor.visit(ast)
+ # visitor.endless_def_keyword_offsets # => Set[28]
+ class Visitor < Prism::Visitor
+ attr_reader :endless_def_keyword_offsets, :consecutive_lines
+
+ def initialize
+ @endless_def_keyword_offsets = Set.new
+ @consecutive_lines = Set.new
+ end
+
+ # Called by Prism::Visitor for every method-call node in the AST
+ # (e.g. `foo.bar`, `foo.bar.baz`).
+ def visit_call_node(node)
+ receiver_loc = node.receiver&.location
+ call_operator_loc = node.call_operator_loc
+ message_loc = node.message_loc
+ if receiver_loc && call_operator_loc && message_loc
+ # dot-leading (dot on the next line)
+ # foo # line 1 - consecutive
+ # .bar # line 2
+ if receiver_loc.end_line != call_operator_loc.start_line && call_operator_loc.start_line == message_loc.start_line
+ (receiver_loc.end_line..call_operator_loc.start_line - 1).each do |line|
+ @consecutive_lines << line
+ end
+ end
+
+ # dot-trailing (dot on the same line as the receiver)
+ # foo. # line 1 - consecutive
+ # bar # line 2
+ if receiver_loc.end_line == call_operator_loc.start_line && call_operator_loc.start_line != message_loc.start_line
+ (call_operator_loc.start_line..message_loc.start_line - 1).each do |line|
+ @consecutive_lines << line
+ end
+ end
+ end
+ super
+ end
+
+ # Called by Prism::Visitor for every `def` node in the AST.
+ # Records the keyword start location for endless method definitions
+ # like `def foo = 123`. These are valid without a matching `end`,
+ # so Token must exclude them when deciding if a line is a keyword.
+ def visit_def_node(node)
+ @endless_def_keyword_offsets << node.def_keyword_loc.start_offset if node.equal_loc
+ super
+ end
+ end
+end