diff options
Diffstat (limited to 'lib/syntax_suggest')
28 files changed, 910 insertions, 662 deletions
diff --git a/lib/syntax_suggest/api.rb b/lib/syntax_suggest/api.rb index 5b725e13d7..5054efa888 100644 --- a/lib/syntax_suggest/api.rb +++ b/lib/syntax_suggest/api.rb @@ -5,9 +5,11 @@ require_relative "version" require "tmpdir" require "stringio" require "pathname" -require "ripper" require "timeout" +# Prism is the new parser, replacing Ripper +require "prism" + module SyntaxSuggest # Used to indicate a default value that cannot # be confused with another input. @@ -78,7 +80,7 @@ module SyntaxSuggest code_lines: search.code_lines ).call rescue Timeout::Error => e - io.puts "Search timed out SYNTAX_SUGGEST_TIMEOUT=#{timeout}, run with DEBUG=1 for more info" + io.puts "Search timed out SYNTAX_SUGGEST_TIMEOUT=#{timeout}, run with SYNTAX_SUGGEST_DEBUG=1 for more info" io.puts e.backtrace.first(3).join($/) end @@ -91,7 +93,9 @@ module SyntaxSuggest dir = Pathname(dir) dir.join(time).tap { |path| path.mkpath - FileUtils.ln_sf(time, dir.join("last")) + alias_dir = dir.join("last") + FileUtils.rm_rf(alias_dir) if alias_dir.exist? + FileUtils.ln_sf(time, alias_dir) } end @@ -117,11 +121,7 @@ module SyntaxSuggest def self.valid_without?(without_lines:, code_lines:) lines = code_lines - Array(without_lines).flatten - if lines.empty? - true - else - valid?(lines) - end + lines.empty? || valid?(lines) end # SyntaxSuggest.invalid? [Private] @@ -131,7 +131,7 @@ module SyntaxSuggest source = source.join if source.is_a?(Array) source = source.to_s - Ripper.new(source).tap(&:parse).error? + Prism.parse(source).failure? end # SyntaxSuggest.valid? 
[Private] @@ -185,11 +185,10 @@ require_relative "explain_syntax" require_relative "clean_document" # Helpers -require_relative "lex_all" require_relative "code_line" require_relative "code_block" require_relative "block_expand" -require_relative "ripper_errors" +require_relative "mini_stringio" require_relative "priority_queue" require_relative "unvisited_lines" require_relative "around_block_scan" @@ -197,3 +196,5 @@ require_relative "priority_engulf_queue" require_relative "pathname_from_message" require_relative "display_invalid_blocks" require_relative "parse_blocks_from_indent_line" +require_relative "visitor" +require_relative "token" diff --git a/lib/syntax_suggest/around_block_scan.rb b/lib/syntax_suggest/around_block_scan.rb index 2a57d1b19e..dd9af729c5 100644 --- a/lib/syntax_suggest/around_block_scan.rb +++ b/lib/syntax_suggest/around_block_scan.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require_relative "scan_history" + module SyntaxSuggest # This class is useful for exploring contents before and after # a block @@ -24,201 +26,207 @@ module SyntaxSuggest # puts scan.before_index # => 0 # puts scan.after_index # => 3 # - # Contents can also be filtered using AroundBlockScan#skip - # - # To grab the next surrounding indentation use AroundBlockScan#scan_adjacent_indent class AroundBlockScan def initialize(code_lines:, block:) @code_lines = code_lines - @orig_before_index = block.lines.first.index - @orig_after_index = block.lines.last.index @orig_indent = block.current_indent - @skip_array = [] - @after_array = [] - @before_array = [] - @stop_after_kw = false - @skip_hidden = false - @skip_empty = false + @stop_after_kw = false + @force_add_empty = false + @force_add_hidden = false + @target_indent = nil + + @scanner = ScanHistory.new(code_lines: code_lines, block: block) + end + + # When using this flag, `scan_while` will + # bypass the block it's given and always add a + # line that responds truthy to `CodeLine#hidden?` + # + # Lines are hidden 
when they've been evaluated by + # the parser as part of a block and found to contain + # valid code. + def force_add_hidden + @force_add_hidden = true + self end - def skip(name) - case name - when :hidden? - @skip_hidden = true - when :empty? - @skip_empty = true - else - raise "Unsupported skip #{name}" - end + # When using this flag, `scan_while` will + # bypass the block it's given and always add a + # line that responds truthy to `CodeLine#empty?` + # + # Empty lines contain no code, only whitespace such + # as leading spaces a newline. + def force_add_empty + @force_add_empty = true self end + # Tells `scan_while` to look for mismatched keyword/end-s + # + # When scanning up, if we see more keywords then end-s it will + # stop. This might happen when scanning outside of a method body. + # the first scan line up would be a keyword and this setting would + # trigger a stop. + # + # When scanning down, stop if there are more end-s than keywords. def stop_after_kw @stop_after_kw = true self end + # Main work method + # + # The scan_while method takes a block that yields lines above and + # below the block. If the yield returns true, the @before_index + # or @after_index are modified to include the matched line. + # + # In addition to yielding individual lines, the internals of this + # object give a mini DSL to handle common situations such as + # stopping if we've found a keyword/end mis-match in one direction + # or the other. def scan_while - stop_next = false - - kw_count = 0 - end_count = 0 - index = before_lines.reverse_each.take_while do |line| - next false if stop_next - next true if @skip_hidden && line.hidden? - next true if @skip_empty && line.empty? + stop_next_up = false + stop_next_down = false - kw_count += 1 if line.is_kw? - end_count += 1 if line.is_end? 
- if @stop_after_kw && kw_count > end_count - stop_next = true - end - - yield line - end.last&.index + @scanner.scan( + up: ->(line, kw_count, end_count) { + next false if stop_next_up + next true if @force_add_hidden && line.hidden? + next true if @force_add_empty && line.empty? - if index && index < before_index - @before_index = index - end + if @stop_after_kw && kw_count > end_count + stop_next_up = true + end - stop_next = false - kw_count = 0 - end_count = 0 - index = after_lines.take_while do |line| - next false if stop_next - next true if @skip_hidden && line.hidden? - next true if @skip_empty && line.empty? + yield line + }, + down: ->(line, kw_count, end_count) { + next false if stop_next_down + next true if @force_add_hidden && line.hidden? + next true if @force_add_empty && line.empty? - kw_count += 1 if line.is_kw? - end_count += 1 if line.is_end? - if @stop_after_kw && end_count > kw_count - stop_next = true - end + if @stop_after_kw && end_count > kw_count + stop_next_down = true + end - yield line - end.last&.index + yield line + } + ) - if index && index > after_index - @after_index = index - end self end - def capture_neighbor_context - lines = [] + # Scanning is intentionally conservative because + # we have no way of rolling back an aggressive block (at this time) + # + # If a block was stopped for some trivial reason, (like an empty line) + # but the next line would have caused it to be balanced then we + # can check that condition and grab just one more line either up or + # down. + # + # For example, below if we're scanning up, line 2 might cause + # the scanning to stop. This is because empty lines might + # denote logical breaks where the user intended to chunk code + # which is a good place to stop and check validity. Unfortunately + # it also means we might have a "dangling" keyword or end. 
+ # + # 1 def bark + # 2 + # 3 end + # + # If lines 2 and 3 are in the block, then when this method is + # run it would see it is unbalanced, but that acquiring line 1 + # would make it balanced, so that's what it does. + def lookahead_balance_one_line kw_count = 0 end_count = 0 - before_lines.reverse_each do |line| - next if line.empty? - break if line.indent < @orig_indent - next if line.indent != @orig_indent - + lines.each do |line| kw_count += 1 if line.is_kw? end_count += 1 if line.is_end? - if kw_count != 0 && kw_count == end_count - lines << line - break - end - - lines << line end - lines.reverse! - - kw_count = 0 - end_count = 0 - after_lines.each do |line| - next if line.empty? - break if line.indent < @orig_indent - next if line.indent != @orig_indent - - kw_count += 1 if line.is_kw? - end_count += 1 if line.is_end? - if kw_count != 0 && kw_count == end_count - lines << line - break + return self if kw_count == end_count # nothing to balance + + @scanner.commit_if_changed # Rollback point if we don't find anything to optimize + + # Try to eat up empty lines + @scanner.scan( + up: ->(line, _, _) { line.hidden? || line.empty? }, + down: ->(line, _, _) { line.hidden? || line.empty? } + ) + + # More ends than keywords, check if we can balance expanding up + next_up = @scanner.next_up + next_down = @scanner.next_down + case end_count - kw_count + when 1 + if next_up&.is_kw? && next_up.indent >= @target_indent + @scanner.scan( + up: ->(line, _, _) { line == next_up }, + down: ->(line, _, _) { false } + ) + @scanner.commit_if_changed end - - lines << line - end - - lines - end - - def on_falling_indent - last_indent = @orig_indent - before_lines.reverse_each do |line| - next if line.empty? - if line.indent < last_indent - yield line - last_indent = line.indent - end - end - - last_indent = @orig_indent - after_lines.each do |line| - next if line.empty? - if line.indent < last_indent - yield line - last_indent = line.indent + when -1 + if next_down&.is_end? 
&& next_down.indent >= @target_indent + @scanner.scan( + up: ->(line, _, _) { false }, + down: ->(line, _, _) { line == next_down } + ) + @scanner.commit_if_changed end end - end - - def scan_neighbors - scan_while { |line| line.not_empty? && line.indent >= @orig_indent } - end + # Rollback any uncommitted changes + @scanner.stash_changes - def next_up - @code_lines[before_index.pred] + self end - def next_down - @code_lines[after_index.next] + # Finds code lines at the same or greater indentation and adds them + # to the block + def scan_neighbors_not_empty + @target_indent = @orig_indent + scan_while { |line| line.not_empty? && line.indent >= @target_indent } end + # Scan blocks based on indentation of next line above/below block + # + # Determines indentaion of the next line above/below the current block. + # + # Normally this is called when a block has expanded to capture all "neighbors" + # at the same (or greater) indentation and needs to expand out. For example + # the `def/end` lines surrounding a method. def scan_adjacent_indent before_after_indent = [] - before_after_indent << (next_up&.indent || 0) - before_after_indent << (next_down&.indent || 0) - indent = before_after_indent.min - scan_while { |line| line.not_empty? && line.indent >= indent } + before_after_indent << (@scanner.next_up&.indent || 0) + before_after_indent << (@scanner.next_down&.indent || 0) - self - end + @target_indent = before_after_indent.min + scan_while { |line| line.not_empty? && line.indent >= @target_indent } - def start_at_next_line - before_index - after_index - @before_index -= 1 - @after_index += 1 self end + # Return the currently matched lines as a `CodeBlock` + # + # When a `CodeBlock` is created it will gather metadata about + # itself, so this is not a free conversion. 
Avoid allocating + # more CodeBlock's than needed def code_block CodeBlock.new(lines: lines) end + # Returns the lines matched by the current scan as an + # array of CodeLines def lines - @code_lines[before_index..after_index] - end - - def before_index - @before_index ||= @orig_before_index - end - - def after_index - @after_index ||= @orig_after_index - end - - private def before_lines - @code_lines[0...before_index] || [] + @scanner.lines end - private def after_lines - @code_lines[after_index.next..-1] || [] + # Manageable rspec errors + def inspect + "#<#{self.class}:0x0000123843lol >" end end end diff --git a/lib/syntax_suggest/block_expand.rb b/lib/syntax_suggest/block_expand.rb index 396b2c3a1a..2751ae2a64 100644 --- a/lib/syntax_suggest/block_expand.rb +++ b/lib/syntax_suggest/block_expand.rb @@ -35,30 +35,121 @@ module SyntaxSuggest @code_lines = code_lines end + # Main interface. Expand current indentation, before + # expanding to a lower indentation def call(block) if (next_block = expand_neighbors(block)) - return next_block + next_block + else + expand_indent(block) end - - expand_indent(block) end + # Expands code to the next lowest indentation + # + # For example: + # + # 1 def dog + # 2 print "dog" + # 3 end + # + # If a block starts on line 2 then it has captured all it's "neighbors" (code at + # the same indentation or higher). To continue expanding, this block must capture + # lines one and three which are at a different indentation level. + # + # This method allows fully expanded blocks to decrease their indentation level (so + # they can expand to capture more code up and down). It does this conservatively + # as there's no undo (currently). def expand_indent(block) - AroundBlockScan.new(code_lines: @code_lines, block: block) - .skip(:hidden?) 
+ now = AroundBlockScan.new(code_lines: @code_lines, block: block) + .force_add_hidden .stop_after_kw .scan_adjacent_indent - .code_block + + now.lookahead_balance_one_line + + now.code_block end + # A neighbor is code that is at or above the current indent line. + # + # First we build a block with all neighbors. If we can't go further + # then we decrease the indentation threshold and expand via indentation + # i.e. `expand_indent` + # + # Handles two general cases. + # + # ## Case #1: Check code inside of methods/classes/etc. + # + # It's important to note, that not everything in a given indentation level can be parsed + # as valid code even if it's part of valid code. For example: + # + # 1 hash = { + # 2 name: "richard", + # 3 dog: "cinco", + # 4 } + # + # In this case lines 2 and 3 will be neighbors, but they're invalid until `expand_indent` + # is called on them. + # + # When we are adding code within a method or class (at the same indentation level), + # use the empty lines to denote the programmer intended logical chunks. + # Stop and check each one. For example: + # + # 1 def dog + # 2 print "dog" + # 3 + # 4 hash = { + # 5 end + # + # If we did not stop parsing at empty newlines then the block might mistakenly grab all + # the contents (lines 2, 3, and 4) and report them as being problems, instead of only + # line 4. + # + # ## Case #2: Expand/grab other logical blocks + # + # Once the search algorithm has converted all lines into blocks at a given indentation + # it will then `expand_indent`. Once the blocks that generates are expanded as neighbors + # we then begin seeing neighbors being other logical blocks i.e. a block's neighbors + # may be another method or class (something with keywords/ends). + # + # For example: + # + # 1 def bark + # 2 + # 3 end + # 4 + # 5 def sit + # 6 end + # + # In this case if lines 4, 5, and 6 are in a block when it tries to expand neighbors + # it will expand up. 
If it stops after line 2 or 3 it may cause problems since there's a + # valid kw/end pair, but the block will be checked without it. + # + # We try to resolve this edge case with `lookahead_balance_one_line` below. def expand_neighbors(block) - expanded_lines = AroundBlockScan.new(code_lines: @code_lines, block: block) - .skip(:hidden?) + now = AroundBlockScan.new(code_lines: @code_lines, block: block) + + # Initial scan + now + .force_add_hidden .stop_after_kw - .scan_neighbors - .scan_while { |line| line.empty? } # Slurp up empties + .scan_neighbors_not_empty + + # Slurp up empties + now + .scan_while { |line| line.empty? } + + # If next line is kw and it will balance us, take it + expanded_lines = now + .lookahead_balance_one_line .lines + # Don't allocate a block if it won't be used + # + # If nothing was taken, return nil to indicate that status + # used in `def call` to determine if + # we need to expand up/out (`expand_indent`) if block.lines == expanded_lines nil else @@ -66,7 +157,7 @@ module SyntaxSuggest end end - # Managable rspec errors + # Manageable rspec errors def inspect "#<SyntaxSuggest::CodeBlock:0x0000123843lol >" end diff --git a/lib/syntax_suggest/capture/before_after_keyword_ends.rb b/lib/syntax_suggest/capture/before_after_keyword_ends.rb new file mode 100644 index 0000000000..f53c57a4d1 --- /dev/null +++ b/lib/syntax_suggest/capture/before_after_keyword_ends.rb @@ -0,0 +1,85 @@ +# frozen_string_literal: true + +module SyntaxSuggest + module Capture + # Shows surrounding kw/end pairs + # + # The purpose of showing these extra pairs is due to cases + # of ambiguity when only one visible line is matched. + # + # For example: + # + # 1 class Dog + # 2 def bark + # 4 def eat + # 5 end + # 6 end + # + # In this case either line 2 could be missing an `end` or + # line 4 was an extra line added by mistake (it happens). 
+ # + # When we detect the above problem it shows the issue + # as only being on line 2 + # + # 2 def bark + # + # Showing "neighbor" keyword pairs gives extra context: + # + # 2 def bark + # 4 def eat + # 5 end + # + # + # Example: + # + # lines = BeforeAfterKeywordEnds.new( + # block: block, + # code_lines: code_lines + # ).call() + # + class BeforeAfterKeywordEnds + def initialize(code_lines:, block:) + @scanner = ScanHistory.new(code_lines: code_lines, block: block) + @original_indent = block.current_indent + end + + def call + lines = [] + + @scanner.scan( + up: ->(line, kw_count, end_count) { + next true if line.empty? + break if line.indent < @original_indent + next true if line.indent != @original_indent + + # If we're going up and have one complete kw/end pair, stop + if kw_count != 0 && kw_count == end_count + lines << line + break + end + + lines << line if line.is_kw? || line.is_end? + true + }, + down: ->(line, kw_count, end_count) { + next true if line.empty? + break if line.indent < @original_indent + next true if line.indent != @original_indent + + # if we're going down and have one complete kw/end pair,stop + if kw_count != 0 && kw_count == end_count + lines << line + break + end + + lines << line if line.is_kw? || line.is_end? 
+ true + } + ) + @scanner.stash_changes + + lines + end + end + end +end diff --git a/lib/syntax_suggest/capture/falling_indent_lines.rb b/lib/syntax_suggest/capture/falling_indent_lines.rb new file mode 100644 index 0000000000..1e046b2ba5 --- /dev/null +++ b/lib/syntax_suggest/capture/falling_indent_lines.rb @@ -0,0 +1,71 @@ +# frozen_string_literal: true + +module SyntaxSuggest + module Capture + # Shows the context around code provided by "falling" indentation + # + # If this is the original code lines: + # + # class OH + # def hello + # it "foo" do + # end + # end + # + # And this is the line that is captured + # + # it "foo" do + # + # It will yield its surrounding context: + # + # class OH + # def hello + # end + # end + # + # Example: + # + # FallingIndentLines.new( + # block: block, + # code_lines: @code_lines + # ).call do |line| + # @lines_to_output << line + # end + # + class FallingIndentLines + def initialize(code_lines:, block:) + @lines = nil + @scanner = ScanHistory.new(code_lines: code_lines, block: block) + @original_indent = block.current_indent + end + + def call(&yieldable) + last_indent_up = @original_indent + last_indent_down = @original_indent + + @scanner.commit_if_changed + @scanner.scan( + up: ->(line, _, _) { + next true if line.empty? + + if line.indent < last_indent_up + yieldable.call(line) + last_indent_up = line.indent + end + true + }, + down: ->(line, _, _) { + next true if line.empty? 
+ + if line.indent < last_indent_down + yieldable.call(line) + last_indent_down = line.indent + end + true + } + ) + @scanner.stash_changes + end + end + end +end diff --git a/lib/syntax_suggest/capture_code_context.rb b/lib/syntax_suggest/capture_code_context.rb index c74a366a25..5de9ec09cc 100644 --- a/lib/syntax_suggest/capture_code_context.rb +++ b/lib/syntax_suggest/capture_code_context.rb @@ -1,13 +1,21 @@ # frozen_string_literal: true module SyntaxSuggest + module Capture + end +end + +require_relative "capture/falling_indent_lines" +require_relative "capture/before_after_keyword_ends" + +module SyntaxSuggest # Turns a "invalid block(s)" into useful context # # There are three main phases in the algorithm: # # 1. Sanitize/format input source # 2. Search for invalid blocks - # 3. Format invalid blocks into something meaninful + # 3. Format invalid blocks into something meaningful # # This class handles the third part. # @@ -18,7 +26,7 @@ module SyntaxSuggest # they can't add extra data that's not present. # # In the case of known ambiguious cases, this class adds context - # back to the ambiguitiy so the programmer has full information. + # back to the ambiguity so the programmer has full information. # # Beyond handling these ambiguities, it also captures surrounding # code context information: @@ -55,6 +63,10 @@ module SyntaxSuggest capture_falling_indent(block) end + sorted_lines + end + + def sorted_lines @lines_to_output.select!(&:not_empty?) @lines_to_output.uniq! @lines_to_output.sort! 
@@ -76,12 +88,11 @@ module SyntaxSuggest # end # end # - # def capture_falling_indent(block) - AroundBlockScan.new( + Capture::FallingIndentLines.new( block: block, code_lines: @code_lines - ).on_falling_indent do |line| + ).call do |line| @lines_to_output << line end end @@ -116,9 +127,10 @@ module SyntaxSuggest def capture_before_after_kws(block) return unless block.visible_lines.count == 1 - around_lines = AroundBlockScan.new(code_lines: @code_lines, block: block) - .start_at_next_line - .capture_neighbor_context + around_lines = Capture::BeforeAfterKeywordEnds.new( + code_lines: @code_lines, + block: block + ).call around_lines -= block.lines @@ -137,10 +149,10 @@ module SyntaxSuggest # puts "woof" # 3 # end # 4 # - # However due to https://github.com/zombocom/syntax_suggest/issues/32 + # However due to https://github.com/ruby/syntax_suggest/issues/32 # the problem line will be identified as: # - # ❯ class Dog # 1 + # > class Dog # 1 # # Because lines 2, 3, and 4 are technically valid code and are expanded # first, deemed valid, and hidden. We need to un-hide the matching end @@ -200,7 +212,7 @@ module SyntaxSuggest # # the problem line will be identified as: # - # ❯ end # 4 + # > end # 4 # # This happens because lines 1, 2, and 3 are technically valid code and are expanded # first, deemed valid, and hidden. We need to un-hide the matching keyword on diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb index b572189259..94c68d8ad4 100644 --- a/lib/syntax_suggest/clean_document.rb +++ b/lib/syntax_suggest/clean_document.rb @@ -10,7 +10,7 @@ module SyntaxSuggest # # 1. Sanitize/format input source # 2. Search for invalid blocks - # 3. Format invalid blocks into something meaninful + # 3. Format invalid blocks into something meaningful # # This class handles the first part. # @@ -47,9 +47,9 @@ module SyntaxSuggest # ## Heredocs # # A heredoc is an way of defining a multi-line string. They can cause many - # problems. 
If left as a single line, Ripper would try to parse the contents + # problems. If left as a single line, the parser would try to parse the contents # as ruby code rather than as a string. Even without this problem, we still - # hit an issue with indentation + # hit an issue with indentation: # # 1 foo = <<~HEREDOC # 2 "Be yourself; everyone else is already taken."" @@ -66,27 +66,9 @@ module SyntaxSuggest # # All of these problems are fixed by joining the whole heredoc into a single # line. - # - # ## Comments and whitespace - # - # Comments can throw off the way the lexer tells us that the line - # logically belongs with the next line. This is valid ruby but - # results in a different lex output than before: - # - # 1 User. - # 2 where(name: "schneems"). - # 3 # Comment here - # 4 first - # - # To handle this we can replace comment lines with empty lines - # and then re-lex the source. This removal and re-lexing preserves - # line index and document size, but generates an easier to work with - # document. - # class CleanDocument def initialize(source:) - lines = clean_sweep(source: source) - @document = CodeLine.from_source(lines.join, lines: lines) + @document = CodeLine.from_source(source) end # Call all of the document "cleaners" @@ -110,60 +92,6 @@ module SyntaxSuggest @document.join end - # Remove comments and whitespace only lines - # - # replace with empty newlines - # - # source = <<~'EOM' - # # Comment 1 - # puts "hello" - # # Comment 2 - # puts "world" - # EOM - # - # lines = CleanDocument.new(source: source).lines - # expect(lines[0].to_s).to eq("\n") - # expect(lines[1].to_s).to eq("puts "hello") - # expect(lines[2].to_s).to eq("\n") - # expect(lines[3].to_s).to eq("puts "world") - # - # Important: This must be done before lexing. - # - # After this change is made, we lex the document because - # removing comments can change how the doc is parsed. - # - # For example: - # - # values = LexAll.new(source: <<~EOM)) - # User. 
- # # comment - # where(name: 'schneems') - # EOM - # expect( - # values.count {|v| v.type == :on_ignored_nl} - # ).to eq(1) - # - # After the comment is removed: - # - # values = LexAll.new(source: <<~EOM)) - # User. - # - # where(name: 'schneems') - # EOM - # expect( - # values.count {|v| v.type == :on_ignored_nl} - # ).to eq(2) - # - def clean_sweep(source:) - source.lines.map do |line| - if line.match?(/^\s*(#[^{].*)?$/) # https://rubular.com/r/LLE10D8HKMkJvs - $/ - else - line - end - end - end - # Smushes all heredoc lines into one line # # source = <<~'EOM' @@ -180,11 +108,11 @@ module SyntaxSuggest start_index_stack = [] heredoc_beg_end_index = [] lines.each do |line| - line.lex.each do |lex_value| - case lex_value.type - when :on_heredoc_beg + line.tokens.each do |token| + case token.type + when :HEREDOC_START start_index_stack << line.index - when :on_heredoc_end + when :HEREDOC_END start_index = start_index_stack.pop end_index = line.index heredoc_beg_end_index << [start_index, end_index] @@ -210,20 +138,10 @@ module SyntaxSuggest # expect(lines[0].to_s).to eq(source) # expect(lines[1].to_s).to eq("") # - # The one known case this doesn't handle is: - # - # Ripper.lex <<~EOM - # a && - # b || - # c - # EOM - # - # For some reason this introduces `on_ignore_newline` but with BEG type - # def join_consecutive! - consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line| - take_while_including(code_line.index..-1) do |line| - line.ignore_newline_not_beg? + consecutive_groups = @document.select(&:consecutive?).map do |code_line| + take_while_including(code_line.index..) do |line| + line.consecutive? end end @@ -243,7 +161,7 @@ module SyntaxSuggest # expect(lines[1].to_s).to eq("") def join_trailing_slash! trailing_groups = @document.select(&:trailing_slash?).map do |code_line| - take_while_including(code_line.index..-1) { |x| x.trailing_slash? } + take_while_including(code_line.index..) { |x| x.trailing_slash? 
} end join_groups(trailing_groups) self @@ -265,22 +183,23 @@ module SyntaxSuggest groups.each do |lines| line = lines.first - # Handle the case of multiple groups in a a row + # Handle the case of multiple groups in a row # if one is already replaced, move on next if @document[line.index].empty? # Join group into the first line @document[line.index] = CodeLine.new( - lex: lines.map(&:lex).flatten, + tokens: lines.map(&:tokens).flatten, line: lines.join, - index: line.index + index: line.index, + consecutive: false ) # Hide the rest of the lines - lines[1..-1].each do |line| + lines[1..].each do |line| # The above lines already have newlines in them, if add more # then there will be double newline, use an empty line instead - @document[line.index] = CodeLine.new(line: "", index: line.index, lex: []) + @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [], consecutive: false) end end self @@ -291,7 +210,7 @@ module SyntaxSuggest # Like `take_while` except when it stops # iterating, it also returns the line # that caused it to stop - def take_while_including(range = 0..-1) + def take_while_including(range = 0..) take_next_and_stop = false @document[range].take_while do |line| next if take_next_and_stop diff --git a/lib/syntax_suggest/cli.rb b/lib/syntax_suggest/cli.rb index b89fa5d013..967f77bf70 100644 --- a/lib/syntax_suggest/cli.rb +++ b/lib/syntax_suggest/cli.rb @@ -65,6 +65,7 @@ module SyntaxSuggest ) if display.document_ok? + @io.puts "Syntax OK" @exit_obj.exit(0) else @exit_obj.exit(1) @@ -91,8 +92,8 @@ module SyntaxSuggest # ... 
- ❯ 10 defdog - ❯ 15 end + > 10 defdog + > 15 end ENV options: diff --git a/lib/syntax_suggest/code_block.rb b/lib/syntax_suggest/code_block.rb index 61e7986da4..d842890300 100644 --- a/lib/syntax_suggest/code_block.rb +++ b/lib/syntax_suggest/code_block.rb @@ -81,7 +81,7 @@ module SyntaxSuggest # lines then the result cannot be invalid # # That means there's no reason to re-check all - # lines with ripper (which is expensive). + # lines with the parser (which is expensive). # Benchmark in commit message @valid = if lines.all? { |l| l.hidden? || l.empty? } true diff --git a/lib/syntax_suggest/code_frontier.rb b/lib/syntax_suggest/code_frontier.rb index 8e93b32514..38d5375ef4 100644 --- a/lib/syntax_suggest/code_frontier.rb +++ b/lib/syntax_suggest/code_frontier.rb @@ -8,7 +8,7 @@ module SyntaxSuggest # # 1. Sanitize/format input source # 2. Search for invalid blocks - # 3. Format invalid blocks into something meaninful + # 3. Format invalid blocks into something meaningful # # The Code frontier is a critical part of the second step # @@ -117,7 +117,7 @@ module SyntaxSuggest if ENV["SYNTAX_SUGGEST_DEBUG"] puts "```" - puts @queue.peek.to_s + puts @queue.peek puts "```" puts " @frontier indent: #{frontier_indent}" puts " @unvisited indent: #{unvisited_indent}" diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb index dc738ab128..7fb1aae26a 100644 --- a/lib/syntax_suggest/code_line.rb +++ b/lib/syntax_suggest/code_line.rb @@ -26,34 +26,66 @@ module SyntaxSuggest # Returns an array of CodeLine objects # from the source string - def self.from_source(source, lines: nil) - lines ||= source.lines - lex_array_for_line = LexAll.new(source: source, source_lines: lines).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex } - lines.map.with_index do |line, index| + def self.from_source(source) + source = +source + parse_result = Prism.parse_lex(source) + ast, tokens = parse_result.value + + clean_comments!(source, 
parse_result.comments) + + visitor = Visitor.new + visitor.visit(ast) + tokens.sort_by! { |token, _state| token.location.start_line } + + prev_token = nil + tokens.map! do |token, _state| + prev_token = Token.new(token, prev_token, visitor) + end + + tokens_for_line = tokens.each_with_object(Hash.new { |h, k| h[k] = [] }) { |token, hash| hash[token.line] << token } + source.lines.map.with_index do |line, index| CodeLine.new( line: line, index: index, - lex: lex_array_for_line[index + 1] + tokens: tokens_for_line[index + 1], + consecutive: visitor.consecutive_lines.include?(index + 1) ) end end - attr_reader :line, :index, :lex, :line_number, :indent - def initialize(line:, index:, lex:) - @lex = lex + # Remove comments that apear on their own in source. They will never be the cause + # of syntax errors and are just visual noise. Example: + # + # source = +<<~RUBY + # # Comment-only line + # foo # Inline comment + # RUBY + # CodeLine.clean_comments!(source, Prism.parse(source).comments) + # source # => "\nfoo # Inline comment\n" + def self.clean_comments!(source, comments) + # Iterate backwards since we are modifying the source in place and must preserve + # the offsets. Prism comments are sorted by their location in the source. + comments.reverse_each do |comment| + next if comment.trailing? + source.bytesplice(comment.location.start_offset, comment.location.length, "") + end + end + + attr_reader :line, :index, :tokens, :line_number, :indent + def initialize(line:, index:, tokens:, consecutive:) + @tokens = tokens @line = line @index = index + @consecutive = consecutive @original = line @line_number = @index + 1 strip_line = line.dup strip_line.lstrip! - if strip_line.empty? - @empty = true - @indent = 0 + @indent = if (@empty = strip_line.empty?) 
+ line.length - 1 # Newline removed from strip_line is not "whitespace" else - @empty = false - @indent = line.length - strip_line.length + line.length - strip_line.length end set_kw_end @@ -153,29 +185,16 @@ module SyntaxSuggest index <=> other.index end - # [Not stable API] - # - # Lines that have a `on_ignored_nl` type token and NOT - # a `BEG` type seem to be a good proxy for the ability - # to join multiple lines into one. - # - # This predicate method is used to determine when those - # two criteria have been met. - # - # The one known case this doesn't handle is: - # - # Ripper.lex <<~EOM - # a && - # b || - # c - # EOM - # - # For some reason this introduces `on_ignore_newline` but with BEG type - def ignore_newline_not_beg? - @ignore_newline_not_beg + # Can this line be logically joined together + # with the following line? Determined by walking + # the AST + def consecutive? + @consecutive end - # Determines if the given line has a trailing slash + # Determines if the given line has a trailing slash. + # Simply check if the line contains a backslash after + # the content of the last token. # # lines = CodeLine.from_source(<<~EOM) # it "foo" \ @@ -183,55 +202,19 @@ module SyntaxSuggest # expect(lines.first.trailing_slash?).to eq(true) # def trailing_slash? - last = @lex.last - return false unless last - return false unless last.type == :on_sp - - last.token == TRAILING_SLASH + return unless (last = @tokens.last) + @line.byteindex(TRAILING_SLASH, last.location.end_column) != nil end - # Endless method detection - # - # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab - # Detecting a "oneliner" seems to need a state machine. - # This can be done by looking mostly at the "state" (last value): - # - # ENDFN -> BEG (token = '=' ) -> END - # private def set_kw_end - oneliner_count = 0 - in_oneliner_def = nil - kw_count = 0 end_count = 0 - @ignore_newline_not_beg = false - @lex.each do |lex| - kw_count += 1 if lex.is_kw? 
- end_count += 1 if lex.is_end? - - if lex.type == :on_ignored_nl - @ignore_newline_not_beg = !lex.expr_beg? - end - - if in_oneliner_def.nil? - in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN) - elsif lex.state.allbits?(Ripper::EXPR_ENDFN) - # Continue - elsif lex.state.allbits?(Ripper::EXPR_BEG) - in_oneliner_def = :BODY if lex.token == "=" - elsif lex.state.allbits?(Ripper::EXPR_END) - # We found an endless method, count it - oneliner_count += 1 if in_oneliner_def == :BODY - - in_oneliner_def = nil - else - in_oneliner_def = nil - end + @tokens.each do |token| + kw_count += 1 if token.is_kw? + end_count += 1 if token.is_end? end - kw_count -= oneliner_count - @is_kw = (kw_count - end_count) > 0 @is_end = (end_count - kw_count) > 0 end diff --git a/lib/syntax_suggest/code_search.rb b/lib/syntax_suggest/code_search.rb index 2a86dfea90..7628dcd131 100644 --- a/lib/syntax_suggest/code_search.rb +++ b/lib/syntax_suggest/code_search.rb @@ -43,7 +43,7 @@ module SyntaxSuggest def initialize(source, record_dir: DEFAULT_VALUE) record_dir = if record_dir == DEFAULT_VALUE - ENV["SYNTAX_SUGGEST_RECORD_DIR"] || ENV["SYNTAX_SUGGEST_DEBUG"] ? "tmp" : nil + (ENV["SYNTAX_SUGGEST_RECORD_DIR"] || ENV["SYNTAX_SUGGEST_DEBUG"]) ? 
"tmp" : nil else record_dir end @@ -73,7 +73,7 @@ module SyntaxSuggest if ENV["SYNTAX_SUGGEST_DEBUG"] puts "\n\n==== #{filename} ====" puts "\n```#{block.starts_at}..#{block.ends_at}" - puts block.to_s + puts block puts "```" puts " block indent: #{block.current_indent}" end diff --git a/lib/syntax_suggest/core_ext.rb b/lib/syntax_suggest/core_ext.rb index 40f5fe1375..ffbc922eed 100644 --- a/lib/syntax_suggest/core_ext.rb +++ b/lib/syntax_suggest/core_ext.rb @@ -1,101 +1,47 @@ # frozen_string_literal: true -# Ruby 3.2+ has a cleaner way to hook into Ruby that doesn't use `require` -if SyntaxError.method_defined?(:detailed_message) - module SyntaxSuggest - class MiniStringIO - def initialize(isatty: $stderr.isatty) - @string = +"" - @isatty = isatty - end - - attr_reader :isatty - def puts(value = $/, **) - @string << value - end - - attr_reader :string - end - end - - SyntaxError.prepend Module.new { - def detailed_message(highlight: true, syntax_suggest: true, **kwargs) - return super unless syntax_suggest - - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - message = super - file = if highlight - SyntaxSuggest::PathnameFromMessage.new(super(highlight: false, **kwargs)).call.name - else - SyntaxSuggest::PathnameFromMessage.new(message).call.name - end - - io = SyntaxSuggest::MiniStringIO.new - - if file - SyntaxSuggest.call( - io: io, - source: file.read, - filename: file, - terminal: highlight - ) - annotation = io.string - - annotation + message - else +module SyntaxSuggest + # SyntaxSuggest.module_for_detailed_message [Private] + # + # Used to monkeypatch SyntaxError via Module.prepend + def self.module_for_detailed_message + Module.new { + def detailed_message(highlight: true, syntax_suggest: true, **kwargs) + return super unless syntax_suggest + + require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) + + message = super + + if path + file = Pathname.new(path) + io = SyntaxSuggest::MiniStringIO.new + + 
SyntaxSuggest.call( + io: io, + source: file.read, + filename: file, + terminal: highlight + ) + annotation = io.string + + annotation += "\n" unless annotation.end_with?("\n") + + annotation + message + else + message + end + rescue => e + if ENV["SYNTAX_SUGGEST_DEBUG"] + $stderr.warn(e.message) + $stderr.warn(e.backtrace) + end + + # Ignore internal errors message end - rescue => e - if ENV["SYNTAX_SUGGEST_DEBUG"] - $stderr.warn(e.message) - $stderr.warn(e.backtrace) - end - - # Ignore internal errors - message - end - } -else - autoload :Pathname, "pathname" - - # Monkey patch kernel to ensure that all `require` calls call the same - # method - module Kernel - module_function - - alias_method :syntax_suggest_original_require, :require - alias_method :syntax_suggest_original_require_relative, :require_relative - alias_method :syntax_suggest_original_load, :load - - def load(file, wrap = false) - syntax_suggest_original_load(file) - rescue SyntaxError => e - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - SyntaxSuggest.handle_error(e) - end - - def require(file) - syntax_suggest_original_require(file) - rescue SyntaxError => e - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - SyntaxSuggest.handle_error(e) - end - - def require_relative(file) - if Pathname.new(file).absolute? 
- syntax_suggest_original_require file - else - relative_from = caller_locations(1..1).first - relative_from_path = relative_from.absolute_path || relative_from.path - syntax_suggest_original_require File.expand_path("../#{file}", relative_from_path) - end - rescue SyntaxError => e - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - SyntaxSuggest.handle_error(e) - end + } end end + +SyntaxError.prepend(SyntaxSuggest.module_for_detailed_message) diff --git a/lib/syntax_suggest/display_code_with_line_numbers.rb b/lib/syntax_suggest/display_code_with_line_numbers.rb index 23f4b2d1ee..a18d62e54b 100644 --- a/lib/syntax_suggest/display_code_with_line_numbers.rb +++ b/lib/syntax_suggest/display_code_with_line_numbers.rb @@ -14,8 +14,8 @@ module SyntaxSuggest # # => # 1 # 2 def cat - # ❯ 3 Dir.chdir - # ❯ 4 end + # > 3 Dir.chdir + # > 4 end # 5 end # 6 class DisplayCodeWithLineNumbers @@ -50,7 +50,7 @@ module SyntaxSuggest private def format(contents:, number:, empty:, highlight: false) string = +"" string << if highlight - "❯ " + "> " else " " end diff --git a/lib/syntax_suggest/display_invalid_blocks.rb b/lib/syntax_suggest/display_invalid_blocks.rb index bc1143f4b0..5e79b3a262 100644 --- a/lib/syntax_suggest/display_invalid_blocks.rb +++ b/lib/syntax_suggest/display_invalid_blocks.rb @@ -14,7 +14,7 @@ module SyntaxSuggest @filename = filename @code_lines = code_lines - @terminal = terminal == DEFAULT_VALUE ? io.isatty : terminal + @terminal = (terminal == DEFAULT_VALUE) ? io.isatty : terminal end def document_ok? @@ -23,7 +23,6 @@ module SyntaxSuggest def call if document_ok? 
- @io.puts "Syntax OK" return self end diff --git a/lib/syntax_suggest/explain_syntax.rb b/lib/syntax_suggest/explain_syntax.rb index 142ed2e269..d7f5262ddb 100644 --- a/lib/syntax_suggest/explain_syntax.rb +++ b/lib/syntax_suggest/explain_syntax.rb @@ -1,8 +1,14 @@ # frozen_string_literal: true -require_relative "left_right_lex_count" +require_relative "left_right_token_count" module SyntaxSuggest + class GetParseErrors + def self.errors(source) + Prism.parse(source).errors.map(&:message) + end + end + # Explains syntax errors based on their source # # example: @@ -15,8 +21,8 @@ module SyntaxSuggest # # => "Unmatched keyword, missing `end' ?" # # When the error cannot be determined by lexical counting - # then ripper is run against the input and the raw ripper - # errors returned. + # then the parser is run against the input and the raw + # errors are returned. # # Example: # @@ -39,14 +45,14 @@ module SyntaxSuggest def initialize(code_lines:) @code_lines = code_lines - @left_right = LeftRightLexCount.new + @left_right = LeftRightTokenCount.new @missing = nil end def call @code_lines.each do |line| - line.lex.each do |lex| - @left_right.count_lex(lex) + line.tokens.each do |token| + @left_right.count_token(token) end end @@ -91,10 +97,10 @@ module SyntaxSuggest # Returns an array of syntax error messages # # If no missing pairs are found it falls back - # on the original ripper error messages + # on the original error messages def errors if missing.empty? 
- return RipperErrors.new(@code_lines.map(&:original).join).call.errors + return GetParseErrors.errors(@code_lines.map(&:original).join).uniq end missing.map { |miss| why(miss) } diff --git a/lib/syntax_suggest/left_right_lex_count.rb b/lib/syntax_suggest/left_right_token_count.rb index 6fcae7482b..e0562ba9cd 100644 --- a/lib/syntax_suggest/left_right_lex_count.rb +++ b/lib/syntax_suggest/left_right_token_count.rb @@ -9,19 +9,19 @@ module SyntaxSuggest # # Example: # - # left_right = LeftRightLexCount.new + # left_right = LeftRightTokenCount.new # left_right.count_kw # left_right.missing.first # # => "end" # - # left_right = LeftRightLexCount.new + # left_right = LeftRightTokenCount.new # source = "{ a: b, c: d" # Note missing '}' - # LexAll.new(source: source).each do |lex| - # left_right.count_lex(lex) + # CodeLine.from_source(source).flat_map(&:tokens).each do |token| + # left_right.count_token(token) # end # left_right.missing.first # # => "}" - class LeftRightLexCount + class LeftRightTokenCount def initialize @kw_count = 0 @end_count = 0 @@ -49,52 +49,46 @@ module SyntaxSuggest # # Example: # - # left_right = LeftRightLexCount.new - # left_right.count_lex(LexValue.new(1, :on_lbrace, "{", Ripper::EXPR_BEG)) + # token = CodeLine.from_source("{").first.tokens.first + # left_right = LeftRightTokenCount.new + # left_right.count_token(token) # left_right.count_for_char("{") # # => 1 # left_right.count_for_char("}") # # => 0 - def count_lex(lex) - case lex.type - when :on_tstring_content + def count_token(token) + case token.type + when :STRING_CONTENT # ^^^ # Means it's a string or a symbol `"{"` rather than being # part of a data structure (like a hash) `{ a: b }` # ignore it. 
- when :on_words_beg, :on_symbos_beg, :on_qwords_beg, - :on_qsymbols_beg, :on_regexp_beg, :on_tstring_beg + when :PERCENT_UPPER_W, :PERCENT_UPPER_I, :PERCENT_LOWER_W, + :PERCENT_LOWER_I, :REGEXP_BEGIN, :STRING_BEGIN # ^^^ # Handle shorthand syntaxes like `%Q{ i am a string }` # # The start token will be the full thing `%Q{` but we # need to count it as if it's a `{`. Any token # can be used - char = lex.token[-1] + char = token.value[-1] @count_for_char[char] += 1 if @count_for_char.key?(char) - when :on_embexpr_beg + when :EMBEXPR_BEGIN # ^^^ # Embedded string expressions like `"#{foo} <-embed"` # are parsed with chars: # - # `#{` as :on_embexpr_beg - # `}` as :on_embexpr_end - # - # We cannot ignore both :on_emb_expr_beg and :on_embexpr_end - # because sometimes the lexer thinks something is an embed - # string end, when it is not like `lol = }` (no clue why). + # `#{` as :EMBEXPR_BEGIN + # `}` as :EMBEXPR_END # # When we see `#{` count it as a `{` or we will # have a mis-match count. # - case lex.token - when "\#{" - @count_for_char["{"] += 1 - end + @count_for_char["{"] += 1 else - @end_count += 1 if lex.is_end? - @kw_count += 1 if lex.is_kw? - @count_for_char[lex.token] += 1 if @count_for_char.key?(lex.token) + @end_count += 1 if token.is_end? + @kw_count += 1 if token.is_kw? 
+ @count_for_char[token.value] += 1 if @count_for_char.key?(token.value) end end diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb deleted file mode 100644 index 132cba9f5d..0000000000 --- a/lib/syntax_suggest/lex_all.rb +++ /dev/null @@ -1,55 +0,0 @@ -# frozen_string_literal: true - -module SyntaxSuggest - # Ripper.lex is not guaranteed to lex the entire source document - # - # lex = LexAll.new(source: source) - # lex.each do |value| - # puts value.line - # end - class LexAll - include Enumerable - - def initialize(source:, source_lines: nil) - @lex = Ripper::Lexer.new(source, "-", 1).parse.sort_by(&:pos) - lineno = @lex.last.pos.first + 1 - source_lines ||= source.lines - last_lineno = source_lines.length - - until lineno >= last_lineno - lines = source_lines[lineno..-1] - - @lex.concat( - Ripper::Lexer.new(lines.join, "-", lineno + 1).parse.sort_by(&:pos) - ) - lineno = @lex.last.pos.first + 1 - end - - last_lex = nil - @lex.map! { |elem| - last_lex = LexValue.new(elem.pos.first, elem.event, elem.tok, elem.state, last_lex) - } - end - - def to_a - @lex - end - - def each - return @lex.each unless block_given? 
- @lex.each do |x| - yield x - end - end - - def [](index) - @lex[index] - end - - def last - @lex.last - end - end -end - -require_relative "lex_value" diff --git a/lib/syntax_suggest/lex_value.rb b/lib/syntax_suggest/lex_value.rb deleted file mode 100644 index 008cc105b5..0000000000 --- a/lib/syntax_suggest/lex_value.rb +++ /dev/null @@ -1,70 +0,0 @@ -# frozen_string_literal: true - -module SyntaxSuggest - # Value object for accessing lex values - # - # This lex: - # - # [1, 0], :on_ident, "describe", CMDARG - # - # Would translate into: - # - # lex.line # => 1 - # lex.type # => :on_indent - # lex.token # => "describe" - class LexValue - attr_reader :line, :type, :token, :state - - def initialize(line, type, token, state, last_lex = nil) - @line = line - @type = type - @token = token - @state = state - - set_kw_end(last_lex) - end - - private def set_kw_end(last_lex) - @is_end = false - @is_kw = false - return if type != :on_kw - # - return if last_lex && last_lex.fname? # https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953 - - case token - when "if", "unless", "while", "until" - # Only count if/unless when it's not a "trailing" if/unless - # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375 - @is_kw = true unless expr_label? - when "def", "case", "for", "begin", "class", "module", "do" - @is_kw = true - when "end" - @is_end = true - end - end - - def fname? - state.allbits?(Ripper::EXPR_FNAME) - end - - def ignore_newline? - type == :on_ignored_nl - end - - def is_end? - @is_end - end - - def is_kw? - @is_kw - end - - def expr_beg? - state.anybits?(Ripper::EXPR_BEG) - end - - def expr_label? 
- state.allbits?(Ripper::EXPR_LABEL) - end - end -end diff --git a/lib/syntax_suggest/mini_stringio.rb b/lib/syntax_suggest/mini_stringio.rb new file mode 100644 index 0000000000..1a82572eeb --- /dev/null +++ b/lib/syntax_suggest/mini_stringio.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Mini String IO [Private] + # + # Acts like a StringIO with reduced API, but without having to require that + # class. + # + # The original codebase emitted directly to $stderr, but now SyntaxError#detailed_message + # needs a string output. To accomplish that we kept the original print infrastructure in place and + # added this class to accumulate the print output into a string. + class MiniStringIO + EMPTY_ARG = Object.new + + def initialize(isatty: $stderr.isatty) + @string = +"" + @isatty = isatty + end + + attr_reader :isatty + def puts(value = EMPTY_ARG, **) + if !value.equal?(EMPTY_ARG) + @string << value + end + @string << $/ + end + + attr_reader :string + end +end diff --git a/lib/syntax_suggest/parse_blocks_from_indent_line.rb b/lib/syntax_suggest/parse_blocks_from_indent_line.rb index d1071732fe..39dfca55d2 100644 --- a/lib/syntax_suggest/parse_blocks_from_indent_line.rb +++ b/lib/syntax_suggest/parse_blocks_from_indent_line.rb @@ -8,7 +8,7 @@ module SyntaxSuggest # grabbing one that contains only an "end". In this example: # # def dog - # begonn # mispelled `begin` + # begonn # misspelled `begin` # puts "bark" # end # end @@ -36,8 +36,8 @@ module SyntaxSuggest # Builds blocks from bottom up def each_neighbor_block(target_line) scan = AroundBlockScan.new(code_lines: code_lines, block: CodeBlock.new(lines: target_line)) - .skip(:empty?) - .skip(:hidden?) 
+ .force_add_empty + .force_add_hidden .scan_while { |line| line.indent >= target_line.indent } neighbors = scan.code_block.lines diff --git a/lib/syntax_suggest/pathname_from_message.rb b/lib/syntax_suggest/pathname_from_message.rb index ea1a90856e..ab90227427 100644 --- a/lib/syntax_suggest/pathname_from_message.rb +++ b/lib/syntax_suggest/pathname_from_message.rb @@ -4,7 +4,7 @@ module SyntaxSuggest # Converts a SyntaxError message to a path # # Handles the case where the filename has a colon in it - # such as on a windows file system: https://github.com/zombocom/syntax_suggest/issues/111 + # such as on a windows file system: https://github.com/ruby/syntax_suggest/issues/111 # # Example: # @@ -13,7 +13,7 @@ module SyntaxSuggest # # => "/tmp/scratch.rb" # class PathnameFromMessage - EVAL_RE = /^\(eval\):\d+/ + EVAL_RE = /^\(eval.*\):\d+/ STREAMING_RE = /^-:\d+/ attr_reader :name diff --git a/lib/syntax_suggest/ripper_errors.rb b/lib/syntax_suggest/ripper_errors.rb deleted file mode 100644 index 48eb206e48..0000000000 --- a/lib/syntax_suggest/ripper_errors.rb +++ /dev/null @@ -1,36 +0,0 @@ -# frozen_string_literal: true - -module SyntaxSuggest - # Capture parse errors from ripper - # - # Example: - # - # puts RipperErrors.new(" def foo").call.errors - # # => ["syntax error, unexpected end-of-input, expecting ';' or '\\n'"] - class RipperErrors < Ripper - attr_reader :errors - - # Comes from ripper, called - # on every parse error, msg - # is a string - def on_parse_error(msg) - @errors ||= [] - @errors << msg - end - - alias_method :on_alias_error, :on_parse_error - alias_method :on_assign_error, :on_parse_error - alias_method :on_class_name_error, :on_parse_error - alias_method :on_param_error, :on_parse_error - alias_method :compile_error, :on_parse_error - - def call - @run_once ||= begin - @errors = [] - parse - true - end - self - end - end -end diff --git a/lib/syntax_suggest/scan_history.rb b/lib/syntax_suggest/scan_history.rb new file mode 100644 index 
0000000000..dc36e6ba2e --- /dev/null +++ b/lib/syntax_suggest/scan_history.rb @@ -0,0 +1,134 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Scans up/down from the given block + # + # You can try out a change, stash it, or commit it to save for later + # + # Example: + # + # scanner = ScanHistory.new(code_lines: code_lines, block: block) + # scanner.scan( + # up: ->(_, _, _) { true }, + # down: ->(_, _, _) { true } + # ) + # scanner.changed? # => true + # expect(scanner.lines).to eq(code_lines) + # + # scanner.stash_changes + # + # expect(scanner.lines).to_not eq(code_lines) + class ScanHistory + attr_reader :before_index, :after_index + + def initialize(code_lines:, block:) + @code_lines = code_lines + @history = [block] + refresh_index + end + + def commit_if_changed + if changed? + @history << CodeBlock.new(lines: @code_lines[before_index..after_index]) + end + + self + end + + # Discards any changes that have not been committed + def stash_changes + refresh_index + self + end + + # Discard changes that have not been committed and revert the last commit + # + # Cannot revert the first commit + def revert_last_commit + if @history.length > 1 + @history.pop + refresh_index + end + + self + end + + def changed? + @before_index != current.lines.first.index || + @after_index != current.lines.last.index + end + + # Iterates up and down + # + # Returns line, kw_count, end_count for each iteration + def scan(up:, down:) + kw_count = 0 + end_count = 0 + + up_index = before_lines.reverse_each.take_while do |line| + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? + up.call(line, kw_count, end_count) + end.last&.index + + kw_count = 0 + end_count = 0 + + down_index = after_lines.each.take_while do |line| + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? 
+ down.call(line, kw_count, end_count) + end.last&.index + + @before_index = if up_index && up_index < @before_index + up_index + else + @before_index + end + + @after_index = if down_index && down_index > @after_index + down_index + else + @after_index + end + + self + end + + def next_up + return nil if @before_index <= 0 + + @code_lines[@before_index - 1] + end + + def next_down + return nil if @after_index >= @code_lines.length + + @code_lines[@after_index + 1] + end + + def lines + @code_lines[@before_index..@after_index] + end + + private def before_lines + @code_lines[0...@before_index] || [] + end + + # Returns an array of all the CodeLines that exist after + # the currently scanned block + private def after_lines + @code_lines[@after_index.next..] || [] + end + + private def current + @history.last + end + + private def refresh_index + @before_index = current.lines.first.index + @after_index = current.lines.last.index + self + end + end +end diff --git a/lib/syntax_suggest/syntax_suggest.gemspec b/lib/syntax_suggest/syntax_suggest.gemspec index acf9be7710..44e458aaad 100644 --- a/lib/syntax_suggest/syntax_suggest.gemspec +++ b/lib/syntax_suggest/syntax_suggest.gemspec @@ -14,12 +14,12 @@ Gem::Specification.new do |spec| spec.summary = "Find syntax errors in your source in a snap" spec.description = 'When you get an "unexpected end" in your syntax this gem helps you find it' - spec.homepage = "https://github.com/zombocom/syntax_suggest.git" + spec.homepage = "https://github.com/ruby/syntax_suggest.git" spec.license = "MIT" - spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0") + spec.required_ruby_version = Gem::Requirement.new(">= 3.3.0") spec.metadata["homepage_uri"] = spec.homepage - spec.metadata["source_code_uri"] = "https://github.com/zombocom/syntax_suggest.git" + spec.metadata["source_code_uri"] = "https://github.com/ruby/syntax_suggest.git" # Specify which files should be added to the gem when it is released. 
# The `git ls-files -z` loads the files in the RubyGem that have been added into git. @@ -27,6 +27,6 @@ Gem::Specification.new do |spec| `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features|assets)/}) } end spec.bindir = "exe" - spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } + spec.executables = ["syntax_suggest"] spec.require_paths = ["lib"] end diff --git a/lib/syntax_suggest/token.rb b/lib/syntax_suggest/token.rb new file mode 100644 index 0000000000..fc52639b1f --- /dev/null +++ b/lib/syntax_suggest/token.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Value object for accessing lex values + # + # This lex: + # + # [IDENTIFIER(1,0)-(1,8)("describe"), 32] + # + # Would translate into: + # + # lex.location # => (1,0)-(1,8) + # lex.type # => :IDENTIFIER + # lex.value # => "describe" + class Token + attr_reader :location, :type, :value + + KW_TYPES = %i[ + KEYWORD_IF KEYWORD_UNLESS KEYWORD_WHILE KEYWORD_UNTIL + KEYWORD_DEF KEYWORD_CASE KEYWORD_FOR KEYWORD_BEGIN KEYWORD_CLASS KEYWORD_MODULE KEYWORD_DO KEYWORD_DO_LOOP + ].to_set.freeze + private_constant :KW_TYPES + + def initialize(prism_token, previous_prism_token, visitor) + @location = prism_token.location + @type = prism_token.type + @value = prism_token.value + + # Prism lexes `:module` as SYMBOL_BEGIN, KEYWORD_MODULE + # https://github.com/ruby/prism/issues/3940 + symbol_content = previous_prism_token&.type == :SYMBOL_BEGIN + @is_kw = KW_TYPES.include?(@type) + @is_kw = false if symbol_content || visitor.endless_def_keyword_offsets.include?(@location.start_offset) + @is_end = @type == :KEYWORD_END + end + + def line + @location.start_line + end + + def is_end? + @is_end + end + + def is_kw? 
+ @is_kw + end + end +end diff --git a/lib/syntax_suggest/version.rb b/lib/syntax_suggest/version.rb index a5176dcf2e..9114a079f6 100644 --- a/lib/syntax_suggest/version.rb +++ b/lib/syntax_suggest/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SyntaxSuggest - VERSION = "0.0.1" + VERSION = "3.0.0" end diff --git a/lib/syntax_suggest/visitor.rb b/lib/syntax_suggest/visitor.rb new file mode 100644 index 0000000000..6e25f7239c --- /dev/null +++ b/lib/syntax_suggest/visitor.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Walks the Prism AST to extract structural info that cannot be reliably determined from tokens + # alone. + # + # Such as the location of lines that must be logically joined so the search algorithm will + # treat them as one. Example: + # + # source = <<~RUBY + # User # 1 + # .where(name: "Earlopain") # 2 + # .first # 3 + # RUBY + # ast, _tokens = Prism.parse_lex(source).value + # visitor = Visitor.new + # visitor.visit(ast) + # visitor.consecutive_lines # => Set[2, 1] + # + # This output means that line 1 and line 2 need to be joined with their next line. + # + # And determining the location of "endless" method definitions. For example: + # + # source = <<~RUBY + # def cube(x) + # x * x * x + # end + # def square(x) = x * x # 1 + # RUBY + # + # ast, _tokens = Prism.parse_lex(source).value + # visitor = Visitor.new + # visitor.visit(ast) + # visitor.endless_def_keyword_offsets # => Set[28] + class Visitor < Prism::Visitor + attr_reader :endless_def_keyword_offsets, :consecutive_lines + + def initialize + @endless_def_keyword_offsets = Set.new + @consecutive_lines = Set.new + end + + # Called by Prism::Visitor for every method-call node in the AST + # (e.g. `foo.bar`, `foo.bar.baz`). 
+ def visit_call_node(node) + receiver_loc = node.receiver&.location + call_operator_loc = node.call_operator_loc + message_loc = node.message_loc + if receiver_loc && call_operator_loc && message_loc + # dot-leading (dot on the next line) + # foo # line 1 - consecutive + # .bar # line 2 + if receiver_loc.end_line != call_operator_loc.start_line && call_operator_loc.start_line == message_loc.start_line + (receiver_loc.end_line..call_operator_loc.start_line - 1).each do |line| + @consecutive_lines << line + end + end + + # dot-trailing (dot on the same line as the receiver) + # foo. # line 1 - consecutive + # bar # line 2 + if receiver_loc.end_line == call_operator_loc.start_line && call_operator_loc.start_line != message_loc.start_line + (call_operator_loc.start_line..message_loc.start_line - 1).each do |line| + @consecutive_lines << line + end + end + end + super + end + + # Called by Prism::Visitor for every `def` node in the AST. + # Records the keyword start location for endless method definitions + # like `def foo = 123`. These are valid without a matching `end`, + # so Token must exclude them when deciding if a line is a keyword. + def visit_def_node(node) + @endless_def_keyword_offsets << node.def_keyword_loc.start_offset if node.equal_loc + super + end + end +end |
