diff options
Diffstat (limited to 'lib/syntax_suggest')
| -rw-r--r-- | lib/syntax_suggest/api.rb | 53 | ||||
| -rw-r--r-- | lib/syntax_suggest/clean_document.rb | 105 | ||||
| -rw-r--r-- | lib/syntax_suggest/code_line.rb | 134 | ||||
| -rw-r--r-- | lib/syntax_suggest/core_ext.rb | 127 | ||||
| -rw-r--r-- | lib/syntax_suggest/explain_syntax.rb | 18 | ||||
| -rw-r--r-- | lib/syntax_suggest/left_right_token_count.rb (renamed from lib/syntax_suggest/left_right_lex_count.rb) | 48 | ||||
| -rw-r--r-- | lib/syntax_suggest/lex_all.rb | 74 | ||||
| -rw-r--r-- | lib/syntax_suggest/lex_value.rb | 70 | ||||
| -rw-r--r-- | lib/syntax_suggest/ripper_errors.rb | 39 | ||||
| -rw-r--r-- | lib/syntax_suggest/syntax_suggest.gemspec | 2 | ||||
| -rw-r--r-- | lib/syntax_suggest/token.rb | 49 | ||||
| -rw-r--r-- | lib/syntax_suggest/version.rb | 2 | ||||
| -rw-r--r-- | lib/syntax_suggest/visitor.rb | 80 |
13 files changed, 271 insertions, 530 deletions
diff --git a/lib/syntax_suggest/api.rb b/lib/syntax_suggest/api.rb index 46c9c8adac..5054efa888 100644 --- a/lib/syntax_suggest/api.rb +++ b/lib/syntax_suggest/api.rb @@ -7,25 +7,8 @@ require "stringio" require "pathname" require "timeout" -# We need Ripper loaded for `Prism.lex_compat` even if we're using Prism -# for lexing and parsing -require "ripper" - # Prism is the new parser, replacing Ripper -# -# We need to "dual boot" both for now because syntax_suggest -# supports older rubies that do not ship with syntax suggest. -# -# We also need the ability to control loading of this library -# so we can test that both modes work correctly in CI. -if (value = ENV["SYNTAX_SUGGEST_DISABLE_PRISM"]) - warn "Skipping loading prism due to SYNTAX_SUGGEST_DISABLE_PRISM=#{value}" -else - begin - require "prism" - rescue LoadError - end -end +require "prism" module SyntaxSuggest # Used to indicate a default value that cannot @@ -35,14 +18,6 @@ module SyntaxSuggest class Error < StandardError; end TIMEOUT_DEFAULT = ENV.fetch("SYNTAX_SUGGEST_TIMEOUT", 1).to_i - # SyntaxSuggest.use_prism_parser? [Private] - # - # Tells us if the prism parser is available for use - # or if we should fallback to `Ripper` - def self.use_prism_parser? - defined?(Prism) - end - # SyntaxSuggest.handle_error [Public] # # Takes a `SyntaxError` exception, uses the @@ -146,30 +121,17 @@ module SyntaxSuggest def self.valid_without?(without_lines:, code_lines:) lines = code_lines - Array(without_lines).flatten - if lines.empty? - true - else - valid?(lines) - end + lines.empty? || valid?(lines) end # SyntaxSuggest.invalid? [Private] # # Opposite of `SyntaxSuggest.valid?` - if defined?(Prism) - def self.invalid?(source) - source = source.join if source.is_a?(Array) - source = source.to_s + def self.invalid?(source) + source = source.join if source.is_a?(Array) + source = source.to_s - Prism.parse(source).failure? 
- end - else - def self.invalid?(source) - source = source.join if source.is_a?(Array) - source = source.to_s - - Ripper.new(source).tap(&:parse).error? - end + Prism.parse(source).failure? end # SyntaxSuggest.valid? [Private] @@ -223,7 +185,6 @@ require_relative "explain_syntax" require_relative "clean_document" # Helpers -require_relative "lex_all" require_relative "code_line" require_relative "code_block" require_relative "block_expand" @@ -235,3 +196,5 @@ require_relative "priority_engulf_queue" require_relative "pathname_from_message" require_relative "display_invalid_blocks" require_relative "parse_blocks_from_indent_line" +require_relative "visitor" +require_relative "token" diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb index ba307af46e..94c68d8ad4 100644 --- a/lib/syntax_suggest/clean_document.rb +++ b/lib/syntax_suggest/clean_document.rb @@ -66,27 +66,9 @@ module SyntaxSuggest # # All of these problems are fixed by joining the whole heredoc into a single # line. - # - # ## Comments and whitespace - # - # Comments can throw off the way the lexer tells us that the line - # logically belongs with the next line. This is valid ruby but - # results in a different lex output than before: - # - # 1 User. - # 2 where(name: "schneems"). - # 3 # Comment here - # 4 first - # - # To handle this we can replace comment lines with empty lines - # and then re-lex the source. This removal and re-lexing preserves - # line index and document size, but generates an easier to work with - # document. 
- # class CleanDocument def initialize(source:) - lines = clean_sweep(source: source) - @document = CodeLine.from_source(lines.join, lines: lines) + @document = CodeLine.from_source(source) end # Call all of the document "cleaners" @@ -110,62 +92,6 @@ module SyntaxSuggest @document.join end - # Remove comments - # - # replace with empty newlines - # - # source = <<~'EOM' - # # Comment 1 - # puts "hello" - # # Comment 2 - # puts "world" - # EOM - # - # lines = CleanDocument.new(source: source).lines - # expect(lines[0].to_s).to eq("\n") - # expect(lines[1].to_s).to eq("puts "hello") - # expect(lines[2].to_s).to eq("\n") - # expect(lines[3].to_s).to eq("puts "world") - # - # Important: This must be done before lexing. - # - # After this change is made, we lex the document because - # removing comments can change how the doc is parsed. - # - # For example: - # - # values = LexAll.new(source: <<~EOM)) - # User. - # # comment - # where(name: 'schneems') - # EOM - # expect( - # values.count {|v| v.type == :on_ignored_nl} - # ).to eq(1) - # - # After the comment is removed: - # - # values = LexAll.new(source: <<~EOM)) - # User. 
- # - # where(name: 'schneems') - # EOM - # expect( - # values.count {|v| v.type == :on_ignored_nl} - # ).to eq(2) - # - def clean_sweep(source:) - # Match comments, but not HEREDOC strings with #{variable} interpolation - # https://rubular.com/r/HPwtW9OYxKUHXQ - source.lines.map do |line| - if line.match?(/^\s*#([^{].*|)$/) - $/ - else - line - end - end - end - # Smushes all heredoc lines into one line # # source = <<~'EOM' @@ -182,11 +108,11 @@ module SyntaxSuggest start_index_stack = [] heredoc_beg_end_index = [] lines.each do |line| - line.lex.each do |lex_value| - case lex_value.type - when :on_heredoc_beg + line.tokens.each do |token| + case token.type + when :HEREDOC_START start_index_stack << line.index - when :on_heredoc_end + when :HEREDOC_END start_index = start_index_stack.pop end_index = line.index heredoc_beg_end_index << [start_index, end_index] @@ -212,20 +138,10 @@ module SyntaxSuggest # expect(lines[0].to_s).to eq(source) # expect(lines[1].to_s).to eq("") # - # The one known case this doesn't handle is: - # - # Ripper.lex <<~EOM - # a && - # b || - # c - # EOM - # - # For some reason this introduces `on_ignore_newline` but with BEG type - # def join_consecutive! - consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line| + consecutive_groups = @document.select(&:consecutive?).map do |code_line| take_while_including(code_line.index..) do |line| - line.ignore_newline_not_beg? + line.consecutive? 
end end @@ -273,16 +189,17 @@ module SyntaxSuggest # Join group into the first line @document[line.index] = CodeLine.new( - lex: lines.map(&:lex).flatten, + tokens: lines.map(&:tokens).flatten, line: lines.join, - index: line.index + index: line.index, + consecutive: false ) # Hide the rest of the lines lines[1..].each do |line| # The above lines already have newlines in them, if add more # then there will be double newline, use an empty line instead - @document[line.index] = CodeLine.new(line: "", index: line.index, lex: []) + @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [], consecutive: false) end end self diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb index 58197e95d0..7fb1aae26a 100644 --- a/lib/syntax_suggest/code_line.rb +++ b/lib/syntax_suggest/code_line.rb @@ -26,23 +26,57 @@ module SyntaxSuggest # Returns an array of CodeLine objects # from the source string - def self.from_source(source, lines: nil) - lines ||= source.lines - lex_array_for_line = LexAll.new(source: source, source_lines: lines).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex } - lines.map.with_index do |line, index| + def self.from_source(source) + source = +source + parse_result = Prism.parse_lex(source) + ast, tokens = parse_result.value + + clean_comments!(source, parse_result.comments) + + visitor = Visitor.new + visitor.visit(ast) + tokens.sort_by! { |token, _state| token.location.start_line } + + prev_token = nil + tokens.map! 
do |token, _state| + prev_token = Token.new(token, prev_token, visitor) + end + + tokens_for_line = tokens.each_with_object(Hash.new { |h, k| h[k] = [] }) { |token, hash| hash[token.line] << token } + source.lines.map.with_index do |line, index| CodeLine.new( line: line, index: index, - lex: lex_array_for_line[index + 1] + tokens: tokens_for_line[index + 1], + consecutive: visitor.consecutive_lines.include?(index + 1) ) end end - attr_reader :line, :index, :lex, :line_number, :indent - def initialize(line:, index:, lex:) - @lex = lex + # Remove comments that appear on their own in source. They will never be the cause + # of syntax errors and are just visual noise. Example: + # + # source = +<<~RUBY + # # Comment-only line + # foo # Inline comment + # RUBY + # CodeLine.clean_comments!(source, Prism.parse(source).comments) + # source # => "\nfoo # Inline comment\n" + def self.clean_comments!(source, comments) + # Iterate backwards since we are modifying the source in place and must preserve + # the offsets. Prism comments are sorted by their location in the source. + comments.reverse_each do |comment| + next if comment.trailing? + source.bytesplice(comment.location.start_offset, comment.location.length, "") + end + end + + attr_reader :line, :index, :tokens, :line_number, :indent + def initialize(line:, index:, tokens:, consecutive:) + @tokens = tokens @line = line @index = index + @consecutive = consecutive @original = line @line_number = @index + 1 strip_line = line.dup @@ -151,92 +185,36 @@ module SyntaxSuggest index <=> other.index end - # [Not stable API] - # - # Lines that have a `on_ignored_nl` type token and NOT - # a `BEG` type seem to be a good proxy for the ability - # to join multiple lines into one. - # - # This predicate method is used to determine when those - # two criteria have been met. 
- # - # The one known case this doesn't handle is: - # - # Ripper.lex <<~EOM - # a && - # b || - # c - # EOM - # - # For some reason this introduces `on_ignore_newline` but with BEG type - def ignore_newline_not_beg? - @ignore_newline_not_beg + # Can this line be logically joined together + # with the following line? Determined by walking + # the AST + def consecutive? + @consecutive end - # Determines if the given line has a trailing slash + # Determines if the given line has a trailing slash. + # Simply check if the line contains a backslash after + # the content of the last token. # # lines = CodeLine.from_source(<<~EOM) # it "foo" \ # EOM # expect(lines.first.trailing_slash?).to eq(true) # - if SyntaxSuggest.use_prism_parser? - def trailing_slash? - last = @lex.last - last&.type == :on_tstring_end - end - else - def trailing_slash? - last = @lex.last - return false unless last - return false unless last.type == :on_sp - - last.token == TRAILING_SLASH - end + def trailing_slash? + return unless (last = @tokens.last) + @line.byteindex(TRAILING_SLASH, last.location.end_column) != nil end - # Endless method detection - # - # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab - # Detecting a "oneliner" seems to need a state machine. - # This can be done by looking mostly at the "state" (last value): - # - # ENDFN -> BEG (token = '=' ) -> END - # private def set_kw_end - oneliner_count = 0 - in_oneliner_def = nil - kw_count = 0 end_count = 0 - @ignore_newline_not_beg = false - @lex.each do |lex| - kw_count += 1 if lex.is_kw? - end_count += 1 if lex.is_end? - - if lex.type == :on_ignored_nl - @ignore_newline_not_beg = !lex.expr_beg? - end - - if in_oneliner_def.nil? 
- in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN) - elsif lex.state.allbits?(Ripper::EXPR_ENDFN) - # Continue - elsif lex.state.allbits?(Ripper::EXPR_BEG) - in_oneliner_def = :BODY if lex.token == "=" - elsif lex.state.allbits?(Ripper::EXPR_END) - # We found an endless method, count it - oneliner_count += 1 if in_oneliner_def == :BODY - - in_oneliner_def = nil - else - in_oneliner_def = nil - end + @tokens.each do |token| + kw_count += 1 if token.is_kw? + end_count += 1 if token.is_end? end - kw_count -= oneliner_count - @is_kw = (kw_count - end_count) > 0 @is_end = (end_count - kw_count) > 0 end diff --git a/lib/syntax_suggest/core_ext.rb b/lib/syntax_suggest/core_ext.rb index 94f57ba605..ffbc922eed 100644 --- a/lib/syntax_suggest/core_ext.rb +++ b/lib/syntax_suggest/core_ext.rb @@ -1,96 +1,47 @@ # frozen_string_literal: true -# Ruby 3.2+ has a cleaner way to hook into Ruby that doesn't use `require` -if SyntaxError.method_defined?(:detailed_message) - module SyntaxSuggest - # SyntaxSuggest.module_for_detailed_message [Private] - # - # Used to monkeypatch SyntaxError via Module.prepend - def self.module_for_detailed_message - Module.new { - def detailed_message(highlight: true, syntax_suggest: true, **kwargs) - return super unless syntax_suggest - - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - message = super - - if path - file = Pathname.new(path) - io = SyntaxSuggest::MiniStringIO.new - - SyntaxSuggest.call( - io: io, - source: file.read, - filename: file, - terminal: highlight - ) - annotation = io.string - - annotation += "\n" unless annotation.end_with?("\n") - - annotation + message - else - message - end - rescue => e - if ENV["SYNTAX_SUGGEST_DEBUG"] - $stderr.warn(e.message) - $stderr.warn(e.backtrace) - end - - # Ignore internal errors +module SyntaxSuggest + # SyntaxSuggest.module_for_detailed_message [Private] + # + # Used to monkeypatch SyntaxError via Module.prepend + def 
self.module_for_detailed_message + Module.new { + def detailed_message(highlight: true, syntax_suggest: true, **kwargs) + return super unless syntax_suggest + + require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) + + message = super + + if path + file = Pathname.new(path) + io = SyntaxSuggest::MiniStringIO.new + + SyntaxSuggest.call( + io: io, + source: file.read, + filename: file, + terminal: highlight + ) + annotation = io.string + + annotation += "\n" unless annotation.end_with?("\n") + + annotation + message + else message end - } - end - end - - SyntaxError.prepend(SyntaxSuggest.module_for_detailed_message) -else - autoload :Pathname, "pathname" - - #-- - # Monkey patch kernel to ensure that all `require` calls call the same - # method - #++ - module Kernel - # :stopdoc: - - module_function - - alias_method :syntax_suggest_original_require, :require - alias_method :syntax_suggest_original_require_relative, :require_relative - alias_method :syntax_suggest_original_load, :load - - def load(file, wrap = false) - syntax_suggest_original_load(file) - rescue SyntaxError => e - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - SyntaxSuggest.handle_error(e) - end - - def require(file) - syntax_suggest_original_require(file) - rescue SyntaxError => e - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - SyntaxSuggest.handle_error(e) - end + rescue => e + if ENV["SYNTAX_SUGGEST_DEBUG"] + $stderr.warn(e.message) + $stderr.warn(e.backtrace) + end - def require_relative(file) - if Pathname.new(file).absolute? 
- syntax_suggest_original_require file - else - relative_from = caller_locations(1..1).first - relative_from_path = relative_from.absolute_path || relative_from.path - syntax_suggest_original_require File.expand_path("../#{file}", relative_from_path) + # Ignore internal errors + message end - rescue SyntaxError => e - require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE) - - SyntaxSuggest.handle_error(e) - end + } end end + +SyntaxError.prepend(SyntaxSuggest.module_for_detailed_message) diff --git a/lib/syntax_suggest/explain_syntax.rb b/lib/syntax_suggest/explain_syntax.rb index 0d80c4d869..d7f5262ddb 100644 --- a/lib/syntax_suggest/explain_syntax.rb +++ b/lib/syntax_suggest/explain_syntax.rb @@ -1,19 +1,11 @@ # frozen_string_literal: true -require_relative "left_right_lex_count" - -if !SyntaxSuggest.use_prism_parser? - require_relative "ripper_errors" -end +require_relative "left_right_token_count" module SyntaxSuggest class GetParseErrors def self.errors(source) - if SyntaxSuggest.use_prism_parser? 
- Prism.parse(source).errors.map(&:message) - else - RipperErrors.new(source).call.errors - end + Prism.parse(source).errors.map(&:message) end end @@ -53,14 +45,14 @@ module SyntaxSuggest def initialize(code_lines:) @code_lines = code_lines - @left_right = LeftRightLexCount.new + @left_right = LeftRightTokenCount.new @missing = nil end def call @code_lines.each do |line| - line.lex.each do |lex| - @left_right.count_lex(lex) + line.tokens.each do |token| + @left_right.count_token(token) end end diff --git a/lib/syntax_suggest/left_right_lex_count.rb b/lib/syntax_suggest/left_right_token_count.rb index 6fcae7482b..e0562ba9cd 100644 --- a/lib/syntax_suggest/left_right_lex_count.rb +++ b/lib/syntax_suggest/left_right_token_count.rb @@ -9,19 +9,19 @@ module SyntaxSuggest # # Example: # - # left_right = LeftRightLexCount.new + # left_right = LeftRightTokenCount.new # left_right.count_kw # left_right.missing.first # # => "end" # - # left_right = LeftRightLexCount.new + # left_right = LeftRightTokenCount.new # source = "{ a: b, c: d" # Note missing '}' - # LexAll.new(source: source).each do |lex| - # left_right.count_lex(lex) + # CodeLine.from_source(source).flat_map(&:tokens).each do |token| + # left_right.count_token(token) # end # left_right.missing.first # # => "}" - class LeftRightLexCount + class LeftRightTokenCount def initialize @kw_count = 0 @end_count = 0 @@ -49,52 +49,46 @@ module SyntaxSuggest # # Example: # - # left_right = LeftRightLexCount.new - # left_right.count_lex(LexValue.new(1, :on_lbrace, "{", Ripper::EXPR_BEG)) + # token = CodeLine.from_source("{").first.tokens.first + # left_right = LeftRightTokenCount.new + # left_right.count_token(token) # left_right.count_for_char("{") # # => 1 # left_right.count_for_char("}") # # => 0 - def count_lex(lex) - case lex.type - when :on_tstring_content + def count_token(token) + case token.type + when :STRING_CONTENT # ^^^ # Means it's a string or a symbol `"{"` rather than being # part of a data structure (like a hash) `{ a: 
b }` # ignore it. - when :on_words_beg, :on_symbos_beg, :on_qwords_beg, - :on_qsymbols_beg, :on_regexp_beg, :on_tstring_beg + when :PERCENT_UPPER_W, :PERCENT_UPPER_I, :PERCENT_LOWER_W, + :PERCENT_LOWER_I, :REGEXP_BEGIN, :STRING_BEGIN # ^^^ # Handle shorthand syntaxes like `%Q{ i am a string }` # # The start token will be the full thing `%Q{` but we # need to count it as if it's a `{`. Any token # can be used - char = lex.token[-1] + char = token.value[-1] @count_for_char[char] += 1 if @count_for_char.key?(char) - when :on_embexpr_beg + when :EMBEXPR_BEGIN # ^^^ # Embedded string expressions like `"#{foo} <-embed"` # are parsed with chars: # - # `#{` as :on_embexpr_beg - # `}` as :on_embexpr_end - # - # We cannot ignore both :on_emb_expr_beg and :on_embexpr_end - # because sometimes the lexer thinks something is an embed - # string end, when it is not like `lol = }` (no clue why). + # `#{` as :EMBEXPR_BEGIN + # `}` as :EMBEXPR_END # # When we see `#{` count it as a `{` or we will # have a mis-match count. # - case lex.token - when "\#{" - @count_for_char["{"] += 1 - end + @count_for_char["{"] += 1 else - @end_count += 1 if lex.is_end? - @kw_count += 1 if lex.is_kw? - @count_for_char[lex.token] += 1 if @count_for_char.key?(lex.token) + @end_count += 1 if token.is_end? + @kw_count += 1 if token.is_kw? + @count_for_char[token.value] += 1 if @count_for_char.key?(token.value) end end diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb deleted file mode 100644 index c16fbb52d3..0000000000 --- a/lib/syntax_suggest/lex_all.rb +++ /dev/null @@ -1,74 +0,0 @@ -# frozen_string_literal: true - -module SyntaxSuggest - # Ripper.lex is not guaranteed to lex the entire source document - # - # This class guarantees the whole document is lex-ed by iteratively - # lexing the document where ripper stopped. - # - # Prism likely doesn't have the same problem. Once ripper support is removed - # we can likely reduce the complexity here if not remove the whole concept. 
- # - # Example usage: - # - # lex = LexAll.new(source: source) - # lex.each do |value| - # puts value.line - # end - class LexAll - include Enumerable - - def initialize(source:, source_lines: nil) - @lex = self.class.lex(source, 1) - lineno = @lex.last[0][0] + 1 - source_lines ||= source.lines - last_lineno = source_lines.length - - until lineno >= last_lineno - lines = source_lines[lineno..] - - @lex.concat( - self.class.lex(lines.join, lineno + 1) - ) - - lineno = @lex.last[0].first + 1 - end - - last_lex = nil - @lex.map! { |elem| - last_lex = LexValue.new(elem[0].first, elem[1], elem[2], elem[3], last_lex) - } - end - - if SyntaxSuggest.use_prism_parser? - def self.lex(source, line_number) - Prism.lex_compat(source, line: line_number).value.sort_by { |values| values[0] } - end - else - def self.lex(source, line_number) - Ripper::Lexer.new(source, "-", line_number).parse.sort_by(&:pos) - end - end - - def to_a - @lex - end - - def each - return @lex.each unless block_given? - @lex.each do |x| - yield x - end - end - - def [](index) - @lex[index] - end - - def last - @lex.last - end - end -end - -require_relative "lex_value" diff --git a/lib/syntax_suggest/lex_value.rb b/lib/syntax_suggest/lex_value.rb deleted file mode 100644 index b46a332772..0000000000 --- a/lib/syntax_suggest/lex_value.rb +++ /dev/null @@ -1,70 +0,0 @@ -# frozen_string_literal: true - -module SyntaxSuggest - # Value object for accessing lex values - # - # This lex: - # - # [1, 0], :on_ident, "describe", CMDARG - # - # Would translate into: - # - # lex.line # => 1 - # lex.type # => :on_indent - # lex.token # => "describe" - class LexValue - attr_reader :line, :type, :token, :state - - def initialize(line, type, token, state, last_lex = nil) - @line = line - @type = type - @token = token - @state = state - - set_kw_end(last_lex) - end - - private def set_kw_end(last_lex) - @is_end = false - @is_kw = false - return if type != :on_kw - - return if last_lex && last_lex.fname? 
# https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953 - - case token - when "if", "unless", "while", "until" - # Only count if/unless when it's not a "trailing" if/unless - # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375 - @is_kw = true unless expr_label? - when "def", "case", "for", "begin", "class", "module", "do" - @is_kw = true - when "end" - @is_end = true - end - end - - def fname? - state.allbits?(Ripper::EXPR_FNAME) - end - - def ignore_newline? - type == :on_ignored_nl - end - - def is_end? - @is_end - end - - def is_kw? - @is_kw - end - - def expr_beg? - state.anybits?(Ripper::EXPR_BEG) - end - - def expr_label? - state.allbits?(Ripper::EXPR_LABEL) - end - end -end diff --git a/lib/syntax_suggest/ripper_errors.rb b/lib/syntax_suggest/ripper_errors.rb deleted file mode 100644 index 4e2bc90948..0000000000 --- a/lib/syntax_suggest/ripper_errors.rb +++ /dev/null @@ -1,39 +0,0 @@ -# frozen_string_literal: true - -module SyntaxSuggest - # Capture parse errors from Ripper - # - # Prism returns the errors with their messages, but Ripper - # does not. To get them we must make a custom subclass. 
- # - # Example: - # - # puts RipperErrors.new(" def foo").call.errors - # # => ["syntax error, unexpected end-of-input, expecting ';' or '\\n'"] - class RipperErrors < Ripper - attr_reader :errors - - # Comes from ripper, called - # on every parse error, msg - # is a string - def on_parse_error(msg) - @errors ||= [] - @errors << msg - end - - alias_method :on_alias_error, :on_parse_error - alias_method :on_assign_error, :on_parse_error - alias_method :on_class_name_error, :on_parse_error - alias_method :on_param_error, :on_parse_error - alias_method :compile_error, :on_parse_error - - def call - @run_once ||= begin - @errors = [] - parse - true - end - self - end - end -end diff --git a/lib/syntax_suggest/syntax_suggest.gemspec b/lib/syntax_suggest/syntax_suggest.gemspec index 756a85bf63..44e458aaad 100644 --- a/lib/syntax_suggest/syntax_suggest.gemspec +++ b/lib/syntax_suggest/syntax_suggest.gemspec @@ -16,7 +16,7 @@ Gem::Specification.new do |spec| spec.description = 'When you get an "unexpected end" in your syntax this gem helps you find it' spec.homepage = "https://github.com/ruby/syntax_suggest.git" spec.license = "MIT" - spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0") + spec.required_ruby_version = Gem::Requirement.new(">= 3.3.0") spec.metadata["homepage_uri"] = spec.homepage spec.metadata["source_code_uri"] = "https://github.com/ruby/syntax_suggest.git" diff --git a/lib/syntax_suggest/token.rb b/lib/syntax_suggest/token.rb new file mode 100644 index 0000000000..fc52639b1f --- /dev/null +++ b/lib/syntax_suggest/token.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Value object for accessing lex values + # + # This lex: + # + # [IDENTIFIER(1,0)-(1,8)("describe"), 32] + # + # Would translate into: + # + # lex.location # => (1,0)-(1,8) + # lex.type # => :IDENTIFIER + # lex.value # => "describe" + class Token + attr_reader :location, :type, :value + + KW_TYPES = %i[ + KEYWORD_IF KEYWORD_UNLESS KEYWORD_WHILE 
KEYWORD_UNTIL + KEYWORD_DEF KEYWORD_CASE KEYWORD_FOR KEYWORD_BEGIN KEYWORD_CLASS KEYWORD_MODULE KEYWORD_DO KEYWORD_DO_LOOP + ].to_set.freeze + private_constant :KW_TYPES + + def initialize(prism_token, previous_prism_token, visitor) + @location = prism_token.location + @type = prism_token.type + @value = prism_token.value + + # Prism lexes `:module` as SYMBOL_BEGIN, KEYWORD_MODULE + # https://github.com/ruby/prism/issues/3940 + symbol_content = previous_prism_token&.type == :SYMBOL_BEGIN + @is_kw = KW_TYPES.include?(@type) + @is_kw = false if symbol_content || visitor.endless_def_keyword_offsets.include?(@location.start_offset) + @is_end = @type == :KEYWORD_END + end + + def line + @location.start_line + end + + def is_end? + @is_end + end + + def is_kw? + @is_kw + end + end +end diff --git a/lib/syntax_suggest/version.rb b/lib/syntax_suggest/version.rb index 1aa908f4e5..9114a079f6 100644 --- a/lib/syntax_suggest/version.rb +++ b/lib/syntax_suggest/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SyntaxSuggest - VERSION = "2.0.2" + VERSION = "3.0.0" end diff --git a/lib/syntax_suggest/visitor.rb b/lib/syntax_suggest/visitor.rb new file mode 100644 index 0000000000..6e25f7239c --- /dev/null +++ b/lib/syntax_suggest/visitor.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # Walks the Prism AST to extract structural info that cannot be reliably determined from tokens + # alone. + # + # Such as the location of lines that must be logically joined so the search algorithm will + # treat them as one. Example: + # + # source = <<~RUBY + # User # 1 + # .where(name: "Earlopain") # 2 + # .first # 3 + # RUBY + # ast, _tokens = Prism.parse_lex(source).value + # visitor = Visitor.new + # visitor.visit(ast) + # visitor.consecutive_lines # => Set[2, 1] + # + # This output means that line 1 and line 2 need to be joined with their next line. + # + # And determining the location of "endless" method definitions. 
For example: + # + # source = <<~RUBY + # def cube(x) + # x * x * x + # end + # def square(x) = x * x # 1 + # RUBY + # + # ast, _tokens = Prism.parse_lex(source).value + # visitor = Visitor.new + # visitor.visit(ast) + # visitor.endless_def_keyword_offsets # => Set[28] + class Visitor < Prism::Visitor + attr_reader :endless_def_keyword_offsets, :consecutive_lines + + def initialize + @endless_def_keyword_offsets = Set.new + @consecutive_lines = Set.new + end + + # Called by Prism::Visitor for every method-call node in the AST + # (e.g. `foo.bar`, `foo.bar.baz`). + def visit_call_node(node) + receiver_loc = node.receiver&.location + call_operator_loc = node.call_operator_loc + message_loc = node.message_loc + if receiver_loc && call_operator_loc && message_loc + # dot-leading (dot on the next line) + # foo # line 1 - consecutive + # .bar # line 2 + if receiver_loc.end_line != call_operator_loc.start_line && call_operator_loc.start_line == message_loc.start_line + (receiver_loc.end_line..call_operator_loc.start_line - 1).each do |line| + @consecutive_lines << line + end + end + + # dot-trailing (dot on the same line as the receiver) + # foo. # line 1 - consecutive + # bar # line 2 + if receiver_loc.end_line == call_operator_loc.start_line && call_operator_loc.start_line != message_loc.start_line + (call_operator_loc.start_line..message_loc.start_line - 1).each do |line| + @consecutive_lines << line + end + end + end + super + end + + # Called by Prism::Visitor for every `def` node in the AST. + # Records the keyword start location for endless method definitions + # like `def foo = 123`. These are valid without a matching `end`, + # so Token must exclude them when deciding if a line is a keyword. + def visit_def_node(node) + @endless_def_keyword_offsets << node.def_keyword_loc.start_offset if node.equal_loc + super + end + end +end |
