diff options
Diffstat (limited to 'lib/yarp')
| -rw-r--r-- | lib/yarp/desugar_visitor.rb | 204 | ||||
| -rw-r--r-- | lib/yarp/ffi.rb | 251 | ||||
| -rw-r--r-- | lib/yarp/language_server.rb | 166 | ||||
| -rw-r--r-- | lib/yarp/lex_compat.rb | 842 | ||||
| -rw-r--r-- | lib/yarp/pack.rb | 185 | ||||
| -rw-r--r-- | lib/yarp/parse_result/comments.rb | 172 | ||||
| -rw-r--r-- | lib/yarp/parse_result/newlines.rb | 60 | ||||
| -rw-r--r-- | lib/yarp/pattern.rb | 239 | ||||
| -rw-r--r-- | lib/yarp/ripper_compat.rb | 174 | ||||
| -rw-r--r-- | lib/yarp/version.rb | 5 | ||||
| -rw-r--r-- | lib/yarp/yarp.gemspec | 105 |
11 files changed, 0 insertions, 2403 deletions
diff --git a/lib/yarp/desugar_visitor.rb b/lib/yarp/desugar_visitor.rb deleted file mode 100644 index 6ee5861ac8..0000000000 --- a/lib/yarp/desugar_visitor.rb +++ /dev/null @@ -1,204 +0,0 @@ -# frozen_string_literal: true - -module YARP - class DesugarVisitor < MutationVisitor - # @@foo &&= bar - # - # becomes - # - # @@foo && @@foo = bar - def visit_class_variable_and_write_node(node) - desugar_and_write_node(node, ClassVariableReadNode, ClassVariableWriteNode, node.name) - end - - # @@foo ||= bar - # - # becomes - # - # defined?(@@foo) ? @@foo : @@foo = bar - def visit_class_variable_or_write_node(node) - desugar_or_write_defined_node(node, ClassVariableReadNode, ClassVariableWriteNode, node.name) - end - - # @@foo += bar - # - # becomes - # - # @@foo = @@foo + bar - def visit_class_variable_operator_write_node(node) - desugar_operator_write_node(node, ClassVariableReadNode, ClassVariableWriteNode, node.name) - end - - # Foo &&= bar - # - # becomes - # - # Foo && Foo = bar - def visit_constant_and_write_node(node) - desugar_and_write_node(node, ConstantReadNode, ConstantWriteNode, node.name) - end - - # Foo ||= bar - # - # becomes - # - # defined?(Foo) ? Foo : Foo = bar - def visit_constant_or_write_node(node) - desugar_or_write_defined_node(node, ConstantReadNode, ConstantWriteNode, node.name) - end - - # Foo += bar - # - # becomes - # - # Foo = Foo + bar - def visit_constant_operator_write_node(node) - desugar_operator_write_node(node, ConstantReadNode, ConstantWriteNode, node.name) - end - - # $foo &&= bar - # - # becomes - # - # $foo && $foo = bar - def visit_global_variable_and_write_node(node) - desugar_and_write_node(node, GlobalVariableReadNode, GlobalVariableWriteNode, node.name) - end - - # $foo ||= bar - # - # becomes - # - # defined?($foo) ? $foo : $foo = bar - def visit_global_variable_or_write_node(node) - desugar_or_write_defined_node(node, GlobalVariableReadNode, GlobalVariableWriteNode, node.name) - end - - # $foo += bar - # - # becomes - # - # $foo = $foo + bar - def visit_global_variable_operator_write_node(node) - desugar_operator_write_node(node, GlobalVariableReadNode, GlobalVariableWriteNode, node.name) - end - - # @foo &&= bar - # - # becomes - # - # @foo && @foo = bar - def visit_instance_variable_and_write_node(node) - desugar_and_write_node(node, InstanceVariableReadNode, InstanceVariableWriteNode, node.name) - end - - # @foo ||= bar - # - # becomes - # - # @foo || @foo = bar - def visit_instance_variable_or_write_node(node) - desugar_or_write_node(node, InstanceVariableReadNode, InstanceVariableWriteNode, node.name) - end - - # @foo += bar - # - # becomes - # - # @foo = @foo + bar - def visit_instance_variable_operator_write_node(node) - desugar_operator_write_node(node, InstanceVariableReadNode, InstanceVariableWriteNode, node.name) - end - - # foo &&= bar - # - # becomes - # - # foo && foo = bar - def visit_local_variable_and_write_node(node) - desugar_and_write_node(node, LocalVariableReadNode, LocalVariableWriteNode, node.name, node.depth) - end - - # foo ||= bar - # - # becomes - # - # foo || foo = bar - def visit_local_variable_or_write_node(node) - desugar_or_write_node(node, LocalVariableReadNode, LocalVariableWriteNode, node.name, node.depth) - end - - # foo += bar - # - # becomes - # - # foo = foo + bar - def visit_local_variable_operator_write_node(node) - desugar_operator_write_node(node, LocalVariableReadNode, LocalVariableWriteNode, node.name, node.depth) - end - - private - - # Desugar `x &&= y` to `x && x = y` - def desugar_and_write_node(node, read_class, write_class, *arguments) - AndNode.new( - read_class.new(*arguments, node.name_loc), - write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location), - node.operator_loc, - node.location - ) - end - - # Desugar `x += y` to `x = x + y` - def desugar_operator_write_node(node, read_class, write_class, *arguments) - write_class.new( - *arguments, - node.name_loc, - CallNode.new( - read_class.new(*arguments, node.name_loc), - nil, - node.operator_loc.copy(length: node.operator_loc.length - 1), - nil, - ArgumentsNode.new([node.value], node.value.location), - nil, - nil, - 0, - node.operator_loc.slice.chomp("="), - node.location - ), - node.operator_loc.copy(start_offset: node.operator_loc.end_offset - 1, length: 1), - node.location - ) - end - - # Desugar `x ||= y` to `x || x = y` - def desugar_or_write_node(node, read_class, write_class, *arguments) - OrNode.new( - read_class.new(*arguments, node.name_loc), - write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location), - node.operator_loc, - node.location - ) - end - - # Desugar `x ||= y` to `defined?(x) ? x : x = y` - def desugar_or_write_defined_node(node, read_class, write_class, *arguments) - IfNode.new( - node.operator_loc, - DefinedNode.new(nil, read_class.new(*arguments, node.name_loc), nil, node.operator_loc, node.name_loc), - StatementsNode.new([read_class.new(*arguments, node.name_loc)], node.location), - ElseNode.new( - node.operator_loc, - StatementsNode.new( - [write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location)], - node.location - ), - node.operator_loc, - node.location - ), - node.operator_loc, - node.location - ) - end - end -end diff --git a/lib/yarp/ffi.rb b/lib/yarp/ffi.rb deleted file mode 100644 index 82643be808..0000000000 --- a/lib/yarp/ffi.rb +++ /dev/null @@ -1,251 +0,0 @@ -# frozen_string_literal: true - -# This file is responsible for mirroring the API provided by the C extension by -# using FFI to call into the shared library. - -require "rbconfig" -require "ffi" - -module YARP - BACKEND = :FFI - - module LibRubyParser - extend FFI::Library - - # Define the library that we will be pulling functions from. Note that this - # must align with the build shared library from make/rake. - ffi_lib File.expand_path("../../build/librubyparser.#{RbConfig::CONFIG["SOEXT"]}", __dir__) - - # Convert a native C type declaration into a symbol that FFI understands. - # For example: - # - # const char * -> :pointer - # bool -> :bool - # size_t -> :size_t - # void -> :void - # - def self.resolve_type(type) - type = type.strip.delete_prefix("const ") - type.end_with?("*") ? :pointer : type.to_sym - end - - # Read through the given header file and find the declaration of each of the - # given functions. For each one, define a function with the same name and - # signature as the C function. - def self.load_exported_functions_from(header, *functions) - File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line| - # We only want to attempt to load exported functions. - next unless line.start_with?("YP_EXPORTED_FUNCTION ") - - # We only want to load the functions that we are interested in. - next unless functions.any? { |function| line.include?(function) } - - # Parse the function declaration. - unless /^YP_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line - raise "Could not parse #{line}" - end - - # Delete the function from the list of functions we are looking for to - # mark it as having been found. - functions.delete(name) - - # Split up the argument types into an array, ensure we handle the case - # where there are no arguments (by explicit void). - arg_types = arg_types.split(",").map(&:strip) - arg_types = [] if arg_types == %w[void] - - # Resolve the type of the argument by dropping the name of the argument - # first if it is present. - arg_types.map! { |type| resolve_type(type.sub(/\w+$/, "")) } - - # Attach the function using the FFI library. - attach_function name, arg_types, resolve_type(return_type) - end - - # If we didn't find all of the functions, raise an error. - raise "Could not find functions #{functions.inspect}" unless functions.empty? - end - - load_exported_functions_from( - "yarp.h", - "yp_version", - "yp_parse_serialize", - "yp_lex_serialize", - "yp_parse_lex_serialize" - ) - - load_exported_functions_from( - "yarp/util/yp_buffer.h", - "yp_buffer_sizeof", - "yp_buffer_init", - "yp_buffer_value", - "yp_buffer_length", - "yp_buffer_free" - ) - - load_exported_functions_from( - "yarp/util/yp_string.h", - "yp_string_mapped_init", - "yp_string_free", - "yp_string_source", - "yp_string_length", - "yp_string_sizeof" - ) - - # This object represents a yp_buffer_t. We only use it as an opaque pointer, - # so it doesn't need to know the fields of yp_buffer_t. - class YPBuffer - SIZEOF = LibRubyParser.yp_buffer_sizeof - - attr_reader :pointer - - def initialize(pointer) - @pointer = pointer - end - - def value - LibRubyParser.yp_buffer_value(pointer) - end - - def length - LibRubyParser.yp_buffer_length(pointer) - end - - def read - value.read_string(length) - end - - # Initialize a new buffer and yield it to the block. The buffer will be - # automatically freed when the block returns. - def self.with(&block) - pointer = FFI::MemoryPointer.new(SIZEOF) - - begin - raise unless LibRubyParser.yp_buffer_init(pointer) - yield new(pointer) - ensure - LibRubyParser.yp_buffer_free(pointer) - pointer.free - end - end - end - - # This object represents a yp_string_t. We only use it as an opaque pointer, - # so it doesn't have to be an FFI::Struct. - class YPString - SIZEOF = LibRubyParser.yp_string_sizeof - - attr_reader :pointer - - def initialize(pointer) - @pointer = pointer - end - - def source - LibRubyParser.yp_string_source(pointer) - end - - def length - LibRubyParser.yp_string_length(pointer) - end - - def read - source.read_string(length) - end - - # Yields a yp_string_t pointer to the given block. - def self.with(filepath, &block) - pointer = FFI::MemoryPointer.new(SIZEOF) - - begin - raise unless LibRubyParser.yp_string_mapped_init(pointer, filepath) - yield new(pointer) - ensure - LibRubyParser.yp_string_free(pointer) - pointer.free - end - end - end - - def self.dump_internal(source, source_size, filepath) - YPBuffer.with do |buffer| - metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath - yp_parse_serialize(source, source_size, buffer.pointer, metadata) - buffer.read - end - end - end - - # Mark the LibRubyParser module as private as it should only be called through - # the YARP module. - private_constant :LibRubyParser - - # The version constant is set by reading the result of calling yp_version. - VERSION = LibRubyParser.yp_version.read_string - - # Mirror the YARP.dump API by using the serialization API. - def self.dump(code, filepath = nil) - LibRubyParser.dump_internal(code, code.bytesize, filepath) - end - - # Mirror the YARP.dump_file API by using the serialization API. - def self.dump_file(filepath) - LibRubyParser::YPString.with(filepath) do |string| - LibRubyParser.dump_internal(string.source, string.length, filepath) - end - end - - # Mirror the YARP.lex API by using the serialization API. - def self.lex(code, filepath = nil) - LibRubyParser::YPBuffer.with do |buffer| - LibRubyParser.yp_lex_serialize(code, code.bytesize, filepath, buffer.pointer) - Serialize.load_tokens(Source.new(code), buffer.read) - end - end - - # Mirror the YARP.lex_file API by using the serialization API. - def self.lex_file(filepath) - LibRubyParser::YPString.with(filepath) do |string| - lex(string.read, filepath) - end - end - - # Mirror the YARP.parse API by using the serialization API. - def self.parse(code, filepath = nil) - YARP.load(code, dump(code, filepath)) - end - - # Mirror the YARP.parse_file API by using the serialization API. This uses - # native strings instead of Ruby strings because it allows us to use mmap when - # it is available. - def self.parse_file(filepath) - LibRubyParser::YPString.with(filepath) do |string| - parse(string.read, filepath) - end - end - - # Mirror the YARP.parse_lex API by using the serialization API. - def self.parse_lex(code, filepath = nil) - LibRubyParser::YPBuffer.with do |buffer| - metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath - LibRubyParser.yp_parse_lex_serialize(code, code.bytesize, buffer.pointer, metadata) - - source = Source.new(code) - loader = Serialize::Loader.new(source, buffer.read) - - tokens = loader.load_tokens - node, comments, errors, warnings = loader.load_nodes - - tokens.each { |token,| token.value.force_encoding(loader.encoding) } - - ParseResult.new([node, tokens], comments, errors, warnings, source) - end - end - - # Mirror the YARP.parse_lex_file API by using the serialization API. - def self.parse_lex_file(filepath) - LibRubyParser::YPString.with(filepath) do |string| - parse_lex(string.read, filepath) - end - end -end diff --git a/lib/yarp/language_server.rb b/lib/yarp/language_server.rb deleted file mode 100644 index 5a10d484a1..0000000000 --- a/lib/yarp/language_server.rb +++ /dev/null @@ -1,166 +0,0 @@ -# frozen_string_literal: true - -require "cgi" -require "json" -require "uri" - -module YARP - # YARP additionally ships with a language server conforming to the - # language server protocol. It can be invoked by running the yarp-lsp - # bin script (bin/yarp-lsp) - class LanguageServer - GITHUB_TEMPLATE = <<~TEMPLATE - Reporting issue with error `%{error}`. - - ## Expected behavior - <!-- TODO: Briefly explain what the expected behavior should be on this example. --> - - ## Actual behavior - <!-- TODO: Describe here what actually happened. --> - - ## Steps to reproduce the problem - <!-- TODO: Describe how we can reproduce the problem. --> - - ## Additional information - <!-- TODO: Include any additional information, such as screenshots. --> - - TEMPLATE - - attr_reader :input, :output - - def initialize( - input: $stdin, - output: $stdout - ) - @input = input.binmode - @output = output.binmode - end - - # rubocop:disable Layout/LineLength - def run - store = - Hash.new do |hash, uri| - filepath = CGI.unescape(URI.parse(uri).path) - File.exist?(filepath) ? (hash[uri] = File.read(filepath)) : nil - end - - while (headers = input.gets("\r\n\r\n")) - source = input.read(headers[/Content-Length: (\d+)/i, 1].to_i) - request = JSON.parse(source, symbolize_names: true) - - # stree-ignore - case request - in { method: "initialize", id: } - store.clear - write(id: id, result: { capabilities: capabilities }) - in { method: "initialized" } - # ignored - in { method: "shutdown" } # tolerate missing ID to be a good citizen - store.clear - write(id: request[:id], result: {}) - in { method: "exit"} - return - in { method: "textDocument/didChange", params: { textDocument: { uri: }, contentChanges: [{ text: }, *] } } - store[uri] = text - in { method: "textDocument/didOpen", params: { textDocument: { uri:, text: } } } - store[uri] = text - in { method: "textDocument/didClose", params: { textDocument: { uri: } } } - store.delete(uri) - in { method: "textDocument/diagnostic", id:, params: { textDocument: { uri: } } } - contents = store[uri] - write(id: id, result: contents ? diagnostics(contents) : nil) - in { method: "textDocument/codeAction", id:, params: { textDocument: { uri: }, context: { diagnostics: }}} - contents = store[uri] - write(id: id, result: contents ? code_actions(contents, diagnostics) : nil) - in { method: %r{\$/.+} } - # ignored - end - end - end - # rubocop:enable Layout/LineLength - - private - - def capabilities - { - codeActionProvider: { - codeActionKinds: [ - 'quickfix', - ], - }, - diagnosticProvider: { - interFileDependencies: false, - workspaceDiagnostics: false, - }, - textDocumentSync: { - change: 1, - openClose: true - }, - } - end - - def code_actions(source, diagnostics) - diagnostics.map do |diagnostic| - message = diagnostic[:message] - issue_content = URI.encode_www_form_component(GITHUB_TEMPLATE % {error: message}) - issue_link = "https://github.com/ruby/yarp/issues/new?&labels=Bug&body=#{issue_content}" - - { - title: "Report incorrect error: `#{diagnostic[:message]}`", - kind: "quickfix", - diagnostics: [diagnostic], - command: { - title: "Report incorrect error", - command: "vscode.open", - arguments: [issue_link] - } - } - end - end - - def diagnostics(source) - offsets = Hash.new do |hash, key| - slice = source.byteslice(...key) - lineno = slice.count("\n") - - char = slice.length - newline = source.rindex("\n", [char - 1, 0].max) || -1 - hash[key] = { line: lineno, character: char - newline - 1 } - end - - parse_output = YARP.parse(source) - - { - kind: "full", - items: [ - *parse_output.errors.map do |error| - { - range: { - start: offsets[error.location.start_offset], - end: offsets[error.location.end_offset], - }, - message: error.message, - severity: 1, - } - end, - *parse_output.warnings.map do |warning| - { - range: { - start: offsets[warning.location.start_offset], - end: offsets[warning.location.end_offset], - }, - message: warning.message, - severity: 2, - } - end, - ] - } - end - - def write(value) - response = value.merge(jsonrpc: "2.0").to_json - output.print("Content-Length: #{response.bytesize}\r\n\r\n#{response}") - output.flush - end - end -end diff --git a/lib/yarp/lex_compat.rb b/lib/yarp/lex_compat.rb deleted file mode 100644 index 6ed7575ffd..0000000000 --- a/lib/yarp/lex_compat.rb +++ /dev/null @@ -1,842 +0,0 @@ -# frozen_string_literal: true - -require "delegate" - -module YARP - # This class is responsible for lexing the source using YARP and then - # converting those tokens to be compatible with Ripper. In the vast majority - # of cases, this is a one-to-one mapping of the token type. Everything else - # generally lines up. However, there are a few cases that require special - # handling. - class LexCompat - # This is a mapping of YARP token types to Ripper token types. This is a - # many-to-one mapping because we split up our token types, whereas Ripper - # tends to group them. - RIPPER = { - AMPERSAND: :on_op, - AMPERSAND_AMPERSAND: :on_op, - AMPERSAND_AMPERSAND_EQUAL: :on_op, - AMPERSAND_DOT: :on_op, - AMPERSAND_EQUAL: :on_op, - BACK_REFERENCE: :on_backref, - BACKTICK: :on_backtick, - BANG: :on_op, - BANG_EQUAL: :on_op, - BANG_TILDE: :on_op, - BRACE_LEFT: :on_lbrace, - BRACE_RIGHT: :on_rbrace, - BRACKET_LEFT: :on_lbracket, - BRACKET_LEFT_ARRAY: :on_lbracket, - BRACKET_LEFT_RIGHT: :on_op, - BRACKET_LEFT_RIGHT_EQUAL: :on_op, - BRACKET_RIGHT: :on_rbracket, - CARET: :on_op, - CARET_EQUAL: :on_op, - CHARACTER_LITERAL: :on_CHAR, - CLASS_VARIABLE: :on_cvar, - COLON: :on_op, - COLON_COLON: :on_op, - COMMA: :on_comma, - COMMENT: :on_comment, - CONSTANT: :on_const, - DOT: :on_period, - DOT_DOT: :on_op, - DOT_DOT_DOT: :on_op, - EMBDOC_BEGIN: :on_embdoc_beg, - EMBDOC_END: :on_embdoc_end, - EMBDOC_LINE: :on_embdoc, - EMBEXPR_BEGIN: :on_embexpr_beg, - EMBEXPR_END: :on_embexpr_end, - EMBVAR: :on_embvar, - EOF: :on_eof, - EQUAL: :on_op, - EQUAL_EQUAL: :on_op, - EQUAL_EQUAL_EQUAL: :on_op, - EQUAL_GREATER: :on_op, - EQUAL_TILDE: :on_op, - FLOAT: :on_float, - FLOAT_IMAGINARY: :on_imaginary, - FLOAT_RATIONAL: :on_rational, - FLOAT_RATIONAL_IMAGINARY: :on_imaginary, - GREATER: :on_op, - GREATER_EQUAL: :on_op, - GREATER_GREATER: :on_op, - GREATER_GREATER_EQUAL: :on_op, - GLOBAL_VARIABLE: :on_gvar, - HEREDOC_END: :on_heredoc_end, - HEREDOC_START: :on_heredoc_beg, - IDENTIFIER: :on_ident, - IGNORED_NEWLINE: :on_ignored_nl, - INTEGER: :on_int, - INTEGER_IMAGINARY: :on_imaginary, - INTEGER_RATIONAL: :on_rational, - INTEGER_RATIONAL_IMAGINARY: :on_imaginary, - INSTANCE_VARIABLE: :on_ivar, - INVALID: :INVALID, - KEYWORD___ENCODING__: :on_kw, - KEYWORD___LINE__: :on_kw, - KEYWORD___FILE__: :on_kw, - KEYWORD_ALIAS: :on_kw, - KEYWORD_AND: :on_kw, - KEYWORD_BEGIN: :on_kw, - KEYWORD_BEGIN_UPCASE: :on_kw, - KEYWORD_BREAK: :on_kw, - KEYWORD_CASE: :on_kw, - KEYWORD_CLASS: :on_kw, - KEYWORD_DEF: :on_kw, - KEYWORD_DEFINED: :on_kw, - KEYWORD_DO: :on_kw, - KEYWORD_DO_LOOP: :on_kw, - KEYWORD_ELSE: :on_kw, - KEYWORD_ELSIF: :on_kw, - KEYWORD_END: :on_kw, - KEYWORD_END_UPCASE: :on_kw, - KEYWORD_ENSURE: :on_kw, - KEYWORD_FALSE: :on_kw, - KEYWORD_FOR: :on_kw, - KEYWORD_IF: :on_kw, - KEYWORD_IF_MODIFIER: :on_kw, - KEYWORD_IN: :on_kw, - KEYWORD_MODULE: :on_kw, - KEYWORD_NEXT: :on_kw, - KEYWORD_NIL: :on_kw, - KEYWORD_NOT: :on_kw, - KEYWORD_OR: :on_kw, - KEYWORD_REDO: :on_kw, - KEYWORD_RESCUE: :on_kw, - KEYWORD_RESCUE_MODIFIER: :on_kw, - KEYWORD_RETRY: :on_kw, - KEYWORD_RETURN: :on_kw, - KEYWORD_SELF: :on_kw, - KEYWORD_SUPER: :on_kw, - KEYWORD_THEN: :on_kw, - KEYWORD_TRUE: :on_kw, - KEYWORD_UNDEF: :on_kw, - KEYWORD_UNLESS: :on_kw, - KEYWORD_UNLESS_MODIFIER: :on_kw, - KEYWORD_UNTIL: :on_kw, - KEYWORD_UNTIL_MODIFIER: :on_kw, - KEYWORD_WHEN: :on_kw, - KEYWORD_WHILE: :on_kw, - KEYWORD_WHILE_MODIFIER: :on_kw, - KEYWORD_YIELD: :on_kw, - LABEL: :on_label, - LABEL_END: :on_label_end, - LAMBDA_BEGIN: :on_tlambeg, - LESS: :on_op, - LESS_EQUAL: :on_op, - LESS_EQUAL_GREATER: :on_op, - LESS_LESS: :on_op, - LESS_LESS_EQUAL: :on_op, - METHOD_NAME: :on_ident, - MINUS: :on_op, - MINUS_EQUAL: :on_op, - MINUS_GREATER: :on_tlambda, - NEWLINE: :on_nl, - NUMBERED_REFERENCE: :on_backref, - PARENTHESIS_LEFT: :on_lparen, - PARENTHESIS_LEFT_PARENTHESES: :on_lparen, - PARENTHESIS_RIGHT: :on_rparen, - PERCENT: :on_op, - PERCENT_EQUAL: :on_op, - PERCENT_LOWER_I: :on_qsymbols_beg, - PERCENT_LOWER_W: :on_qwords_beg, - PERCENT_LOWER_X: :on_backtick, - PERCENT_UPPER_I: :on_symbols_beg, - PERCENT_UPPER_W: :on_words_beg, - PIPE: :on_op, - PIPE_EQUAL: :on_op, - PIPE_PIPE: :on_op, - PIPE_PIPE_EQUAL: :on_op, - PLUS: :on_op, - PLUS_EQUAL: :on_op, - QUESTION_MARK: :on_op, - RATIONAL_FLOAT: :on_rational, - RATIONAL_INTEGER: :on_rational, - REGEXP_BEGIN: :on_regexp_beg, - REGEXP_END: :on_regexp_end, - SEMICOLON: :on_semicolon, - SLASH: :on_op, - SLASH_EQUAL: :on_op, - STAR: :on_op, - STAR_EQUAL: :on_op, - STAR_STAR: :on_op, - STAR_STAR_EQUAL: :on_op, - STRING_BEGIN: :on_tstring_beg, - STRING_CONTENT: :on_tstring_content, - STRING_END: :on_tstring_end, - SYMBOL_BEGIN: :on_symbeg, - TILDE: :on_op, - UAMPERSAND: :on_op, - UCOLON_COLON: :on_op, - UDOT_DOT: :on_op, - UDOT_DOT_DOT: :on_op, - UMINUS: :on_op, - UMINUS_NUM: :on_op, - UPLUS: :on_op, - USTAR: :on_op, - USTAR_STAR: :on_op, - WORDS_SEP: :on_words_sep, - "__END__": :on___end__ - }.freeze - - # When we produce tokens, we produce the same arrays that Ripper does. - # However, we add a couple of convenience methods onto them to make them a - # little easier to work with. We delegate all other methods to the array. - class Token < SimpleDelegator - def location - self[0] - end - - def event - self[1] - end - - def value - self[2] - end - - def state - self[3] - end - end - - # Ripper doesn't include the rest of the token in the event, so we need to - # trim it down to just the content on the first line when comparing. - class EndContentToken < Token - def ==(other) - [self[0], self[1], self[2][0..self[2].index("\n")], self[3]] == other - end - end - - # Tokens where state should be ignored - # used for :on_comment, :on_heredoc_end, :on_embexpr_end - class IgnoreStateToken < Token - def ==(other) - self[0...-1] == other[0...-1] - end - end - - # Ident tokens for the most part are exactly the same, except sometimes we - # know an ident is a local when ripper doesn't (when they are introduced - # through named captures in regular expressions). In that case we don't - # compare the state. - class IdentToken < Token - def ==(other) - (self[0...-1] == other[0...-1]) && ( - (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) || - (other[3] & Ripper::EXPR_ARG_ANY != 0) - ) - end - end - - # Ignored newlines can occasionally have a LABEL state attached to them, so - # we compare the state differently here. - class IgnoredNewlineToken < Token - def ==(other) - return false unless self[0...-1] == other[0...-1] - - if self[4] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED - other[4] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED > 0 - else - self[4] == other[4] - end - end - end - - # If we have an identifier that follows a method name like: - # - # def foo bar - # - # then Ripper will mark bar as END|LABEL if there is a local in a parent - # scope named bar because it hasn't pushed the local table yet. We do this - # more accurately, so we need to allow comparing against both END and - # END|LABEL. - class ParamToken < Token - def ==(other) - (self[0...-1] == other[0...-1]) && ( - (other[3] == Ripper::EXPR_END) || - (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL) - ) - end - end - - # A heredoc in this case is a list of tokens that belong to the body of the - # heredoc that should be appended onto the list of tokens when the heredoc - # closes. - module Heredoc - # Heredocs that are no dash or tilde heredocs are just a list of tokens. - # We need to keep them around so that we can insert them in the correct - # order back into the token stream and set the state of the last token to - # the state that the heredoc was opened in. - class PlainHeredoc - attr_reader :tokens - - def initialize - @tokens = [] - end - - def <<(token) - tokens << token - end - - def to_a - tokens - end - end - - # Dash heredocs are a little more complicated. They are a list of tokens - # that need to be split on "\\\n" to mimic Ripper's behavior. We also need - # to keep track of the state that the heredoc was opened in. - class DashHeredoc - attr_reader :split, :tokens - - def initialize(split) - @split = split - @tokens = [] - end - - def <<(token) - tokens << token - end - - def to_a - embexpr_balance = 0 - - tokens.each_with_object([]) do |token, results| - case token.event - when :on_embexpr_beg - embexpr_balance += 1 - results << token - when :on_embexpr_end - embexpr_balance -= 1 - results << token - when :on_tstring_content - if embexpr_balance == 0 - lineno = token[0][0] - column = token[0][1] - - if split - # Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind - # to keep the delimiter in the result. - token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index| - column = 0 if index > 0 - results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) - lineno += value.count("\n") - end - else - results << token - end - else - results << token - end - else - results << token - end - end - end - end - - # Heredocs that are dedenting heredocs are a little more complicated. - # Ripper outputs on_ignored_sp tokens for the whitespace that is being - # removed from the output. YARP only modifies the node itself and keeps - # the token the same. This simplifies YARP, but makes comparing against - # Ripper much harder because there is a length mismatch. - # - # Fortunately, we already have to pull out the heredoc tokens in order to - # insert them into the stream in the correct order. As such, we can do - # some extra manipulation on the tokens to make them match Ripper's - # output by mirroring the dedent logic that Ripper uses. - class DedentingHeredoc - TAB_WIDTH = 8 - - attr_reader :tokens, :dedent_next, :dedent, :embexpr_balance - - def initialize - @tokens = [] - @dedent_next = true - @dedent = nil - @embexpr_balance = 0 - end - - # As tokens are coming in, we track the minimum amount of common leading - # whitespace on plain string content tokens. This allows us to later - # remove that amount of whitespace from the beginning of each line. - def <<(token) - case token.event - when :on_embexpr_beg, :on_heredoc_beg - @embexpr_balance += 1 - when :on_embexpr_end, :on_heredoc_end - @embexpr_balance -= 1 - when :on_tstring_content - if embexpr_balance == 0 - token.value.split(/(?<=\n)/).each_with_index do |line, index| - next if line.strip.empty? && line.end_with?("\n") - next if !(dedent_next || index > 0) - - leading = line[/\A(\s*)\n?/, 1] - next_dedent = 0 - - leading.each_char do |char| - if char == "\t" - next_dedent = next_dedent - (next_dedent % TAB_WIDTH) + TAB_WIDTH - else - next_dedent += 1 - end - end - - @dedent = [dedent, next_dedent].compact.min - end - end - end - - @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0 - tokens << token - end - - def to_a - # If every line in the heredoc is blank, we still need to split up the - # string content token into multiple tokens. - if dedent.nil? - results = [] - embexpr_balance = 0 - - tokens.each do |token| - case token.event - when :on_embexpr_beg, :on_heredoc_beg - embexpr_balance += 1 - results << token - when :on_embexpr_end, :on_heredoc_end - embexpr_balance -= 1 - results << token - when :on_tstring_content - if embexpr_balance == 0 - lineno = token[0][0] - column = token[0][1] - - token.value.split(/(?<=\n)/).each_with_index do |value, index| - column = 0 if index > 0 - results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) - lineno += 1 - end - else - results << token - end - else - results << token - end - end - - return results - end - - # Otherwise, we're going to run through each token in the list and - # insert on_ignored_sp tokens for the amount of dedent that we need to - # perform. We also need to remove the dedent from the beginning of - # each line of plain string content tokens. - results = [] - dedent_next = true - embexpr_balance = 0 - - tokens.each do |token| - # Notice that the structure of this conditional largely matches the - # whitespace calculation we performed above. This is because - # checking if the subsequent token needs to be dedented is common to - # both the dedent calculation and the ignored_sp insertion. - case token.event - when :on_embexpr_beg - embexpr_balance += 1 - results << token - when :on_embexpr_end - embexpr_balance -= 1 - results << token - when :on_tstring_content - if embexpr_balance == 0 - # Here we're going to split the string on newlines, but maintain - # the newlines in the resulting array. We'll do that with a look - # behind assertion. - splits = token.value.split(/(?<=\n)/) - index = 0 - - while index < splits.length - line = splits[index] - lineno = token[0][0] + index - column = token[0][1] - - # Blank lines do not count toward common leading whitespace - # calculation and do not need to be dedented. - if dedent_next || index > 0 - column = 0 - end - - # If the dedent is 0 and we're not supposed to dedent the next - # line or this line doesn't start with whitespace, then we - # should concatenate the rest of the string to match ripper. - if dedent == 0 && (!dedent_next || !line.start_with?(/\s/)) - line = splits[index..].join - index = splits.length - end - - # If we are supposed to dedent this line or if this is not the - # first line of the string and this line isn't entirely blank, - # then we need to insert an on_ignored_sp token and remove the - # dedent from the beginning of the line. - if (dedent > 0) && (dedent_next || index > 0) - deleting = 0 - deleted_chars = [] - - # Gather up all of the characters that we're going to - # delete, stopping when you hit a character that would put - # you over the dedent amount. - line.each_char.with_index do |char, i| - case char - when "\r" - if line.chars[i + 1] == "\n" - break - end - when "\n" - break - when "\t" - deleting = deleting - (deleting % TAB_WIDTH) + TAB_WIDTH - else - deleting += 1 - end - - break if deleting > dedent - deleted_chars << char - end - - # If we have something to delete, then delete it from the - # string and insert an on_ignored_sp token. - if deleted_chars.any? - ignored = deleted_chars.join - line.delete_prefix!(ignored) - - results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]]) - column = ignored.length - end - end - - results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty? - index += 1 - end - else - results << token - end - else - results << token - end - - dedent_next = - ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) && - embexpr_balance == 0 - end - - results - end - end - - # Here we will split between the two types of heredocs and return the - # object that will store their tokens. - def self.build(opening) - case opening.value[2] - when "~" - DedentingHeredoc.new - when "-" - DashHeredoc.new(opening.value[3] != "'") - else - PlainHeredoc.new - end - end - end - - attr_reader :source, :filepath - - def initialize(source, filepath = "") - @source = source - @filepath = filepath || "" - end - - def result - tokens = [] - - state = :default - heredoc_stack = [[]] - - result = YARP.lex(source, @filepath) - result_value = result.value - previous_state = nil - - # In previous versions of Ruby, Ripper wouldn't flush the bom before the - # first token, so we had to have a hack in place to account for that. This - # checks for that behavior. - bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0 - bom = source.byteslice(0..2) == "\xEF\xBB\xBF" - - result_value.each_with_index do |(token, lex_state), index| - lineno = token.location.start_line - column = token.location.start_column - - # If there's a UTF-8 byte-order mark as the start of the file, then for - # certain tokens ripper sets the first token back by 3 bytes. It also - # keeps the byte order mark in the first token's value. This is weird, - # and I don't want to mirror that in our parser. So instead, we'll match - # up the columns and values here. - if bom && lineno == 1 - column -= 3 - - if index == 0 && column == 0 && !bom_flushed - flushed = - case token.type - when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE, - :GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I, - :PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I, - :PERCENT_UPPER_W, :STRING_BEGIN - true - when :REGEXP_BEGIN, :SYMBOL_BEGIN - token.value.start_with?("%") - else - false - end - - unless flushed - column -= 3 - value = token.value - value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding)) - end - end - end - - event = RIPPER.fetch(token.type) - value = token.value - lex_state = Ripper::Lexer::State.new(lex_state) - - token = - case event - when :on___end__ - EndContentToken.new([[lineno, column], event, value, lex_state]) - when :on_comment - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) - when :on_heredoc_end - # Heredoc end tokens can be emitted in an odd order, so we don't - # want to bother comparing the state on them. - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) - when :on_ident - if lex_state == Ripper::EXPR_END - # If we have an identifier that follows a method name like: - # - # def foo bar - # - # then Ripper will mark bar as END|LABEL if there is a local in a - # parent scope named bar because it hasn't pushed the local table - # yet. We do this more accurately, so we need to allow comparing - # against both END and END|LABEL. - ParamToken.new([[lineno, column], event, value, lex_state]) - elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL - # In the event that we're comparing identifiers, we're going to - # allow a little divergence. Ripper doesn't account for local - # variables introduced through named captures in regexes, and we - # do, which accounts for this difference. - IdentToken.new([[lineno, column], event, value, lex_state]) - else - Token.new([[lineno, column], event, value, lex_state]) - end - when :on_embexpr_end - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) - when :on_ignored_nl - # Ignored newlines can occasionally have a LABEL state attached to - # them which doesn't actually impact anything. We don't mirror that - # state so we ignored it. - IgnoredNewlineToken.new([[lineno, column], event, value, lex_state]) - when :on_regexp_end - # On regex end, Ripper scans and then sets end state, so the ripper - # lexed output is begin, when it should be end. YARP sets lex state - # correctly to end state, but we want to be able to compare against - # Ripper's lexed state. So here, if it's a regexp end token, we - # output the state as the previous state, solely for the sake of - # comparison. - previous_token = result_value[index - 1][0] - lex_state = - if RIPPER.fetch(previous_token.type) == :on_embexpr_end - # If the previous token is embexpr_end, then we have to do even - # more processing. The end of an embedded expression sets the - # state to the state that it had at the beginning of the - # embedded expression. So we have to go and find that state and - # set it here. - counter = 1 - current_index = index - 1 - - until counter == 0 - current_index -= 1 - current_event = RIPPER.fetch(result_value[current_index][0].type) - counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0 - end - - Ripper::Lexer::State.new(result_value[current_index][1]) - else - previous_state - end - - Token.new([[lineno, column], event, value, lex_state]) - when :on_eof - previous_token = result_value[index - 1][0] - - # If we're at the end of the file and the previous token was a - # comment and there is still whitespace after the comment, then - # Ripper will append a on_nl token (even though there isn't - # necessarily a newline). We mirror that here. - start_offset = previous_token.location.end_offset - end_offset = token.location.start_offset - - if previous_token.type == :COMMENT && start_offset < end_offset - if bom - start_offset += 3 - end_offset += 3 - end - - tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state]) - end - - Token.new([[lineno, column], event, value, lex_state]) - else - Token.new([[lineno, column], event, value, lex_state]) - end - - previous_state = lex_state - - # The order in which tokens appear in our lexer is different from the - # order that they appear in Ripper. When we hit the declaration of a - # heredoc in YARP, we skip forward and lex the rest of the content of - # the heredoc before going back and lexing at the end of the heredoc - # identifier. - # - # To match up to ripper, we keep a small state variable around here to - # track whether we're in the middle of a heredoc or not. In this way we - # can shuffle around the token to match Ripper's output. - case state - when :default - # The default state is when there are no heredocs at all. In this - # state we can append the token to the list of tokens and move on. - tokens << token - - # If we get the declaration of a heredoc, then we open a new heredoc - # and move into the heredoc_opened state. - if event == :on_heredoc_beg - state = :heredoc_opened - heredoc_stack.last << Heredoc.build(token) - end - when :heredoc_opened - # The heredoc_opened state is when we've seen the declaration of a - # heredoc and are now lexing the body of the heredoc. In this state we - # push tokens onto the most recently created heredoc. - heredoc_stack.last.last << token - - case event - when :on_heredoc_beg - # If we receive a heredoc declaration while lexing the body of a - # heredoc, this means we have nested heredocs. In this case we'll - # push a new heredoc onto the stack and stay in the heredoc_opened - # state since we're now lexing the body of the new heredoc. - heredoc_stack << [Heredoc.build(token)] - when :on_heredoc_end - # If we receive the end of a heredoc, then we're done lexing the - # body of the heredoc. In this case we now have a completed heredoc - # but need to wait for the next newline to push it into the token - # stream. - state = :heredoc_closed - end - when :heredoc_closed - if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n")) - if heredoc_stack.size > 1 - flushing = heredoc_stack.pop - heredoc_stack.last.last << token - - flushing.each do |heredoc| - heredoc.to_a.each do |flushed_token| - heredoc_stack.last.last << flushed_token - end - end - - state = :heredoc_opened - next - end - elsif event == :on_heredoc_beg - tokens << token - state = :heredoc_opened - heredoc_stack.last << Heredoc.build(token) - next - elsif heredoc_stack.size > 1 - heredoc_stack[-2].last << token - next - end - - heredoc_stack.last.each do |heredoc| - tokens.concat(heredoc.to_a) - end - - heredoc_stack.last.clear - state = :default - - tokens << token - end - end - - # Drop the EOF token from the list - tokens = tokens[0...-1] - - # We sort by location to compare against Ripper's output - tokens.sort_by!(&:location) - - if result_value.size - 1 > tokens.size - raise StandardError, "Lost tokens when performing lex_compat" - end - - ParseResult.new(tokens, result.comments, result.errors, result.warnings, []) - end - end - - # The constant that wraps the behavior of the lexer to match Ripper's output - # is an implementation detail, so we don't want it to be public. - private_constant :LexCompat - - # Returns an array of tokens that closely resembles that of the Ripper lexer. - # The only difference is that since we don't keep track of lexer state in the - # same way, it's going to always return the NONE state. - def self.lex_compat(source, filepath = "") - LexCompat.new(source, filepath).result - end - - # This lexes with the Ripper lex. It drops any space events but otherwise - # returns the same tokens. Raises SyntaxError if the syntax in source is - # invalid. - def self.lex_ripper(source) - previous = [] - results = [] - - Ripper.lex(source, raise_errors: true).each do |token| - case token[1] - when :on_sp - # skip - when :on_tstring_content - if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@")) - previous[2] << token[2] - else - results << token - previous = token - end - when :on_words_sep - if previous[1] == :on_words_sep - previous[2] << token[2] - else - results << token - previous = token - end - else - results << token - previous = token - end - end - - results - end -end diff --git a/lib/yarp/pack.rb b/lib/yarp/pack.rb deleted file mode 100644 index 83f5569923..0000000000 --- a/lib/yarp/pack.rb +++ /dev/null @@ -1,185 +0,0 @@ -# frozen_string_literal: true - -module YARP - module Pack - %i[ - SPACE - COMMENT - INTEGER - UTF8 - BER - FLOAT - STRING_SPACE_PADDED - STRING_NULL_PADDED - STRING_NULL_TERMINATED - STRING_MSB - STRING_LSB - STRING_HEX_HIGH - STRING_HEX_LOW - STRING_UU - STRING_MIME - STRING_BASE64 - STRING_FIXED - STRING_POINTER - MOVE - BACK - NULL - - UNSIGNED - SIGNED - SIGNED_NA - - AGNOSTIC_ENDIAN - LITTLE_ENDIAN - BIG_ENDIAN - NATIVE_ENDIAN - ENDIAN_NA - - SIZE_SHORT - SIZE_INT - SIZE_LONG - SIZE_LONG_LONG - SIZE_8 - SIZE_16 - SIZE_32 - SIZE_64 - SIZE_P - SIZE_NA - - LENGTH_FIXED - LENGTH_MAX - LENGTH_RELATIVE - LENGTH_NA - ].each do |const| - const_set(const, const) - end - - class Directive - attr_reader :version, :variant, :source, :type, :signed, :endian, :size, :length_type, :length - - def initialize(version, variant, source, type, signed, endian, size, length_type, length) - @version = version - @variant = variant - @source = source - @type = type - @signed = signed - @endian = endian - @size = size - @length_type = length_type - @length = length - end - - ENDIAN_DESCRIPTIONS = { - AGNOSTIC_ENDIAN: 'agnostic', - LITTLE_ENDIAN: 'little-endian (VAX)', - BIG_ENDIAN: 'big-endian (network)', - NATIVE_ENDIAN: 'native-endian', - ENDIAN_NA: 'n/a' - } - - SIGNED_DESCRIPTIONS = { - UNSIGNED: 'unsigned', - SIGNED: 'signed', - SIGNED_NA: 'n/a' - } - - SIZE_DESCRIPTIONS = { - SIZE_SHORT: 'short', - SIZE_INT: 'int-width', - SIZE_LONG: 'long', - SIZE_LONG_LONG: 'long long', - SIZE_8: '8-bit', - SIZE_16: '16-bit', - SIZE_32: '32-bit', - SIZE_64: '64-bit', - SIZE_P: 'pointer-width' - } - - def describe - case type - when SPACE - 'whitespace' - when COMMENT - 'comment' - when INTEGER - if size == SIZE_8 - base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer" - else - base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer" - end - case length_type - when LENGTH_FIXED - if length > 1 - base + ", x#{length}" - else - base - end - when LENGTH_MAX - base + ', as many as possible' - end - when UTF8 - 'UTF-8 character' - when BER - 'BER-compressed integer' - when FLOAT - "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float" - when STRING_SPACE_PADDED - 'arbitrary binary string (space padded)' - when STRING_NULL_PADDED - 'arbitrary binary string (null padded, count is width)' - when STRING_NULL_TERMINATED - 'arbitrary binary string (null padded, count is width), except that null is added with *' - when STRING_MSB - 'bit string (MSB first)' - when STRING_LSB - 'bit string (LSB first)' - when STRING_HEX_HIGH - 'hex string (high nibble first)' - when STRING_HEX_LOW - 'hex string (low nibble first)' - when STRING_UU - 'UU-encoded string' - when STRING_MIME - 'quoted printable, MIME encoding' - when STRING_BASE64 - 'base64 encoded string' - when STRING_FIXED - 'pointer to a structure (fixed-length string)' - when STRING_POINTER - 'pointer to a null-terminated string' - when MOVE - 'move to absolute position' - when BACK - 'back up a byte' - when NULL - 'null byte' - else - raise - end - end - end - - class Format - attr_reader :directives, :encoding - - def initialize(directives, encoding) - @directives = directives - @encoding = encoding - end - - def describe - source_width = directives.map { |d| d.source.inspect.length }.max - directive_lines = directives.map do |directive| - if directive.type == SPACE - source = directive.source.inspect - else - source = directive.source - end - " #{source.ljust(source_width)} #{directive.describe}" - end - - (['Directives:'] + directive_lines + ['Encoding:', " #{encoding}"]).join("\n") - end - end - end -end diff --git a/lib/yarp/parse_result/comments.rb b/lib/yarp/parse_result/comments.rb deleted file mode 100644 index 88240609b1..0000000000 --- a/lib/yarp/parse_result/comments.rb +++ /dev/null @@ -1,172 +0,0 @@ -# frozen_string_literal: true - -module YARP - class ParseResult - # When we've parsed the source, we have both the syntax tree and the list of - # comments that we found in the source. This class is responsible for - # walking the tree and finding the nearest location to attach each comment. - # - # It does this by first finding the nearest locations to each comment. - # Locations can either come from nodes directly or from location fields on - # nodes. For example, a `ClassNode` has an overall location encompassing the - # entire class, but it also has a location for the `class` keyword. - # - # Once the nearest locations are found, it determines which one to attach - # to. If it's a trailing comment (a comment on the same line as other source - # code), it will favor attaching to the nearest location that occurs before - # the comment. Otherwise it will favor attaching to the nearest location - # that is after the comment. - class Comments - # A target for attaching comments that is based on a specific node's - # location. - class NodeTarget - attr_reader :node - - def initialize(node) - @node = node - end - - def start_offset - node.location.start_offset - end - - def end_offset - node.location.end_offset - end - - def encloses?(comment) - start_offset <= comment.location.start_offset && - comment.location.end_offset <= end_offset - end - - def <<(comment) - node.location.comments << comment - end - end - - # A target for attaching comments that is based on a location field on a - # node. For example, the `end` token of a ClassNode. - class LocationTarget - attr_reader :location - - def initialize(location) - @location = location - end - - def start_offset - location.start_offset - end - - def end_offset - location.end_offset - end - - def encloses?(comment) - false - end - - def <<(comment) - location.comments << comment - end - end - - attr_reader :parse_result - - def initialize(parse_result) - @parse_result = parse_result - end - - def attach! - parse_result.comments.each do |comment| - preceding, enclosing, following = nearest_targets(parse_result.value, comment) - target = - if comment.trailing? - preceding || following || enclosing || NodeTarget.new(parse_result.value) - else - # If a comment exists on its own line, prefer a leading comment. - following || preceding || enclosing || NodeTarget.new(parse_result.value) - end - - target << comment - end - end - - private - - # Responsible for finding the nearest targets to the given comment within - # the context of the given encapsulating node. - def nearest_targets(node, comment) - comment_start = comment.location.start_offset - comment_end = comment.location.end_offset - - targets = [] - node.comment_targets.map do |value| - case value - when StatementsNode - targets.concat(value.body.map { |node| NodeTarget.new(node) }) - when Node - targets << NodeTarget.new(value) - when Location - targets << LocationTarget.new(value) - end - end - - targets.sort_by!(&:start_offset) - preceding = nil - following = nil - - left = 0 - right = targets.length - - # This is a custom binary search that finds the nearest nodes to the - # given comment. When it finds a node that completely encapsulates the - # comment, it recurses downward into the tree. - while left < right - middle = (left + right) / 2 - target = targets[middle] - - target_start = target.start_offset - target_end = target.end_offset - - if target.encloses?(comment) - # The comment is completely contained by this target. Abandon the - # binary search at this level. - return nearest_targets(target.node, comment) - end - - if target_end <= comment_start - # This target falls completely before the comment. Because we will - # never consider this target or any targets before it again, this - # target must be the closest preceding target we have encountered so - # far. - preceding = target - left = middle + 1 - next - end - - if comment_end <= target_start - # This target falls completely after the comment. Because we will - # never consider this target or any targets after it again, this - # target must be the closest following target we have encountered so - # far. - following = target - right = middle - next - end - - # This should only happen if there is a bug in this parser. - raise "Comment location overlaps with a target location" - end - - [preceding, NodeTarget.new(node), following] - end - end - - private_constant :Comments - - # Attach the list of comments to their respective locations in the tree. - def attach_comments! - Comments.new(self).attach! - end - end -end diff --git a/lib/yarp/parse_result/newlines.rb b/lib/yarp/parse_result/newlines.rb deleted file mode 100644 index d16600afd0..0000000000 --- a/lib/yarp/parse_result/newlines.rb +++ /dev/null @@ -1,60 +0,0 @@ -# frozen_string_literal: true - -module YARP - class ParseResult - # The :line tracepoint event gets fired whenever the Ruby VM encounters an - # expression on a new line. The types of expressions that can trigger this - # event are: - # - # * if statements - # * unless statements - # * nodes that are children of statements lists - # - # In order to keep track of the newlines, we have a list of offsets that - # come back from the parser. We assign these offsets to the first nodes that - # we find in the tree that are on those lines. - # - # Note that the logic in this file should be kept in sync with the Java - # MarkNewlinesVisitor, since that visitor is responsible for marking the - # newlines for JRuby/TruffleRuby. - class Newlines < Visitor - def initialize(newline_marked) - @newline_marked = newline_marked - end - - def visit_block_node(node) - old_newline_marked = @newline_marked - @newline_marked = Array.new(old_newline_marked.size, false) - - begin - super(node) - ensure - @newline_marked = old_newline_marked - end - end - - alias_method :visit_lambda_node, :visit_block_node - - def visit_if_node(node) - node.set_newline_flag(@newline_marked) - super(node) - end - - alias_method :visit_unless_node, :visit_if_node - - def visit_statements_node(node) - node.body.each do |child| - child.set_newline_flag(@newline_marked) - end - super(node) - end - end - - private_constant :Newlines - - # Walk the tree and mark nodes that are on a new line. - def mark_newlines! - value.accept(Newlines.new(Array.new(1 + source.offsets.size, false))) - end - end -end diff --git a/lib/yarp/pattern.rb b/lib/yarp/pattern.rb deleted file mode 100644 index f7519137e4..0000000000 --- a/lib/yarp/pattern.rb +++ /dev/null @@ -1,239 +0,0 @@ -# frozen_string_literal: true - -module YARP - # A pattern is an object that wraps a Ruby pattern matching expression. The - # expression would normally be passed to an `in` clause within a `case` - # expression or a rightward assignment expression. For example, in the - # following snippet: - # - # case node - # in ConstantPathNode[ConstantReadNode[name: :YARP], ConstantReadNode[name: :Pattern]] - # end - # - # the pattern is the `ConstantPathNode[...]` expression. - # - # The pattern gets compiled into an object that responds to #call by running - # the #compile method. This method itself will run back through YARP to - # parse the expression into a tree, then walk the tree to generate the - # necessary callable objects. For example, if you wanted to compile the - # expression above into a callable, you would: - # - # callable = YARP::Pattern.new("ConstantPathNode[ConstantReadNode[name: :YARP], ConstantReadNode[name: :Pattern]]").compile - # callable.call(node) - # - # The callable object returned by #compile is guaranteed to respond to #call - # with a single argument, which is the node to match against. It also is - # guaranteed to respond to #===, which means it itself can be used in a `case` - # expression, as in: - # - # case node - # when callable - # end - # - # If the query given to the initializer cannot be compiled into a valid - # matcher (either because of a syntax error or because it is using syntax we - # do not yet support) then a YARP::Pattern::CompilationError will be - # raised. - class Pattern - # Raised when the query given to a pattern is either invalid Ruby syntax or - # is using syntax that we don't yet support. - class CompilationError < StandardError - def initialize(repr) - super(<<~ERROR) - YARP was unable to compile the pattern you provided into a usable - expression. It failed on to understand the node represented by: - - #{repr} - - Note that not all syntax supported by Ruby's pattern matching syntax - is also supported by YARP's patterns. If you're using some syntax - that you believe should be supported, please open an issue on - GitHub at https://github.com/ruby/yarp/issues/new. - ERROR - end - end - - attr_reader :query - - def initialize(query) - @query = query - @compiled = nil - end - - def compile - result = YARP.parse("case nil\nin #{query}\nend") - compile_node(result.value.statements.body.last.conditions.last.pattern) - end - - def scan(root) - return to_enum(__method__, root) unless block_given? - - @compiled ||= compile - queue = [root] - - while (node = queue.shift) - yield node if @compiled.call(node) - queue.concat(node.compact_child_nodes) - end - end - - private - - # Shortcut for combining two procs into one that returns true if both return - # true. - def combine_and(left, right) - ->(other) { left.call(other) && right.call(other) } - end - - # Shortcut for combining two procs into one that returns true if either - # returns true. - def combine_or(left, right) - ->(other) { left.call(other) || right.call(other) } - end - - # Raise an error because the given node is not supported. - def compile_error(node) - raise CompilationError, node.inspect - end - - # in [foo, bar, baz] - def compile_array_pattern_node(node) - compile_error(node) if !node.rest.nil? || node.posts.any? - - constant = node.constant - compiled_constant = compile_node(constant) if constant - - preprocessed = node.requireds.map { |required| compile_node(required) } - - compiled_requireds = ->(other) do - deconstructed = other.deconstruct - - deconstructed.length == preprocessed.length && - preprocessed - .zip(deconstructed) - .all? { |(matcher, value)| matcher.call(value) } - end - - if compiled_constant - combine_and(compiled_constant, compiled_requireds) - else - compiled_requireds - end - end - - # in foo | bar - def compile_alternation_pattern_node(node) - combine_or(compile_node(node.left), compile_node(node.right)) - end - - # in YARP::ConstantReadNode - def compile_constant_path_node(node) - parent = node.parent - - if parent.is_a?(ConstantReadNode) && parent.slice == "YARP" - compile_node(node.child) - else - compile_error(node) - end - end - - # in ConstantReadNode - # in String - def compile_constant_read_node(node) - value = node.slice - - if YARP.const_defined?(value, false) - clazz = YARP.const_get(value) - - ->(other) { clazz === other } - elsif Object.const_defined?(value, false) - clazz = Object.const_get(value) - - ->(other) { clazz === other } - else - compile_error(node) - end - end - - # in InstanceVariableReadNode[name: Symbol] - # in { name: Symbol } - def compile_hash_pattern_node(node) - compile_error(node) unless node.kwrest.nil? - compiled_constant = compile_node(node.constant) if node.constant - - preprocessed = - node.assocs.to_h do |assoc| - [assoc.key.unescaped.to_sym, compile_node(assoc.value)] - end - - compiled_keywords = ->(other) do - deconstructed = other.deconstruct_keys(preprocessed.keys) - - preprocessed.all? do |keyword, matcher| - deconstructed.key?(keyword) && matcher.call(deconstructed[keyword]) - end - end - - if compiled_constant - combine_and(compiled_constant, compiled_keywords) - else - compiled_keywords - end - end - - # in nil - def compile_nil_node(node) - ->(attribute) { attribute.nil? } - end - - # in /foo/ - def compile_regular_expression_node(node) - regexp = Regexp.new(node.unescaped, node.closing[1..]) - - ->(attribute) { regexp === attribute } - end - - # in "" - # in "foo" - def compile_string_node(node) - string = node.unescaped - - ->(attribute) { string === attribute } - end - - # in :+ - # in :foo - def compile_symbol_node(node) - symbol = node.unescaped.to_sym - - ->(attribute) { symbol === attribute } - end - - # Compile any kind of node. Dispatch out to the individual compilation - # methods based on the type of node. - def compile_node(node) - case node - when AlternationPatternNode - compile_alternation_pattern_node(node) - when ArrayPatternNode - compile_array_pattern_node(node) - when ConstantPathNode - compile_constant_path_node(node) - when ConstantReadNode - compile_constant_read_node(node) - when HashPatternNode - compile_hash_pattern_node(node) - when NilNode - compile_nil_node(node) - when RegularExpressionNode - compile_regular_expression_node(node) - when StringNode - compile_string_node(node) - when SymbolNode - compile_symbol_node(node) - else - compile_error(node) - end - end - end -end diff --git a/lib/yarp/ripper_compat.rb b/lib/yarp/ripper_compat.rb deleted file mode 100644 index c76f3fd07a..0000000000 --- a/lib/yarp/ripper_compat.rb +++ /dev/null @@ -1,174 +0,0 @@ -# frozen_string_literal: true - -require "ripper" - -module YARP - # This class is meant to provide a compatibility layer between YARP and - # Ripper. It functions by parsing the entire tree first and then walking it - # and executing each of the Ripper callbacks as it goes. - # - # This class is going to necessarily be slower than the native Ripper API. It - # is meant as a stopgap until developers migrate to using YARP. It is also - # meant as a test harness for the YARP parser. - class RipperCompat - # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that - # returns the arrays of [type, *children]. - class SexpBuilder < RipperCompat - private - - Ripper::PARSER_EVENTS.each do |event| - define_method(:"on_#{event}") do |*args| - [event, *args] - end - end - - Ripper::SCANNER_EVENTS.each do |event| - define_method(:"on_#{event}") do |value| - [:"@#{event}", value, [lineno, column]] - end - end - end - - # This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that - # returns the same values as ::Ripper::SexpBuilder except with a couple of - # niceties that flatten linked lists into arrays. - class SexpBuilderPP < SexpBuilder - private - - def _dispatch_event_new - [] - end - - def _dispatch_event_push(list, item) - list << item - list - end - - Ripper::PARSER_EVENT_TABLE.each do |event, arity| - case event - when /_new\z/ - alias_method :"on_#{event}", :_dispatch_event_new if arity == 0 - when /_add\z/ - alias_method :"on_#{event}", :_dispatch_event_push - end - end - end - - attr_reader :source, :lineno, :column - - def initialize(source) - @source = source - @result = nil - @lineno = nil - @column = nil - end - - ############################################################################ - # Public interface - ############################################################################ - - def error? - result.errors.any? - end - - def parse - result.value.accept(self) unless error? - end - - ############################################################################ - # Visitor methods - ############################################################################ - - def visit(node) - node&.accept(self) - end - - def visit_call_node(node) - if !node.opening_loc && node.arguments.arguments.length == 1 - bounds(node.receiver.location) - left = visit(node.receiver) - - bounds(node.arguments.arguments.first.location) - right = visit(node.arguments.arguments.first) - - on_binary(left, source[node.message_loc.start_offset...node.message_loc.end_offset].to_sym, right) - else - raise NotImplementedError - end - end - - def visit_integer_node(node) - bounds(node.location) - on_int(source[node.location.start_offset...node.location.end_offset]) - end - - def visit_statements_node(node) - bounds(node.location) - node.body.inject(on_stmts_new) do |stmts, stmt| - on_stmts_add(stmts, visit(stmt)) - end - end - - def visit_token(node) - bounds(node.location) - - case node.type - when :MINUS - on_op(node.value) - when :PLUS - on_op(node.value) - else - raise NotImplementedError, "Unknown token: #{node.type}" - end - end - - def visit_program_node(node) - bounds(node.location) - on_program(visit(node.statements)) - end - - ############################################################################ - # Entrypoints for subclasses - ############################################################################ - - # This is a convenience method that runs the SexpBuilder subclass parser. - def self.sexp_raw(source) - SexpBuilder.new(source).parse - end - - # This is a convenience method that runs the SexpBuilderPP subclass parser. - def self.sexp(source) - SexpBuilderPP.new(source).parse - end - - private - - # This method is responsible for updating lineno and column information - # to reflect the current node. - # - # This method could be drastically improved with some caching on the start - # of every line, but for now it's good enough. - def bounds(location) - start_offset = location.start_offset - - @lineno = source[0..start_offset].count("\n") + 1 - @column = start_offset - (source.rindex("\n", start_offset) || 0) - end - - def result - @result ||= YARP.parse(source) - end - - def _dispatch0; end - def _dispatch1(_); end - def _dispatch2(_, _); end - def _dispatch3(_, _, _); end - def _dispatch4(_, _, _, _); end - def _dispatch5(_, _, _, _, _); end - def _dispatch7(_, _, _, _, _, _, _); end - - (Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity| - alias_method :"on_#{event}", :"_dispatch#{arity}" - end - end -end diff --git a/lib/yarp/version.rb b/lib/yarp/version.rb deleted file mode 100644 index e450bfb526..0000000000 --- a/lib/yarp/version.rb +++ /dev/null @@ -1,5 +0,0 @@ -# frozen_string_literal: true - -module YARP - VERSION = "0.8.0" -end diff --git a/lib/yarp/yarp.gemspec b/lib/yarp/yarp.gemspec deleted file mode 100644 index 186ce7556d..0000000000 --- a/lib/yarp/yarp.gemspec +++ /dev/null @@ -1,105 +0,0 @@ -# frozen_string_literal: true - -Gem::Specification.new do |spec| - spec.name = "yarp" - spec.version = "0.12.0" - spec.authors = ["Shopify"] - spec.email = ["ruby@shopify.com"] - - spec.summary = "Yet Another Ruby Parser" - spec.homepage = "https://github.com/ruby/yarp" - spec.license = "MIT" - - spec.required_ruby_version = ">= 3.0.0" - - spec.require_paths = ["lib"] - spec.files = [ - "CHANGELOG.md", - "CODE_OF_CONDUCT.md", - "CONTRIBUTING.md", - "LICENSE.md", - "Makefile", - "README.md", - "config.yml", - "docs/build_system.md", - "docs/building.md", - "docs/configuration.md", - "docs/design.md", - "docs/encoding.md", - "docs/fuzzing.md", - "docs/heredocs.md", - "docs/mapping.md", - "docs/ripper.md", - "docs/ruby_api.md", - "docs/serialization.md", - "docs/testing.md", - "ext/yarp/api_node.c", - "ext/yarp/api_pack.c", - "ext/yarp/extension.c", - "ext/yarp/extension.h", - "include/yarp.h", - "include/yarp/ast.h", - "include/yarp/defines.h", - "include/yarp/diagnostic.h", - "include/yarp/enc/yp_encoding.h", - "include/yarp/node.h", - "include/yarp/pack.h", - "include/yarp/parser.h", - "include/yarp/regexp.h", - "include/yarp/unescape.h", - "include/yarp/util/yp_buffer.h", - "include/yarp/util/yp_char.h", - "include/yarp/util/yp_constant_pool.h", - "include/yarp/util/yp_list.h", - "include/yarp/util/yp_memchr.h", - "include/yarp/util/yp_newline_list.h", - "include/yarp/util/yp_state_stack.h", - "include/yarp/util/yp_string.h", - "include/yarp/util/yp_string_list.h", - "include/yarp/util/yp_strpbrk.h", - "include/yarp/version.h", - "lib/yarp.rb", - "lib/yarp/desugar_visitor.rb", - "lib/yarp/ffi.rb", - "lib/yarp/lex_compat.rb", - "lib/yarp/mutation_visitor.rb", - "lib/yarp/node.rb", - "lib/yarp/pack.rb", - "lib/yarp/pattern.rb", - "lib/yarp/ripper_compat.rb", - "lib/yarp/serialize.rb", - "lib/yarp/parse_result/comments.rb", - "lib/yarp/parse_result/newlines.rb", - "src/diagnostic.c", - "src/enc/yp_big5.c", - "src/enc/yp_euc_jp.c", - "src/enc/yp_gbk.c", - "src/enc/yp_shift_jis.c", - "src/enc/yp_tables.c", - "src/enc/yp_unicode.c", - "src/enc/yp_windows_31j.c", - "src/node.c", - "src/pack.c", - "src/prettyprint.c", - "src/regexp.c", - "src/serialize.c", - "src/token_type.c", - "src/unescape.c", - "src/util/yp_buffer.c", - "src/util/yp_char.c", - "src/util/yp_constant_pool.c", - "src/util/yp_list.c", - "src/util/yp_memchr.c", - "src/util/yp_newline_list.c", - "src/util/yp_state_stack.c", - "src/util/yp_string.c", - "src/util/yp_string_list.c", - "src/util/yp_strncasecmp.c", - "src/util/yp_strpbrk.c", - "src/yarp.c", - "yarp.gemspec", - ] - - spec.extensions = ["ext/yarp/extconf.rb"] - spec.metadata["allowed_push_host"] = "https://rubygems.org" -end |
