summaryrefslogtreecommitdiff
path: root/lib/prism
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2023-09-27 12:22:36 -0400
committerKevin Newton <kddnewton@gmail.com>2023-09-27 13:57:38 -0400
commit8ab56869a64fdccc094f4a83c6367fb23b72d38b (patch)
tree46ef2bd5c51d5b7f923eda6a60edefc7a08200db /lib/prism
parent7e0971eb5d679bb6219abb0ec238139aa6502c5a (diff)
Rename YARP filepaths to prism filepaths
Diffstat (limited to 'lib/prism')
-rw-r--r--lib/prism/debug.rb157
-rw-r--r--lib/prism/desugar_compiler.rb206
-rw-r--r--lib/prism/ffi.rb251
-rw-r--r--lib/prism/language_server.rb166
-rw-r--r--lib/prism/lex_compat.rb838
-rw-r--r--lib/prism/node_ext.rb55
-rw-r--r--lib/prism/node_inspector.rb68
-rw-r--r--lib/prism/pack.rb185
-rw-r--r--lib/prism/parse_result.rb266
-rw-r--r--lib/prism/parse_result/comments.rb172
-rw-r--r--lib/prism/parse_result/newlines.rb60
-rw-r--r--lib/prism/pattern.rb239
-rw-r--r--lib/prism/prism.gemspec113
-rw-r--r--lib/prism/ripper_compat.rb174
-rw-r--r--lib/prism/version.rb5
15 files changed, 2955 insertions, 0 deletions
diff --git a/lib/prism/debug.rb b/lib/prism/debug.rb
new file mode 100644
index 0000000000..39df1e838c
--- /dev/null
+++ b/lib/prism/debug.rb
@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+
+module YARP
+ # This module is used for testing and debugging and is not meant to be used by
+ # consumers of this library.
+ module Debug
+ class ISeq
+ attr_reader :parts
+
+ def initialize(parts)
+ @parts = parts
+ end
+
+ def type
+ parts[0]
+ end
+
+ def local_table
+ parts[10]
+ end
+
+ def instructions
+ parts[13]
+ end
+
+ def each_child
+ instructions.each do |instruction|
+ # Only look at arrays. Other instructions are line numbers or
+ # tracepoint events.
+ next unless instruction.is_a?(Array)
+
+ instruction.each do |opnd|
+ # Only look at arrays. Other operands are literals.
+ next unless opnd.is_a?(Array)
+
+ # Only look at instruction sequences. Other operands are literals.
+ next unless opnd[0] == "YARVInstructionSequence/SimpleDataFormat"
+
+ yield ISeq.new(opnd)
+ end
+ end
+ end
+ end
+
+ # For the given source, compiles with CRuby and returns a list of all of the
+ # sets of local variables that were encountered.
+ def self.cruby_locals(source)
+ verbose = $VERBOSE
+ $VERBOSE = nil
+
+ begin
+ locals = []
+ stack = [ISeq.new(RubyVM::InstructionSequence.compile(source).to_a)]
+
+ while (iseq = stack.pop)
+ if iseq.type != :once
+ names = iseq.local_table
+
+ # CRuby will push on a special local variable when there are keyword
+ # arguments. We get rid of that here.
+ names = names.grep_v(Integer)
+
+ # For some reason, CRuby occasionally pushes this special local
+ # variable when there are splat arguments. We get rid of that here.
+ names = names.grep_v(:"#arg_rest")
+
+ # Now push them onto the list of locals.
+ locals << names
+ end
+
+ iseq.each_child { |child| stack << child }
+ end
+
+ locals
+ ensure
+ $VERBOSE = verbose
+ end
+ end
+
+ # For the given source, parses with YARP and returns a list of all of the
+ # sets of local variables that were encountered.
+ def self.yarp_locals(source)
+ locals = []
+ stack = [YARP.parse(source).value]
+
+ while (node = stack.pop)
+ case node
+ when BlockNode, DefNode, LambdaNode
+ names = node.locals
+
+ params = node.parameters
+ params = params&.parameters unless node.is_a?(DefNode)
+
+ # YARP places parameters in the same order that they appear in the
+ # source. CRuby places them in the order that they need to appear
+ # according to their own internal calling convention. We mimic that
+ # order here so that we can compare properly.
+ if params
+ sorted = [
+ *params.requireds.grep(RequiredParameterNode).map(&:name),
+ *params.optionals.map(&:name),
+ *((params.rest.name || :*) if params.rest && params.rest.operator != ","),
+ *params.posts.grep(RequiredParameterNode).map(&:name),
+ *params.keywords.reject(&:value).map(&:name),
+ *params.keywords.select(&:value).map(&:name)
+ ]
+
+ # TODO: When we get a ... parameter, we should be pushing * and &
+ # onto the local list. We don't do that yet, so we need to add them
+ # in here.
+ if params.keyword_rest.is_a?(ForwardingParameterNode)
+ sorted.push(:*, :&, :"...")
+ end
+
+ # Recurse down the parameter tree to find any destructured
+ # parameters and add them after the other parameters.
+ param_stack = params.requireds.concat(params.posts).grep(RequiredDestructuredParameterNode).reverse
+ while (param = param_stack.pop)
+ case param
+ when RequiredDestructuredParameterNode
+ param_stack.concat(param.parameters.reverse)
+ when RequiredParameterNode
+ sorted << param.name
+ when SplatNode
+ sorted << param.expression.name if param.expression
+ end
+ end
+
+ names = sorted.concat(names - sorted)
+ end
+
+ locals << names
+ when ClassNode, ModuleNode, ProgramNode, SingletonClassNode
+ locals << node.locals
+ when ForNode
+ locals << []
+ when PostExecutionNode
+ locals.push([], [])
+ when InterpolatedRegularExpressionNode
+ locals << [] if node.once?
+ end
+
+ stack.concat(node.compact_child_nodes)
+ end
+
+ locals
+ end
+
+ def self.newlines(source)
+ YARP.parse(source).source.offsets
+ end
+
+ def self.parse_serialize_file(filepath)
+ parse_serialize_file_metadata(filepath, [filepath.bytesize, filepath.b, 0].pack("LA*L"))
+ end
+ end
+end
diff --git a/lib/prism/desugar_compiler.rb b/lib/prism/desugar_compiler.rb
new file mode 100644
index 0000000000..b86e8518c6
--- /dev/null
+++ b/lib/prism/desugar_compiler.rb
@@ -0,0 +1,206 @@
+# frozen_string_literal: true
+
+module YARP
+ # DesugarCompiler is a compiler that desugars Ruby code into a more primitive
+ # form. This is useful for consumers that want to deal with fewer node types.
+ class DesugarCompiler < MutationCompiler
+ # @@foo &&= bar
+ #
+ # becomes
+ #
+ # @@foo && @@foo = bar
+ def visit_class_variable_and_write_node(node)
+ desugar_and_write_node(node, ClassVariableReadNode, ClassVariableWriteNode, node.name)
+ end
+
+ # @@foo ||= bar
+ #
+ # becomes
+ #
+ # defined?(@@foo) ? @@foo : @@foo = bar
+ def visit_class_variable_or_write_node(node)
+ desugar_or_write_defined_node(node, ClassVariableReadNode, ClassVariableWriteNode, node.name)
+ end
+
+ # @@foo += bar
+ #
+ # becomes
+ #
+ # @@foo = @@foo + bar
+ def visit_class_variable_operator_write_node(node)
+ desugar_operator_write_node(node, ClassVariableReadNode, ClassVariableWriteNode, node.name)
+ end
+
+ # Foo &&= bar
+ #
+ # becomes
+ #
+ # Foo && Foo = bar
+ def visit_constant_and_write_node(node)
+ desugar_and_write_node(node, ConstantReadNode, ConstantWriteNode, node.name)
+ end
+
+ # Foo ||= bar
+ #
+ # becomes
+ #
+ # defined?(Foo) ? Foo : Foo = bar
+ def visit_constant_or_write_node(node)
+ desugar_or_write_defined_node(node, ConstantReadNode, ConstantWriteNode, node.name)
+ end
+
+ # Foo += bar
+ #
+ # becomes
+ #
+ # Foo = Foo + bar
+ def visit_constant_operator_write_node(node)
+ desugar_operator_write_node(node, ConstantReadNode, ConstantWriteNode, node.name)
+ end
+
+ # $foo &&= bar
+ #
+ # becomes
+ #
+ # $foo && $foo = bar
+ def visit_global_variable_and_write_node(node)
+ desugar_and_write_node(node, GlobalVariableReadNode, GlobalVariableWriteNode, node.name)
+ end
+
+ # $foo ||= bar
+ #
+ # becomes
+ #
+ # defined?($foo) ? $foo : $foo = bar
+ def visit_global_variable_or_write_node(node)
+ desugar_or_write_defined_node(node, GlobalVariableReadNode, GlobalVariableWriteNode, node.name)
+ end
+
+ # $foo += bar
+ #
+ # becomes
+ #
+ # $foo = $foo + bar
+ def visit_global_variable_operator_write_node(node)
+ desugar_operator_write_node(node, GlobalVariableReadNode, GlobalVariableWriteNode, node.name)
+ end
+
+ # @foo &&= bar
+ #
+ # becomes
+ #
+ # @foo && @foo = bar
+ def visit_instance_variable_and_write_node(node)
+ desugar_and_write_node(node, InstanceVariableReadNode, InstanceVariableWriteNode, node.name)
+ end
+
+ # @foo ||= bar
+ #
+ # becomes
+ #
+ # @foo || @foo = bar
+ def visit_instance_variable_or_write_node(node)
+ desugar_or_write_node(node, InstanceVariableReadNode, InstanceVariableWriteNode, node.name)
+ end
+
+ # @foo += bar
+ #
+ # becomes
+ #
+ # @foo = @foo + bar
+ def visit_instance_variable_operator_write_node(node)
+ desugar_operator_write_node(node, InstanceVariableReadNode, InstanceVariableWriteNode, node.name)
+ end
+
+ # foo &&= bar
+ #
+ # becomes
+ #
+ # foo && foo = bar
+ def visit_local_variable_and_write_node(node)
+ desugar_and_write_node(node, LocalVariableReadNode, LocalVariableWriteNode, node.name, node.depth)
+ end
+
+ # foo ||= bar
+ #
+ # becomes
+ #
+ # foo || foo = bar
+ def visit_local_variable_or_write_node(node)
+ desugar_or_write_node(node, LocalVariableReadNode, LocalVariableWriteNode, node.name, node.depth)
+ end
+
+ # foo += bar
+ #
+ # becomes
+ #
+ # foo = foo + bar
+ def visit_local_variable_operator_write_node(node)
+ desugar_operator_write_node(node, LocalVariableReadNode, LocalVariableWriteNode, node.name, node.depth)
+ end
+
+ private
+
+ # Desugar `x &&= y` to `x && x = y`
+ def desugar_and_write_node(node, read_class, write_class, *arguments)
+ AndNode.new(
+ read_class.new(*arguments, node.name_loc),
+ write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location),
+ node.operator_loc,
+ node.location
+ )
+ end
+
+ # Desugar `x += y` to `x = x + y`
+ def desugar_operator_write_node(node, read_class, write_class, *arguments)
+ write_class.new(
+ *arguments,
+ node.name_loc,
+ CallNode.new(
+ read_class.new(*arguments, node.name_loc),
+ nil,
+ node.operator_loc.copy(length: node.operator_loc.length - 1),
+ nil,
+ ArgumentsNode.new([node.value], node.value.location),
+ nil,
+ nil,
+ 0,
+ node.operator_loc.slice.chomp("="),
+ node.location
+ ),
+ node.operator_loc.copy(start_offset: node.operator_loc.end_offset - 1, length: 1),
+ node.location
+ )
+ end
+
+ # Desugar `x ||= y` to `x || x = y`
+ def desugar_or_write_node(node, read_class, write_class, *arguments)
+ OrNode.new(
+ read_class.new(*arguments, node.name_loc),
+ write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location),
+ node.operator_loc,
+ node.location
+ )
+ end
+
+ # Desugar `x ||= y` to `defined?(x) ? x : x = y`
+ def desugar_or_write_defined_node(node, read_class, write_class, *arguments)
+ IfNode.new(
+ node.operator_loc,
+ DefinedNode.new(nil, read_class.new(*arguments, node.name_loc), nil, node.operator_loc, node.name_loc),
+ StatementsNode.new([read_class.new(*arguments, node.name_loc)], node.location),
+ ElseNode.new(
+ node.operator_loc,
+ StatementsNode.new(
+ [write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location)],
+ node.location
+ ),
+ node.operator_loc,
+ node.location
+ ),
+ node.operator_loc,
+ node.location
+ )
+ end
+ end
+end
diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb
new file mode 100644
index 0000000000..82643be808
--- /dev/null
+++ b/lib/prism/ffi.rb
@@ -0,0 +1,251 @@
+# frozen_string_literal: true
+
+# This file is responsible for mirroring the API provided by the C extension by
+# using FFI to call into the shared library.
+
+require "rbconfig"
+require "ffi"
+
+module YARP
+ BACKEND = :FFI
+
+ module LibRubyParser
+ extend FFI::Library
+
+ # Define the library that we will be pulling functions from. Note that this
+    # must align with the built shared library from make/rake.
+ ffi_lib File.expand_path("../../build/librubyparser.#{RbConfig::CONFIG["SOEXT"]}", __dir__)
+
+ # Convert a native C type declaration into a symbol that FFI understands.
+ # For example:
+ #
+ # const char * -> :pointer
+ # bool -> :bool
+ # size_t -> :size_t
+ # void -> :void
+ #
+ def self.resolve_type(type)
+ type = type.strip.delete_prefix("const ")
+ type.end_with?("*") ? :pointer : type.to_sym
+ end
+
+ # Read through the given header file and find the declaration of each of the
+ # given functions. For each one, define a function with the same name and
+ # signature as the C function.
+ def self.load_exported_functions_from(header, *functions)
+ File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line|
+ # We only want to attempt to load exported functions.
+ next unless line.start_with?("YP_EXPORTED_FUNCTION ")
+
+ # We only want to load the functions that we are interested in.
+ next unless functions.any? { |function| line.include?(function) }
+
+ # Parse the function declaration.
+ unless /^YP_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line
+ raise "Could not parse #{line}"
+ end
+
+ # Delete the function from the list of functions we are looking for to
+ # mark it as having been found.
+ functions.delete(name)
+
+ # Split up the argument types into an array, ensure we handle the case
+ # where there are no arguments (by explicit void).
+ arg_types = arg_types.split(",").map(&:strip)
+ arg_types = [] if arg_types == %w[void]
+
+ # Resolve the type of the argument by dropping the name of the argument
+ # first if it is present.
+ arg_types.map! { |type| resolve_type(type.sub(/\w+$/, "")) }
+
+ # Attach the function using the FFI library.
+ attach_function name, arg_types, resolve_type(return_type)
+ end
+
+ # If we didn't find all of the functions, raise an error.
+ raise "Could not find functions #{functions.inspect}" unless functions.empty?
+ end
+
+ load_exported_functions_from(
+ "yarp.h",
+ "yp_version",
+ "yp_parse_serialize",
+ "yp_lex_serialize",
+ "yp_parse_lex_serialize"
+ )
+
+ load_exported_functions_from(
+ "yarp/util/yp_buffer.h",
+ "yp_buffer_sizeof",
+ "yp_buffer_init",
+ "yp_buffer_value",
+ "yp_buffer_length",
+ "yp_buffer_free"
+ )
+
+ load_exported_functions_from(
+ "yarp/util/yp_string.h",
+ "yp_string_mapped_init",
+ "yp_string_free",
+ "yp_string_source",
+ "yp_string_length",
+ "yp_string_sizeof"
+ )
+
+ # This object represents a yp_buffer_t. We only use it as an opaque pointer,
+ # so it doesn't need to know the fields of yp_buffer_t.
+ class YPBuffer
+ SIZEOF = LibRubyParser.yp_buffer_sizeof
+
+ attr_reader :pointer
+
+ def initialize(pointer)
+ @pointer = pointer
+ end
+
+ def value
+ LibRubyParser.yp_buffer_value(pointer)
+ end
+
+ def length
+ LibRubyParser.yp_buffer_length(pointer)
+ end
+
+ def read
+ value.read_string(length)
+ end
+
+ # Initialize a new buffer and yield it to the block. The buffer will be
+ # automatically freed when the block returns.
+ def self.with(&block)
+ pointer = FFI::MemoryPointer.new(SIZEOF)
+
+ begin
+ raise unless LibRubyParser.yp_buffer_init(pointer)
+ yield new(pointer)
+ ensure
+ LibRubyParser.yp_buffer_free(pointer)
+ pointer.free
+ end
+ end
+ end
+
+ # This object represents a yp_string_t. We only use it as an opaque pointer,
+ # so it doesn't have to be an FFI::Struct.
+ class YPString
+ SIZEOF = LibRubyParser.yp_string_sizeof
+
+ attr_reader :pointer
+
+ def initialize(pointer)
+ @pointer = pointer
+ end
+
+ def source
+ LibRubyParser.yp_string_source(pointer)
+ end
+
+ def length
+ LibRubyParser.yp_string_length(pointer)
+ end
+
+ def read
+ source.read_string(length)
+ end
+
+ # Yields a yp_string_t pointer to the given block.
+ def self.with(filepath, &block)
+ pointer = FFI::MemoryPointer.new(SIZEOF)
+
+ begin
+ raise unless LibRubyParser.yp_string_mapped_init(pointer, filepath)
+ yield new(pointer)
+ ensure
+ LibRubyParser.yp_string_free(pointer)
+ pointer.free
+ end
+ end
+ end
+
+ def self.dump_internal(source, source_size, filepath)
+ YPBuffer.with do |buffer|
+ metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
+ yp_parse_serialize(source, source_size, buffer.pointer, metadata)
+ buffer.read
+ end
+ end
+ end
+
+ # Mark the LibRubyParser module as private as it should only be called through
+ # the YARP module.
+ private_constant :LibRubyParser
+
+ # The version constant is set by reading the result of calling yp_version.
+ VERSION = LibRubyParser.yp_version.read_string
+
+ # Mirror the YARP.dump API by using the serialization API.
+ def self.dump(code, filepath = nil)
+ LibRubyParser.dump_internal(code, code.bytesize, filepath)
+ end
+
+ # Mirror the YARP.dump_file API by using the serialization API.
+ def self.dump_file(filepath)
+ LibRubyParser::YPString.with(filepath) do |string|
+ LibRubyParser.dump_internal(string.source, string.length, filepath)
+ end
+ end
+
+ # Mirror the YARP.lex API by using the serialization API.
+ def self.lex(code, filepath = nil)
+ LibRubyParser::YPBuffer.with do |buffer|
+ LibRubyParser.yp_lex_serialize(code, code.bytesize, filepath, buffer.pointer)
+ Serialize.load_tokens(Source.new(code), buffer.read)
+ end
+ end
+
+ # Mirror the YARP.lex_file API by using the serialization API.
+ def self.lex_file(filepath)
+ LibRubyParser::YPString.with(filepath) do |string|
+ lex(string.read, filepath)
+ end
+ end
+
+ # Mirror the YARP.parse API by using the serialization API.
+ def self.parse(code, filepath = nil)
+ YARP.load(code, dump(code, filepath))
+ end
+
+ # Mirror the YARP.parse_file API by using the serialization API. This uses
+ # native strings instead of Ruby strings because it allows us to use mmap when
+ # it is available.
+ def self.parse_file(filepath)
+ LibRubyParser::YPString.with(filepath) do |string|
+ parse(string.read, filepath)
+ end
+ end
+
+ # Mirror the YARP.parse_lex API by using the serialization API.
+ def self.parse_lex(code, filepath = nil)
+ LibRubyParser::YPBuffer.with do |buffer|
+ metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
+ LibRubyParser.yp_parse_lex_serialize(code, code.bytesize, buffer.pointer, metadata)
+
+ source = Source.new(code)
+ loader = Serialize::Loader.new(source, buffer.read)
+
+ tokens = loader.load_tokens
+ node, comments, errors, warnings = loader.load_nodes
+
+ tokens.each { |token,| token.value.force_encoding(loader.encoding) }
+
+ ParseResult.new([node, tokens], comments, errors, warnings, source)
+ end
+ end
+
+ # Mirror the YARP.parse_lex_file API by using the serialization API.
+ def self.parse_lex_file(filepath)
+ LibRubyParser::YPString.with(filepath) do |string|
+ parse_lex(string.read, filepath)
+ end
+ end
+end
diff --git a/lib/prism/language_server.rb b/lib/prism/language_server.rb
new file mode 100644
index 0000000000..5a10d484a1
--- /dev/null
+++ b/lib/prism/language_server.rb
@@ -0,0 +1,166 @@
+# frozen_string_literal: true
+
+require "cgi"
+require "json"
+require "uri"
+
+module YARP
+ # YARP additionally ships with a language server conforming to the
+ # language server protocol. It can be invoked by running the yarp-lsp
+ # bin script (bin/yarp-lsp)
+ class LanguageServer
+ GITHUB_TEMPLATE = <<~TEMPLATE
+ Reporting issue with error `%{error}`.
+
+ ## Expected behavior
+ <!-- TODO: Briefly explain what the expected behavior should be on this example. -->
+
+ ## Actual behavior
+ <!-- TODO: Describe here what actually happened. -->
+
+ ## Steps to reproduce the problem
+ <!-- TODO: Describe how we can reproduce the problem. -->
+
+ ## Additional information
+ <!-- TODO: Include any additional information, such as screenshots. -->
+
+ TEMPLATE
+
+ attr_reader :input, :output
+
+ def initialize(
+ input: $stdin,
+ output: $stdout
+ )
+ @input = input.binmode
+ @output = output.binmode
+ end
+
+ # rubocop:disable Layout/LineLength
+ def run
+ store =
+ Hash.new do |hash, uri|
+ filepath = CGI.unescape(URI.parse(uri).path)
+ File.exist?(filepath) ? (hash[uri] = File.read(filepath)) : nil
+ end
+
+ while (headers = input.gets("\r\n\r\n"))
+ source = input.read(headers[/Content-Length: (\d+)/i, 1].to_i)
+ request = JSON.parse(source, symbolize_names: true)
+
+ # stree-ignore
+ case request
+ in { method: "initialize", id: }
+ store.clear
+ write(id: id, result: { capabilities: capabilities })
+ in { method: "initialized" }
+ # ignored
+ in { method: "shutdown" } # tolerate missing ID to be a good citizen
+ store.clear
+ write(id: request[:id], result: {})
+ in { method: "exit"}
+ return
+ in { method: "textDocument/didChange", params: { textDocument: { uri: }, contentChanges: [{ text: }, *] } }
+ store[uri] = text
+ in { method: "textDocument/didOpen", params: { textDocument: { uri:, text: } } }
+ store[uri] = text
+ in { method: "textDocument/didClose", params: { textDocument: { uri: } } }
+ store.delete(uri)
+ in { method: "textDocument/diagnostic", id:, params: { textDocument: { uri: } } }
+ contents = store[uri]
+ write(id: id, result: contents ? diagnostics(contents) : nil)
+ in { method: "textDocument/codeAction", id:, params: { textDocument: { uri: }, context: { diagnostics: }}}
+ contents = store[uri]
+ write(id: id, result: contents ? code_actions(contents, diagnostics) : nil)
+ in { method: %r{\$/.+} }
+ # ignored
+ end
+ end
+ end
+ # rubocop:enable Layout/LineLength
+
+ private
+
+ def capabilities
+ {
+ codeActionProvider: {
+ codeActionKinds: [
+ 'quickfix',
+ ],
+ },
+ diagnosticProvider: {
+ interFileDependencies: false,
+ workspaceDiagnostics: false,
+ },
+ textDocumentSync: {
+ change: 1,
+ openClose: true
+ },
+ }
+ end
+
+ def code_actions(source, diagnostics)
+ diagnostics.map do |diagnostic|
+ message = diagnostic[:message]
+ issue_content = URI.encode_www_form_component(GITHUB_TEMPLATE % {error: message})
+ issue_link = "https://github.com/ruby/yarp/issues/new?&labels=Bug&body=#{issue_content}"
+
+ {
+ title: "Report incorrect error: `#{diagnostic[:message]}`",
+ kind: "quickfix",
+ diagnostics: [diagnostic],
+ command: {
+ title: "Report incorrect error",
+ command: "vscode.open",
+ arguments: [issue_link]
+ }
+ }
+ end
+ end
+
+ def diagnostics(source)
+ offsets = Hash.new do |hash, key|
+ slice = source.byteslice(...key)
+ lineno = slice.count("\n")
+
+ char = slice.length
+ newline = source.rindex("\n", [char - 1, 0].max) || -1
+ hash[key] = { line: lineno, character: char - newline - 1 }
+ end
+
+ parse_output = YARP.parse(source)
+
+ {
+ kind: "full",
+ items: [
+ *parse_output.errors.map do |error|
+ {
+ range: {
+ start: offsets[error.location.start_offset],
+ end: offsets[error.location.end_offset],
+ },
+ message: error.message,
+ severity: 1,
+ }
+ end,
+ *parse_output.warnings.map do |warning|
+ {
+ range: {
+ start: offsets[warning.location.start_offset],
+ end: offsets[warning.location.end_offset],
+ },
+ message: warning.message,
+ severity: 2,
+ }
+ end,
+ ]
+ }
+ end
+
+ def write(value)
+ response = value.merge(jsonrpc: "2.0").to_json
+ output.print("Content-Length: #{response.bytesize}\r\n\r\n#{response}")
+ output.flush
+ end
+ end
+end
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
new file mode 100644
index 0000000000..720ac2b59b
--- /dev/null
+++ b/lib/prism/lex_compat.rb
@@ -0,0 +1,838 @@
+# frozen_string_literal: true
+
+require "delegate"
+
+module YARP
+ # This class is responsible for lexing the source using YARP and then
+ # converting those tokens to be compatible with Ripper. In the vast majority
+ # of cases, this is a one-to-one mapping of the token type. Everything else
+ # generally lines up. However, there are a few cases that require special
+ # handling.
+ class LexCompat
+ # This is a mapping of YARP token types to Ripper token types. This is a
+ # many-to-one mapping because we split up our token types, whereas Ripper
+ # tends to group them.
+ RIPPER = {
+ AMPERSAND: :on_op,
+ AMPERSAND_AMPERSAND: :on_op,
+ AMPERSAND_AMPERSAND_EQUAL: :on_op,
+ AMPERSAND_DOT: :on_op,
+ AMPERSAND_EQUAL: :on_op,
+ BACK_REFERENCE: :on_backref,
+ BACKTICK: :on_backtick,
+ BANG: :on_op,
+ BANG_EQUAL: :on_op,
+ BANG_TILDE: :on_op,
+ BRACE_LEFT: :on_lbrace,
+ BRACE_RIGHT: :on_rbrace,
+ BRACKET_LEFT: :on_lbracket,
+ BRACKET_LEFT_ARRAY: :on_lbracket,
+ BRACKET_LEFT_RIGHT: :on_op,
+ BRACKET_LEFT_RIGHT_EQUAL: :on_op,
+ BRACKET_RIGHT: :on_rbracket,
+ CARET: :on_op,
+ CARET_EQUAL: :on_op,
+ CHARACTER_LITERAL: :on_CHAR,
+ CLASS_VARIABLE: :on_cvar,
+ COLON: :on_op,
+ COLON_COLON: :on_op,
+ COMMA: :on_comma,
+ COMMENT: :on_comment,
+ CONSTANT: :on_const,
+ DOT: :on_period,
+ DOT_DOT: :on_op,
+ DOT_DOT_DOT: :on_op,
+ EMBDOC_BEGIN: :on_embdoc_beg,
+ EMBDOC_END: :on_embdoc_end,
+ EMBDOC_LINE: :on_embdoc,
+ EMBEXPR_BEGIN: :on_embexpr_beg,
+ EMBEXPR_END: :on_embexpr_end,
+ EMBVAR: :on_embvar,
+ EOF: :on_eof,
+ EQUAL: :on_op,
+ EQUAL_EQUAL: :on_op,
+ EQUAL_EQUAL_EQUAL: :on_op,
+ EQUAL_GREATER: :on_op,
+ EQUAL_TILDE: :on_op,
+ FLOAT: :on_float,
+ FLOAT_IMAGINARY: :on_imaginary,
+ FLOAT_RATIONAL: :on_rational,
+ FLOAT_RATIONAL_IMAGINARY: :on_imaginary,
+ GREATER: :on_op,
+ GREATER_EQUAL: :on_op,
+ GREATER_GREATER: :on_op,
+ GREATER_GREATER_EQUAL: :on_op,
+ GLOBAL_VARIABLE: :on_gvar,
+ HEREDOC_END: :on_heredoc_end,
+ HEREDOC_START: :on_heredoc_beg,
+ IDENTIFIER: :on_ident,
+ IGNORED_NEWLINE: :on_ignored_nl,
+ INTEGER: :on_int,
+ INTEGER_IMAGINARY: :on_imaginary,
+ INTEGER_RATIONAL: :on_rational,
+ INTEGER_RATIONAL_IMAGINARY: :on_imaginary,
+ INSTANCE_VARIABLE: :on_ivar,
+ INVALID: :INVALID,
+ KEYWORD___ENCODING__: :on_kw,
+ KEYWORD___LINE__: :on_kw,
+ KEYWORD___FILE__: :on_kw,
+ KEYWORD_ALIAS: :on_kw,
+ KEYWORD_AND: :on_kw,
+ KEYWORD_BEGIN: :on_kw,
+ KEYWORD_BEGIN_UPCASE: :on_kw,
+ KEYWORD_BREAK: :on_kw,
+ KEYWORD_CASE: :on_kw,
+ KEYWORD_CLASS: :on_kw,
+ KEYWORD_DEF: :on_kw,
+ KEYWORD_DEFINED: :on_kw,
+ KEYWORD_DO: :on_kw,
+ KEYWORD_DO_LOOP: :on_kw,
+ KEYWORD_ELSE: :on_kw,
+ KEYWORD_ELSIF: :on_kw,
+ KEYWORD_END: :on_kw,
+ KEYWORD_END_UPCASE: :on_kw,
+ KEYWORD_ENSURE: :on_kw,
+ KEYWORD_FALSE: :on_kw,
+ KEYWORD_FOR: :on_kw,
+ KEYWORD_IF: :on_kw,
+ KEYWORD_IF_MODIFIER: :on_kw,
+ KEYWORD_IN: :on_kw,
+ KEYWORD_MODULE: :on_kw,
+ KEYWORD_NEXT: :on_kw,
+ KEYWORD_NIL: :on_kw,
+ KEYWORD_NOT: :on_kw,
+ KEYWORD_OR: :on_kw,
+ KEYWORD_REDO: :on_kw,
+ KEYWORD_RESCUE: :on_kw,
+ KEYWORD_RESCUE_MODIFIER: :on_kw,
+ KEYWORD_RETRY: :on_kw,
+ KEYWORD_RETURN: :on_kw,
+ KEYWORD_SELF: :on_kw,
+ KEYWORD_SUPER: :on_kw,
+ KEYWORD_THEN: :on_kw,
+ KEYWORD_TRUE: :on_kw,
+ KEYWORD_UNDEF: :on_kw,
+ KEYWORD_UNLESS: :on_kw,
+ KEYWORD_UNLESS_MODIFIER: :on_kw,
+ KEYWORD_UNTIL: :on_kw,
+ KEYWORD_UNTIL_MODIFIER: :on_kw,
+ KEYWORD_WHEN: :on_kw,
+ KEYWORD_WHILE: :on_kw,
+ KEYWORD_WHILE_MODIFIER: :on_kw,
+ KEYWORD_YIELD: :on_kw,
+ LABEL: :on_label,
+ LABEL_END: :on_label_end,
+ LAMBDA_BEGIN: :on_tlambeg,
+ LESS: :on_op,
+ LESS_EQUAL: :on_op,
+ LESS_EQUAL_GREATER: :on_op,
+ LESS_LESS: :on_op,
+ LESS_LESS_EQUAL: :on_op,
+ METHOD_NAME: :on_ident,
+ MINUS: :on_op,
+ MINUS_EQUAL: :on_op,
+ MINUS_GREATER: :on_tlambda,
+ NEWLINE: :on_nl,
+ NUMBERED_REFERENCE: :on_backref,
+ PARENTHESIS_LEFT: :on_lparen,
+ PARENTHESIS_LEFT_PARENTHESES: :on_lparen,
+ PARENTHESIS_RIGHT: :on_rparen,
+ PERCENT: :on_op,
+ PERCENT_EQUAL: :on_op,
+ PERCENT_LOWER_I: :on_qsymbols_beg,
+ PERCENT_LOWER_W: :on_qwords_beg,
+ PERCENT_LOWER_X: :on_backtick,
+ PERCENT_UPPER_I: :on_symbols_beg,
+ PERCENT_UPPER_W: :on_words_beg,
+ PIPE: :on_op,
+ PIPE_EQUAL: :on_op,
+ PIPE_PIPE: :on_op,
+ PIPE_PIPE_EQUAL: :on_op,
+ PLUS: :on_op,
+ PLUS_EQUAL: :on_op,
+ QUESTION_MARK: :on_op,
+ RATIONAL_FLOAT: :on_rational,
+ RATIONAL_INTEGER: :on_rational,
+ REGEXP_BEGIN: :on_regexp_beg,
+ REGEXP_END: :on_regexp_end,
+ SEMICOLON: :on_semicolon,
+ SLASH: :on_op,
+ SLASH_EQUAL: :on_op,
+ STAR: :on_op,
+ STAR_EQUAL: :on_op,
+ STAR_STAR: :on_op,
+ STAR_STAR_EQUAL: :on_op,
+ STRING_BEGIN: :on_tstring_beg,
+ STRING_CONTENT: :on_tstring_content,
+ STRING_END: :on_tstring_end,
+ SYMBOL_BEGIN: :on_symbeg,
+ TILDE: :on_op,
+ UAMPERSAND: :on_op,
+ UCOLON_COLON: :on_op,
+ UDOT_DOT: :on_op,
+ UDOT_DOT_DOT: :on_op,
+ UMINUS: :on_op,
+ UMINUS_NUM: :on_op,
+ UPLUS: :on_op,
+ USTAR: :on_op,
+ USTAR_STAR: :on_op,
+ WORDS_SEP: :on_words_sep,
+ "__END__": :on___end__
+ }.freeze
+
+ # When we produce tokens, we produce the same arrays that Ripper does.
+ # However, we add a couple of convenience methods onto them to make them a
+ # little easier to work with. We delegate all other methods to the array.
+ class Token < SimpleDelegator
+ def location
+ self[0]
+ end
+
+ def event
+ self[1]
+ end
+
+ def value
+ self[2]
+ end
+
+ def state
+ self[3]
+ end
+ end
+
+ # Ripper doesn't include the rest of the token in the event, so we need to
+ # trim it down to just the content on the first line when comparing.
+ class EndContentToken < Token
+ def ==(other)
+ [self[0], self[1], self[2][0..self[2].index("\n")], self[3]] == other
+ end
+ end
+
+ # Tokens where state should be ignored
+ # used for :on_comment, :on_heredoc_end, :on_embexpr_end
+ class IgnoreStateToken < Token
+ def ==(other)
+ self[0...-1] == other[0...-1]
+ end
+ end
+
+ # Ident tokens for the most part are exactly the same, except sometimes we
+ # know an ident is a local when ripper doesn't (when they are introduced
+ # through named captures in regular expressions). In that case we don't
+ # compare the state.
+ class IdentToken < Token
+ def ==(other)
+ (self[0...-1] == other[0...-1]) && (
+ (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) ||
+ (other[3] & Ripper::EXPR_ARG_ANY != 0)
+ )
+ end
+ end
+
+ # Ignored newlines can occasionally have a LABEL state attached to them, so
+ # we compare the state differently here.
+ class IgnoredNewlineToken < Token
+ def ==(other)
+ return false unless self[0...-1] == other[0...-1]
+
+ if self[4] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED
+ other[4] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED > 0
+ else
+ self[4] == other[4]
+ end
+ end
+ end
+
+ # If we have an identifier that follows a method name like:
+ #
+ # def foo bar
+ #
+ # then Ripper will mark bar as END|LABEL if there is a local in a parent
+ # scope named bar because it hasn't pushed the local table yet. We do this
+ # more accurately, so we need to allow comparing against both END and
+ # END|LABEL.
+ class ParamToken < Token
+ def ==(other)
+ (self[0...-1] == other[0...-1]) && (
+ (other[3] == Ripper::EXPR_END) ||
+ (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
+ )
+ end
+ end
+
+ # A heredoc in this case is a list of tokens that belong to the body of the
+ # heredoc that should be appended onto the list of tokens when the heredoc
+ # closes.
+ module Heredoc
+      # Heredocs that are not dash or tilde heredocs are just a list of tokens.
+ # We need to keep them around so that we can insert them in the correct
+ # order back into the token stream and set the state of the last token to
+ # the state that the heredoc was opened in.
+ class PlainHeredoc
+ attr_reader :tokens
+
+ def initialize
+ @tokens = []
+ end
+
+ def <<(token)
+ tokens << token
+ end
+
+ def to_a
+ tokens
+ end
+ end
+
+ # Dash heredocs are a little more complicated. They are a list of tokens
+ # that need to be split on "\\\n" to mimic Ripper's behavior. We also need
+ # to keep track of the state that the heredoc was opened in.
+ class DashHeredoc
+ attr_reader :split, :tokens
+
+ def initialize(split)
+ @split = split
+ @tokens = []
+ end
+
+ def <<(token)
+ tokens << token
+ end
+
+ def to_a
+ embexpr_balance = 0
+
+ tokens.each_with_object([]) do |token, results|
+ case token.event
+ when :on_embexpr_beg
+ embexpr_balance += 1
+ results << token
+ when :on_embexpr_end
+ embexpr_balance -= 1
+ results << token
+ when :on_tstring_content
+ if embexpr_balance == 0
+ lineno = token[0][0]
+ column = token[0][1]
+
+ if split
+ # Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind
+ # to keep the delimiter in the result.
+ token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
+ column = 0 if index > 0
+ results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
+ lineno += value.count("\n")
+ end
+ else
+ results << token
+ end
+ else
+ results << token
+ end
+ else
+ results << token
+ end
+ end
+ end
+ end
+
+      # Heredocs that are dedenting heredocs are a little more complicated.
+      # Ripper outputs on_ignored_sp tokens for the whitespace that is being
+      # removed from the output. YARP only modifies the node itself and keeps
+      # the token the same. This simplifies YARP, but makes comparing against
+      # Ripper much harder because there is a length mismatch.
+      #
+      # Fortunately, we already have to pull out the heredoc tokens in order to
+      # insert them into the stream in the correct order. As such, we can do
+      # some extra manipulation on the tokens to make them match Ripper's
+      # output by mirroring the dedent logic that Ripper uses.
+      class DedentingHeredoc
+        # Tabs expand to the next multiple of 8 columns when measuring leading
+        # whitespace, mirroring Ripper's dedent calculation.
+        TAB_WIDTH = 8
+
+        attr_reader :tokens, :dedent_next, :dedent, :embexpr_balance
+
+        def initialize
+          @tokens = []
+          @dedent_next = true
+          # dedent stays nil until at least one measurable line is seen.
+          @dedent = nil
+          @embexpr_balance = 0
+        end
+
+        # As tokens are coming in, we track the minimum amount of common leading
+        # whitespace on plain string content tokens. This allows us to later
+        # remove that amount of whitespace from the beginning of each line.
+        def <<(token)
+          case token.event
+          when :on_embexpr_beg, :on_heredoc_beg
+            @embexpr_balance += 1
+          when :on_embexpr_end, :on_heredoc_end
+            @embexpr_balance -= 1
+          when :on_tstring_content
+            if embexpr_balance == 0
+              # Split into lines, keeping the trailing newline on each piece.
+              token.value.split(/(?<=\n)/).each_with_index do |line, index|
+                # Entirely blank lines do not participate in the calculation.
+                next if line.strip.empty? && line.end_with?("\n")
+                # Only lines that start fresh (not continuations of an
+                # interpolation) are measured.
+                next if !(dedent_next || index > 0)
+
+                leading = line[/\A(\s*)\n?/, 1]
+                next_dedent = 0
+
+                leading.each_char do |char|
+                  if char == "\t"
+                    # Round up to the next tab stop.
+                    next_dedent = next_dedent - (next_dedent % TAB_WIDTH) + TAB_WIDTH
+                  else
+                    next_dedent += 1
+                  end
+                end
+
+                # Track the minimum seen so far; compact drops the initial nil.
+                @dedent = [dedent, next_dedent].compact.min
+              end
+            end
+          end
+
+          # The next line can only be dedented if this token was plain string
+          # content outside of any embedded expression.
+          @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0
+          tokens << token
+        end
+
+        def to_a
+          # If every line in the heredoc is blank, we still need to split up the
+          # string content token into multiple tokens.
+          if dedent.nil?
+            results = []
+            embexpr_balance = 0
+
+            tokens.each do |token|
+              case token.event
+              when :on_embexpr_beg, :on_heredoc_beg
+                embexpr_balance += 1
+                results << token
+              when :on_embexpr_end, :on_heredoc_end
+                embexpr_balance -= 1
+                results << token
+              when :on_tstring_content
+                if embexpr_balance == 0
+                  lineno = token[0][0]
+                  column = token[0][1]
+
+                  # Emit one content token per line, as Ripper does.
+                  token.value.split(/(?<=\n)/).each_with_index do |value, index|
+                    column = 0 if index > 0
+                    results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
+                    lineno += 1
+                  end
+                else
+                  results << token
+                end
+              else
+                results << token
+              end
+            end
+
+            return results
+          end
+
+          # Otherwise, we're going to run through each token in the list and
+          # insert on_ignored_sp tokens for the amount of dedent that we need to
+          # perform. We also need to remove the dedent from the beginning of
+          # each line of plain string content tokens.
+          results = []
+          dedent_next = true
+          embexpr_balance = 0
+
+          tokens.each do |token|
+            # Notice that the structure of this conditional largely matches the
+            # whitespace calculation we performed above. This is because
+            # checking if the subsequent token needs to be dedented is common to
+            # both the dedent calculation and the ignored_sp insertion.
+            case token.event
+            when :on_embexpr_beg
+              embexpr_balance += 1
+              results << token
+            when :on_embexpr_end
+              embexpr_balance -= 1
+              results << token
+            when :on_tstring_content
+              if embexpr_balance == 0
+                # Here we're going to split the string on newlines, but maintain
+                # the newlines in the resulting array. We'll do that with a look
+                # behind assertion.
+                splits = token.value.split(/(?<=\n)/)
+                index = 0
+
+                while index < splits.length
+                  line = splits[index]
+                  lineno = token[0][0] + index
+                  column = token[0][1]
+
+                  # Blank lines do not count toward common leading whitespace
+                  # calculation and do not need to be dedented.
+                  if dedent_next || index > 0
+                    column = 0
+                  end
+
+                  # If the dedent is 0 and we're not supposed to dedent the next
+                  # line or this line doesn't start with whitespace, then we
+                  # should concatenate the rest of the string to match ripper.
+                  if dedent == 0 && (!dedent_next || !line.start_with?(/\s/))
+                    line = splits[index..].join
+                    index = splits.length
+                  end
+
+                  # If we are supposed to dedent this line or if this is not the
+                  # first line of the string and this line isn't entirely blank,
+                  # then we need to insert an on_ignored_sp token and remove the
+                  # dedent from the beginning of the line.
+                  if (dedent > 0) && (dedent_next || index > 0)
+                    deleting = 0
+                    deleted_chars = []
+
+                    # Gather up all of the characters that we're going to
+                    # delete, stopping when you hit a character that would put
+                    # you over the dedent amount.
+                    line.each_char.with_index do |char, i|
+                      case char
+                      when "\r"
+                        if line.chars[i + 1] == "\n"
+                          break
+                        end
+                      when "\n"
+                        break
+                      when "\t"
+                        deleting = deleting - (deleting % TAB_WIDTH) + TAB_WIDTH
+                      else
+                        deleting += 1
+                      end
+
+                      break if deleting > dedent
+                      deleted_chars << char
+                    end
+
+                    # If we have something to delete, then delete it from the
+                    # string and insert an on_ignored_sp token.
+                    if deleted_chars.any?
+                      ignored = deleted_chars.join
+                      line.delete_prefix!(ignored)
+
+                      results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]])
+                      column = ignored.length
+                    end
+                  end
+
+                  results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty?
+                  index += 1
+                end
+              else
+                results << token
+              end
+            else
+              results << token
+            end
+
+            dedent_next =
+              ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) &&
+              embexpr_balance == 0
+          end
+
+          results
+        end
+      end
+
+ # Here we will split between the two types of heredocs and return the
+ # object that will store their tokens.
+ def self.build(opening)
+ case opening.value[2]
+ when "~"
+ DedentingHeredoc.new
+ when "-"
+ DashHeredoc.new(opening.value[3] != "'")
+ else
+ PlainHeredoc.new
+ end
+ end
+ end
+
+    # The source string being lexed and the path of the file it came from.
+    attr_reader :source, :filepath
+
+    def initialize(source, filepath = "")
+      @source = source
+      @filepath = filepath || "" # tolerate an explicitly-passed nil filepath
+    end
+
+    # Lex the source with YARP and then massage the resulting token stream so
+    # that it matches what Ripper would produce for the same source, returning
+    # a ParseResult whose value is the list of tokens.
+    def result
+      tokens = []
+
+      state = :default
+      heredoc_stack = [[]]
+
+      result = YARP.lex(source, @filepath)
+      result_value = result.value
+      previous_state = nil
+
+      # In previous versions of Ruby, Ripper wouldn't flush the bom before the
+      # first token, so we had to have a hack in place to account for that. This
+      # checks for that behavior.
+      bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
+      bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
+
+      result_value.each_with_index do |(token, lex_state), index|
+        lineno = token.location.start_line
+        column = token.location.start_column
+
+        # If there's a UTF-8 byte-order mark as the start of the file, then for
+        # certain tokens ripper sets the first token back by 3 bytes. It also
+        # keeps the byte order mark in the first token's value. This is weird,
+        # and I don't want to mirror that in our parser. So instead, we'll match
+        # up the columns and values here.
+        if bom && lineno == 1
+          column -= 3
+
+          if index == 0 && column == 0 && !bom_flushed
+            flushed =
+              case token.type
+              when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
+                   :GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I,
+                   :PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I,
+                   :PERCENT_UPPER_W, :STRING_BEGIN
+                true
+              when :REGEXP_BEGIN, :SYMBOL_BEGIN
+                token.value.start_with?("%")
+              else
+                false
+              end
+
+            unless flushed
+              # Mirror Ripper by keeping the BOM bytes on the first token.
+              column -= 3
+              value = token.value
+              value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding))
+            end
+          end
+        end
+
+        event = RIPPER.fetch(token.type)
+        value = token.value
+        lex_state = Ripper::Lexer::State.new(lex_state)
+
+        token =
+          case event
+          when :on___end__
+            EndContentToken.new([[lineno, column], event, value, lex_state])
+          when :on_comment
+            IgnoreStateToken.new([[lineno, column], event, value, lex_state])
+          when :on_heredoc_end
+            # Heredoc end tokens can be emitted in an odd order, so we don't
+            # want to bother comparing the state on them.
+            IgnoreStateToken.new([[lineno, column], event, value, lex_state])
+          when :on_ident
+            if lex_state == Ripper::EXPR_END
+              # If we have an identifier that follows a method name like:
+              #
+              #     def foo bar
+              #
+              # then Ripper will mark bar as END|LABEL if there is a local in a
+              # parent scope named bar because it hasn't pushed the local table
+              # yet. We do this more accurately, so we need to allow comparing
+              # against both END and END|LABEL.
+              ParamToken.new([[lineno, column], event, value, lex_state])
+            elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
+              # In the event that we're comparing identifiers, we're going to
+              # allow a little divergence. Ripper doesn't account for local
+              # variables introduced through named captures in regexes, and we
+              # do, which accounts for this difference.
+              IdentToken.new([[lineno, column], event, value, lex_state])
+            else
+              Token.new([[lineno, column], event, value, lex_state])
+            end
+          when :on_embexpr_end
+            IgnoreStateToken.new([[lineno, column], event, value, lex_state])
+          when :on_ignored_nl
+            # Ignored newlines can occasionally have a LABEL state attached to
+            # them which doesn't actually impact anything. We don't mirror that
+            # state so we ignore it.
+            IgnoredNewlineToken.new([[lineno, column], event, value, lex_state])
+          when :on_regexp_end
+            # On regex end, Ripper scans and then sets end state, so the ripper
+            # lexed output is begin, when it should be end. YARP sets lex state
+            # correctly to end state, but we want to be able to compare against
+            # Ripper's lexed state. So here, if it's a regexp end token, we
+            # output the state as the previous state, solely for the sake of
+            # comparison.
+            previous_token = result_value[index - 1][0]
+            lex_state =
+              if RIPPER.fetch(previous_token.type) == :on_embexpr_end
+                # If the previous token is embexpr_end, then we have to do even
+                # more processing. The end of an embedded expression sets the
+                # state to the state that it had at the beginning of the
+                # embedded expression. So we have to go and find that state and
+                # set it here.
+                counter = 1
+                current_index = index - 1
+
+                # Walk backward, balancing embexpr begin/end pairs, until we
+                # find the token that opened this embedded expression.
+                until counter == 0
+                  current_index -= 1
+                  current_event = RIPPER.fetch(result_value[current_index][0].type)
+                  counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
+                end
+
+                Ripper::Lexer::State.new(result_value[current_index][1])
+              else
+                previous_state
+              end
+
+            Token.new([[lineno, column], event, value, lex_state])
+          when :on_eof
+            previous_token = result_value[index - 1][0]
+
+            # If we're at the end of the file and the previous token was a
+            # comment and there is still whitespace after the comment, then
+            # Ripper will append a on_nl token (even though there isn't
+            # necessarily a newline). We mirror that here.
+            start_offset = previous_token.location.end_offset
+            end_offset = token.location.start_offset
+
+            if previous_token.type == :COMMENT && start_offset < end_offset
+              # Shift past the byte order mark so the byteslice below lines up
+              # with the actual source bytes.
+              if bom
+                start_offset += 3
+                end_offset += 3
+              end
+
+              tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
+            end
+
+            Token.new([[lineno, column], event, value, lex_state])
+          else
+            Token.new([[lineno, column], event, value, lex_state])
+          end
+
+        previous_state = lex_state
+
+        # The order in which tokens appear in our lexer is different from the
+        # order that they appear in Ripper. When we hit the declaration of a
+        # heredoc in YARP, we skip forward and lex the rest of the content of
+        # the heredoc before going back and lexing at the end of the heredoc
+        # identifier.
+        #
+        # To match up to ripper, we keep a small state variable around here to
+        # track whether we're in the middle of a heredoc or not. In this way we
+        # can shuffle around the token to match Ripper's output.
+        case state
+        when :default
+          # The default state is when there are no heredocs at all. In this
+          # state we can append the token to the list of tokens and move on.
+          tokens << token
+
+          # If we get the declaration of a heredoc, then we open a new heredoc
+          # and move into the heredoc_opened state.
+          if event == :on_heredoc_beg
+            state = :heredoc_opened
+            heredoc_stack.last << Heredoc.build(token)
+          end
+        when :heredoc_opened
+          # The heredoc_opened state is when we've seen the declaration of a
+          # heredoc and are now lexing the body of the heredoc. In this state we
+          # push tokens onto the most recently created heredoc.
+          heredoc_stack.last.last << token
+
+          case event
+          when :on_heredoc_beg
+            # If we receive a heredoc declaration while lexing the body of a
+            # heredoc, this means we have nested heredocs. In this case we'll
+            # push a new heredoc onto the stack and stay in the heredoc_opened
+            # state since we're now lexing the body of the new heredoc.
+            heredoc_stack << [Heredoc.build(token)]
+          when :on_heredoc_end
+            # If we receive the end of a heredoc, then we're done lexing the
+            # body of the heredoc. In this case we now have a completed heredoc
+            # but need to wait for the next newline to push it into the token
+            # stream.
+            state = :heredoc_closed
+          end
+        when :heredoc_closed
+          if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n"))
+            if heredoc_stack.size > 1
+              # Flush the nested heredocs into their parent before continuing
+              # to lex the parent heredoc's body.
+              flushing = heredoc_stack.pop
+              heredoc_stack.last.last << token
+
+              flushing.each do |heredoc|
+                heredoc.to_a.each do |flushed_token|
+                  heredoc_stack.last.last << flushed_token
+                end
+              end
+
+              state = :heredoc_opened
+              next
+            end
+          elsif event == :on_heredoc_beg
+            tokens << token
+            state = :heredoc_opened
+            heredoc_stack.last << Heredoc.build(token)
+            next
+          elsif heredoc_stack.size > 1
+            heredoc_stack[-2].last << token
+            next
+          end
+
+          # Flush the completed top-level heredocs into the token stream.
+          heredoc_stack.last.each do |heredoc|
+            tokens.concat(heredoc.to_a)
+          end
+
+          heredoc_stack.last.clear
+          state = :default
+
+          tokens << token
+        end
+      end
+
+      # Drop the EOF token from the list
+      tokens = tokens[0...-1]
+
+      # We sort by location to compare against Ripper's output
+      tokens.sort_by!(&:location)
+
+      if result_value.size - 1 > tokens.size
+        raise StandardError, "Lost tokens when performing lex_compat"
+      end
+
+      ParseResult.new(tokens, result.comments, result.errors, result.warnings, [])
+    end
+ end
+
+  # This is a class that wraps the Ripper lexer to produce almost exactly the
+  # same tokens.
+  class LexRipper
+    # The source string to be lexed.
+    attr_reader :source
+
+    def initialize(source)
+      @source = source
+    end
+
+    # Lex the source with Ripper and normalize the token stream: spaces are
+    # dropped, adjacent words_sep tokens are merged, and string content tokens
+    # that begin with a short-form interpolation (#$var or #@var) are folded
+    # into the preceding string content token.
+    def result
+      previous = []
+      results = []
+
+      Ripper.lex(source, raise_errors: true).each do |token|
+        case token[1]
+        when :on_sp
+          # skip
+        when :on_tstring_content
+          if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
+            # Merge into the previous content token by mutating its value.
+            previous[2] << token[2]
+          else
+            results << token
+            previous = token
+          end
+        when :on_words_sep
+          if previous[1] == :on_words_sep
+            previous[2] << token[2]
+          else
+            results << token
+            previous = token
+          end
+        else
+          results << token
+          previous = token
+        end
+      end
+
+      results
+    end
+  end
+end
diff --git a/lib/prism/node_ext.rb b/lib/prism/node_ext.rb
new file mode 100644
index 0000000000..760b3d75df
--- /dev/null
+++ b/lib/prism/node_ext.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+# Here we are reopening the YARP module to provide methods on nodes that aren't
+# templated and are meant as convenience methods.
+module YARP
+  class FloatNode < Node
+    # Returns the value of the node as a Ruby Float, parsed from the source
+    # slice with Kernel#Float.
+    def value
+      Float(slice)
+    end
+  end
+
+  class ImaginaryNode < Node
+    # Returns the value of the node as a Ruby Complex, built from the value of
+    # the inner numeric node.
+    def value
+      Complex(0, numeric.value)
+    end
+  end
+
+  class IntegerNode < Node
+    # Returns the value of the node as a Ruby Integer, parsed from the source
+    # slice with Kernel#Integer.
+    def value
+      Integer(slice)
+    end
+  end
+
+  class InterpolatedRegularExpressionNode < Node
+    # Returns a numeric value that represents the flags that were used to create
+    # the regular expression, composed from Regexp's option bit constants.
+    # NOTE: this logic is duplicated on RegularExpressionNode#options and the
+    # two should be kept in sync.
+    def options
+      o = flags & (RegularExpressionFlags::IGNORE_CASE | RegularExpressionFlags::EXTENDED | RegularExpressionFlags::MULTI_LINE)
+      o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8)
+      o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT)
+      o
+    end
+  end
+
+  class RationalNode < Node
+    # Returns the value of the node as a Ruby Rational. The trailing "r" of the
+    # literal is dropped before conversion.
+    def value
+      Rational(slice.chomp("r"))
+    end
+  end
+
+  class RegularExpressionNode < Node
+    # Returns a numeric value that represents the flags that were used to create
+    # the regular expression, composed from Regexp's option bit constants.
+    # NOTE: this logic is duplicated on InterpolatedRegularExpressionNode#options
+    # and the two should be kept in sync.
+    def options
+      o = flags & (RegularExpressionFlags::IGNORE_CASE | RegularExpressionFlags::EXTENDED | RegularExpressionFlags::MULTI_LINE)
+      o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8)
+      o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT)
+      o
+    end
+  end
+end
diff --git a/lib/prism/node_inspector.rb b/lib/prism/node_inspector.rb
new file mode 100644
index 0000000000..c09840a471
--- /dev/null
+++ b/lib/prism/node_inspector.rb
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+module YARP
+ # This object is responsible for generating the output for the inspect method
+ # implementations of child nodes.
+ class NodeInspector
+ attr_reader :prefix, :output
+
+ def initialize(prefix = "")
+ @prefix = prefix
+ @output = +""
+ end
+
+ # Appends a line to the output with the current prefix.
+ def <<(line)
+ output << "#{prefix}#{line}"
+ end
+
+ # This generates a string that is used as the header of the inspect output
+ # for any given node.
+ def header(node)
+ output = +"@ #{node.class.name.split("::").last} ("
+ output << "location: (#{node.location.start_line},#{node.location.start_column})-(#{node.location.end_line},#{node.location.end_column})"
+ output << ", newline: true" if node.newline?
+ output << ")\n"
+ output
+ end
+
+ # Generates a string that represents a list of nodes. It handles properly
+ # using the box drawing characters to make the output look nice.
+ def list(prefix, nodes)
+ output = +"(length: #{nodes.length})\n"
+ last_index = nodes.length - 1
+
+ nodes.each_with_index do |node, index|
+ pointer, preadd = (index == last_index) ? ["└── ", " "] : ["├── ", "│ "]
+ node_prefix = "#{prefix}#{preadd}"
+ output << node.inspect(NodeInspector.new(node_prefix)).sub(node_prefix, "#{prefix}#{pointer}")
+ end
+
+ output
+ end
+
+ # Generates a string that represents a location field on a node.
+ def location(value)
+ if value
+ "(#{value.start_line},#{value.start_column})-(#{value.end_line},#{value.end_column}) = #{value.slice.inspect}"
+ else
+ "∅"
+ end
+ end
+
+ # Generates a string that represents a child node.
+ def child_node(node, append)
+ node.inspect(child_inspector(append)).delete_prefix(prefix)
+ end
+
+ # Returns a new inspector that can be used to inspect a child node.
+ def child_inspector(append)
+ NodeInspector.new("#{prefix}#{append}")
+ end
+
+ # Returns the output as a string.
+ def to_str
+ output
+ end
+ end
+end
diff --git a/lib/prism/pack.rb b/lib/prism/pack.rb
new file mode 100644
index 0000000000..83f5569923
--- /dev/null
+++ b/lib/prism/pack.rb
@@ -0,0 +1,185 @@
+# frozen_string_literal: true
+
+module YARP
+ module Pack
+    # Each of these symbols is defined as a constant on the Pack module whose
+    # value is the symbol itself. Together they name the directive types,
+    # signedness, endianness, size, and length categories that directives can
+    # carry.
+    %i[
+      SPACE
+      COMMENT
+      INTEGER
+      UTF8
+      BER
+      FLOAT
+      STRING_SPACE_PADDED
+      STRING_NULL_PADDED
+      STRING_NULL_TERMINATED
+      STRING_MSB
+      STRING_LSB
+      STRING_HEX_HIGH
+      STRING_HEX_LOW
+      STRING_UU
+      STRING_MIME
+      STRING_BASE64
+      STRING_FIXED
+      STRING_POINTER
+      MOVE
+      BACK
+      NULL
+
+      UNSIGNED
+      SIGNED
+      SIGNED_NA
+
+      AGNOSTIC_ENDIAN
+      LITTLE_ENDIAN
+      BIG_ENDIAN
+      NATIVE_ENDIAN
+      ENDIAN_NA
+
+      SIZE_SHORT
+      SIZE_INT
+      SIZE_LONG
+      SIZE_LONG_LONG
+      SIZE_8
+      SIZE_16
+      SIZE_32
+      SIZE_64
+      SIZE_P
+      SIZE_NA
+
+      LENGTH_FIXED
+      LENGTH_MAX
+      LENGTH_RELATIVE
+      LENGTH_NA
+    ].each do |const|
+      # The constant's value is the symbol itself, e.g. Pack::SPACE == :SPACE.
+      const_set(const, const)
+    end
+
+    # A single directive within a pack or unpack template, together with the
+    # metadata extracted for it.
+    class Directive
+      attr_reader :version, :variant, :source, :type, :signed, :endian, :size, :length_type, :length
+
+      def initialize(version, variant, source, type, signed, endian, size, length_type, length)
+        @version = version
+        @variant = variant
+        @source = source
+        @type = type
+        @signed = signed
+        @endian = endian
+        @size = size
+        @length_type = length_type
+        @length = length
+      end
+
+      # Human-readable names for the endianness constants.
+      ENDIAN_DESCRIPTIONS = {
+        AGNOSTIC_ENDIAN: 'agnostic',
+        LITTLE_ENDIAN: 'little-endian (VAX)',
+        BIG_ENDIAN: 'big-endian (network)',
+        NATIVE_ENDIAN: 'native-endian',
+        ENDIAN_NA: 'n/a'
+      }
+
+      # Human-readable names for the signedness constants.
+      SIGNED_DESCRIPTIONS = {
+        UNSIGNED: 'unsigned',
+        SIGNED: 'signed',
+        SIGNED_NA: 'n/a'
+      }
+
+      # Human-readable names for the size constants.
+      SIZE_DESCRIPTIONS = {
+        SIZE_SHORT: 'short',
+        SIZE_INT: 'int-width',
+        SIZE_LONG: 'long',
+        SIZE_LONG_LONG: 'long long',
+        SIZE_8: '8-bit',
+        SIZE_16: '16-bit',
+        SIZE_32: '32-bit',
+        SIZE_64: '64-bit',
+        SIZE_P: 'pointer-width'
+      }
+
+      # Returns an English description of this directive. Raises RuntimeError
+      # for an unrecognized directive type.
+      def describe
+        case type
+        when SPACE
+          'whitespace'
+        when COMMENT
+          'comment'
+        when INTEGER
+          # 8-bit integers have no meaningful endianness, so it is left out of
+          # their description.
+          if size == SIZE_8
+            base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer"
+          else
+            base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer"
+          end
+          case length_type
+          when LENGTH_FIXED
+            if length > 1
+              base + ", x#{length}"
+            else
+              base
+            end
+          when LENGTH_MAX
+            base + ', as many as possible'
+          end
+        when UTF8
+          'UTF-8 character'
+        when BER
+          'BER-compressed integer'
+        when FLOAT
+          "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float"
+        when STRING_SPACE_PADDED
+          'arbitrary binary string (space padded)'
+        when STRING_NULL_PADDED
+          'arbitrary binary string (null padded, count is width)'
+        when STRING_NULL_TERMINATED
+          'arbitrary binary string (null padded, count is width), except that null is added with *'
+        when STRING_MSB
+          'bit string (MSB first)'
+        when STRING_LSB
+          'bit string (LSB first)'
+        when STRING_HEX_HIGH
+          'hex string (high nibble first)'
+        when STRING_HEX_LOW
+          'hex string (low nibble first)'
+        when STRING_UU
+          'UU-encoded string'
+        when STRING_MIME
+          'quoted printable, MIME encoding'
+        when STRING_BASE64
+          'base64 encoded string'
+        when STRING_FIXED
+          'pointer to a structure (fixed-length string)'
+        when STRING_POINTER
+          'pointer to a null-terminated string'
+        when MOVE
+          'move to absolute position'
+        when BACK
+          'back up a byte'
+        when NULL
+          'null byte'
+        else
+          raise
+        end
+      end
+    end
+
+ class Format
+ attr_reader :directives, :encoding
+
+ def initialize(directives, encoding)
+ @directives = directives
+ @encoding = encoding
+ end
+
+ def describe
+ source_width = directives.map { |d| d.source.inspect.length }.max
+ directive_lines = directives.map do |directive|
+ if directive.type == SPACE
+ source = directive.source.inspect
+ else
+ source = directive.source
+ end
+ " #{source.ljust(source_width)} #{directive.describe}"
+ end
+
+ (['Directives:'] + directive_lines + ['Encoding:', " #{encoding}"]).join("\n")
+ end
+ end
+ end
+end
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
new file mode 100644
index 0000000000..2d9d855b86
--- /dev/null
+++ b/lib/prism/parse_result.rb
@@ -0,0 +1,266 @@
+# frozen_string_literal: true
+
+module YARP
+  # This represents a source of Ruby code that has been parsed. It is used in
+  # conjunction with locations to allow them to resolve line numbers and source
+  # ranges.
+  class Source
+    # The source string and the list of byte offsets at which each line starts.
+    attr_reader :source, :offsets
+
+    def initialize(source, offsets = compute_offsets(source))
+      @source = source
+      @offsets = offsets
+    end
+
+    # Extract length bytes of the source starting at the given byte offset.
+    def slice(offset, length)
+      source.byteslice(offset, length)
+    end
+
+    # The 1-indexed line number that contains the given byte offset: the index
+    # of the first line whose start lies beyond the offset, or the last line.
+    def line(value)
+      offsets.bsearch_index { |offset| offset > value } || offsets.length
+    end
+
+    # The byte offset of the start of the line containing the given offset.
+    def line_offset(value)
+      offsets[line(value) - 1]
+    end
+
+    # The 0-indexed byte column of the given offset within its line.
+    def column(value)
+      value - offsets[line(value) - 1]
+    end
+
+    private
+
+    # Build the list of line-start byte offsets by scanning the binary form of
+    # the source for newlines. Line 1 always starts at offset 0.
+    def compute_offsets(code)
+      offsets = [0]
+      code.b.scan("\n") { offsets << $~.end(0) }
+      offsets
+    end
+  end
+
+ # This represents a location in the source.
+ class Location
+ # A Source object that is used to determine more information from the given
+ # offset and length.
+ protected attr_reader :source
+
+ # The byte offset from the beginning of the source where this location
+ # starts.
+ attr_reader :start_offset
+
+ # The length of this location in bytes.
+ attr_reader :length
+
+ # The list of comments attached to this location
+ attr_reader :comments
+
+ def initialize(source, start_offset, length)
+ @source = source
+ @start_offset = start_offset
+ @length = length
+ @comments = []
+ end
+
+ # Create a new location object with the given options.
+ def copy(**options)
+ Location.new(
+ options.fetch(:source) { source },
+ options.fetch(:start_offset) { start_offset },
+ options.fetch(:length) { length }
+ )
+ end
+
+ # Returns a string representation of this location.
+ def inspect
+ "#<YARP::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
+ end
+
+ # The source code that this location represents.
+ def slice
+ source.slice(start_offset, length)
+ end
+
+ # The byte offset from the beginning of the source where this location ends.
+ def end_offset
+ start_offset + length
+ end
+
+ # The line number where this location starts.
+ def start_line
+ source.line(start_offset)
+ end
+
+ # The content of the line where this location starts before this location.
+ def start_line_slice
+ offset = source.line_offset(start_offset)
+ source.slice(offset, start_offset - offset)
+ end
+
+ # The line number where this location ends.
+ def end_line
+ source.line(end_offset - 1)
+ end
+
+ # The column number in bytes where this location starts from the start of
+ # the line.
+ def start_column
+ source.column(start_offset)
+ end
+
+ # The column number in bytes where this location ends from the start of the
+ # line.
+ def end_column
+ source.column(end_offset)
+ end
+
+ def deconstruct_keys(keys)
+ { start_offset: start_offset, end_offset: end_offset }
+ end
+
+ def pretty_print(q)
+ q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column}))")
+ end
+
+ def ==(other)
+ other.is_a?(Location) &&
+ other.start_offset == start_offset &&
+ other.end_offset == end_offset
+ end
+
+ # Returns a new location that stretches from this location to the given
+ # other location. Raises an error if this location is not before the other
+ # location or if they don't share the same source.
+ def join(other)
+ raise "Incompatible sources" if source != other.source
+ raise "Incompatible locations" if start_offset > other.start_offset
+
+ Location.new(source, start_offset, other.end_offset - start_offset)
+ end
+
+ def self.null
+ new(0, 0)
+ end
+ end
+
+ # This represents a comment that was encountered during parsing.
+ class Comment
+ TYPES = [:inline, :embdoc, :__END__]
+
+ attr_reader :type, :location
+
+ def initialize(type, location)
+ @type = type
+ @location = location
+ end
+
+ def deconstruct_keys(keys)
+ { type: type, location: location }
+ end
+
+ # Returns true if the comment happens on the same line as other code and false if the comment is by itself
+ def trailing?
+ type == :inline && !location.start_line_slice.strip.empty?
+ end
+
+ def inspect
+ "#<YARP::Comment @type=#{@type.inspect} @location=#{@location.inspect}>"
+ end
+ end
+
+ # This represents an error that was encountered during parsing.
+ class ParseError
+ attr_reader :message, :location
+
+ def initialize(message, location)
+ @message = message
+ @location = location
+ end
+
+ def deconstruct_keys(keys)
+ { message: message, location: location }
+ end
+
+ def inspect
+ "#<YARP::ParseError @message=#{@message.inspect} @location=#{@location.inspect}>"
+ end
+ end
+
+ # This represents a warning that was encountered during parsing.
+ class ParseWarning
+ attr_reader :message, :location
+
+ def initialize(message, location)
+ @message = message
+ @location = location
+ end
+
+ def deconstruct_keys(keys)
+ { message: message, location: location }
+ end
+
+ def inspect
+ "#<YARP::ParseWarning @message=#{@message.inspect} @location=#{@location.inspect}>"
+ end
+ end
+
+ # This represents the result of a call to ::parse or ::parse_file. It contains
+ # the AST, any comments that were encounters, and any errors that were
+ # encountered.
+ class ParseResult
+ attr_reader :value, :comments, :errors, :warnings, :source
+
+ def initialize(value, comments, errors, warnings, source)
+ @value = value
+ @comments = comments
+ @errors = errors
+ @warnings = warnings
+ @source = source
+ end
+
+ def deconstruct_keys(keys)
+ { value: value, comments: comments, errors: errors, warnings: warnings }
+ end
+
+ def success?
+ errors.empty?
+ end
+
+ def failure?
+ !success?
+ end
+ end
+
+  # This represents a token from the Ruby source.
+  class Token
+    # The token's type symbol, its string value, and its location in the
+    # source.
+    attr_reader :type, :value, :location
+
+    def initialize(type, value, location)
+      @type = type
+      @value = value
+      @location = location
+    end
+
+    # Allow pattern matching against the type, value, and location.
+    def deconstruct_keys(keys)
+      { type: type, value: value, location: location }
+    end
+
+    # Implement the pretty print interface for tokens.
+    def pretty_print(q)
+      q.group do
+        q.text(type.to_s)
+        self.location.pretty_print(q)
+        q.text("(")
+        q.nest(2) do
+          q.breakable("")
+          q.pp(value)
+        end
+        q.breakable("")
+        q.text(")")
+      end
+    end
+
+    # Tokens are equal when their type and value are equal. The location is
+    # deliberately not compared.
+    def ==(other)
+      other.is_a?(Token) &&
+        other.type == type &&
+        other.value == value
+    end
+  end
+end
diff --git a/lib/prism/parse_result/comments.rb b/lib/prism/parse_result/comments.rb
new file mode 100644
index 0000000000..88240609b1
--- /dev/null
+++ b/lib/prism/parse_result/comments.rb
@@ -0,0 +1,172 @@
+# frozen_string_literal: true
+
+module YARP
+ class ParseResult
+    # When we've parsed the source, we have both the syntax tree and the list of
+    # comments that we found in the source. This class is responsible for
+    # walking the tree and finding the nearest location to attach each comment.
+    #
+    # It does this by first finding the nearest locations to each comment.
+    # Locations can either come from nodes directly or from location fields on
+    # nodes. For example, a `ClassNode` has an overall location encompassing the
+    # entire class, but it also has a location for the `class` keyword.
+    #
+    # Once the nearest locations are found, it determines which one to attach
+    # to. If it's a trailing comment (a comment on the same line as other source
+    # code), it will favor attaching to the nearest location that occurs before
+    # the comment. Otherwise it will favor attaching to the nearest location
+    # that is after the comment.
+    class Comments
+      # A target for attaching comments that is based on a specific node's
+      # location.
+      class NodeTarget
+        attr_reader :node
+
+        def initialize(node)
+          @node = node
+        end
+
+        def start_offset
+          node.location.start_offset
+        end
+
+        def end_offset
+          node.location.end_offset
+        end
+
+        # True when the comment lies entirely within this node's location.
+        def encloses?(comment)
+          start_offset <= comment.location.start_offset &&
+            comment.location.end_offset <= end_offset
+        end
+
+        def <<(comment)
+          node.location.comments << comment
+        end
+      end
+
+      # A target for attaching comments that is based on a location field on a
+      # node. For example, the `end` token of a ClassNode.
+      class LocationTarget
+        attr_reader :location
+
+        def initialize(location)
+          @location = location
+        end
+
+        def start_offset
+          location.start_offset
+        end
+
+        def end_offset
+          location.end_offset
+        end
+
+        # A bare location can never enclose a comment, because there is no
+        # node to recurse into.
+        def encloses?(comment)
+          false
+        end
+
+        def <<(comment)
+          location.comments << comment
+        end
+      end
+
+      # The parse result whose comments are being attached.
+      attr_reader :parse_result
+
+      def initialize(parse_result)
+        @parse_result = parse_result
+      end
+
+      # Attach every comment in the parse result to the closest target found
+      # in the tree, mutating the target locations' comment lists.
+      def attach!
+        parse_result.comments.each do |comment|
+          preceding, enclosing, following = nearest_targets(parse_result.value, comment)
+          target =
+            if comment.trailing?
+              preceding || following || enclosing || NodeTarget.new(parse_result.value)
+            else
+              # If a comment exists on its own line, prefer a leading comment.
+              following || preceding || enclosing || NodeTarget.new(parse_result.value)
+            end
+
+          target << comment
+        end
+      end
+
+      private
+
+      # Responsible for finding the nearest targets to the given comment within
+      # the context of the given encapsulating node. Returns a three-element
+      # array of [preceding, enclosing, following] targets, any of which may be
+      # nil.
+      def nearest_targets(node, comment)
+        comment_start = comment.location.start_offset
+        comment_end = comment.location.end_offset
+
+        # Collect candidate targets from the node's comment_targets: children
+        # of statement lists, plain nodes, and bare location fields.
+        targets = []
+        node.comment_targets.map do |value|
+          case value
+          when StatementsNode
+            targets.concat(value.body.map { |node| NodeTarget.new(node) })
+          when Node
+            targets << NodeTarget.new(value)
+          when Location
+            targets << LocationTarget.new(value)
+          end
+        end
+
+        targets.sort_by!(&:start_offset)
+        preceding = nil
+        following = nil
+
+        left = 0
+        right = targets.length
+
+        # This is a custom binary search that finds the nearest nodes to the
+        # given comment. When it finds a node that completely encapsulates the
+        # comment, it recurses downward into the tree.
+        while left < right
+          middle = (left + right) / 2
+          target = targets[middle]
+
+          target_start = target.start_offset
+          target_end = target.end_offset
+
+          if target.encloses?(comment)
+            # The comment is completely contained by this target. Abandon the
+            # binary search at this level.
+            return nearest_targets(target.node, comment)
+          end
+
+          if target_end <= comment_start
+            # This target falls completely before the comment. Because we will
+            # never consider this target or any targets before it again, this
+            # target must be the closest preceding target we have encountered so
+            # far.
+            preceding = target
+            left = middle + 1
+            next
+          end
+
+          if comment_end <= target_start
+            # This target falls completely after the comment. Because we will
+            # never consider this target or any targets after it again, this
+            # target must be the closest following target we have encountered so
+            # far.
+            following = target
+            right = middle
+            next
+          end
+
+          # This should only happen if there is a bug in this parser.
+          raise "Comment location overlaps with a target location"
+        end
+
+        [preceding, NodeTarget.new(node), following]
+      end
+    end
+
+    private_constant :Comments
+
+    # Attach the list of comments to their respective locations in the tree.
+    # This mutates the comment lists on the locations of the parsed value.
+    def attach_comments!
+      Comments.new(self).attach!
+    end
+ end
+end
diff --git a/lib/prism/parse_result/newlines.rb b/lib/prism/parse_result/newlines.rb
new file mode 100644
index 0000000000..d16600afd0
--- /dev/null
+++ b/lib/prism/parse_result/newlines.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+module YARP
+ class ParseResult
+ # The :line tracepoint event gets fired whenever the Ruby VM encounters an
+ # expression on a new line. The types of expressions that can trigger this
+ # event are:
+ #
+ # * if statements
+ # * unless statements
+ # * nodes that are children of statements lists
+ #
+ # In order to keep track of the newlines, we have a list of offsets that
+ # come back from the parser. We assign these offsets to the first nodes that
+ # we find in the tree that are on those lines.
+ #
+ # Note that the logic in this file should be kept in sync with the Java
+ # MarkNewlinesVisitor, since that visitor is responsible for marking the
+ # newlines for JRuby/TruffleRuby.
+ class Newlines < Visitor
+ def initialize(newline_marked)
+ @newline_marked = newline_marked
+ end
+
+ def visit_block_node(node)
+ old_newline_marked = @newline_marked
+ @newline_marked = Array.new(old_newline_marked.size, false)
+
+ begin
+ super(node)
+ ensure
+ @newline_marked = old_newline_marked
+ end
+ end
+
+ alias_method :visit_lambda_node, :visit_block_node
+
+ def visit_if_node(node)
+ node.set_newline_flag(@newline_marked)
+ super(node)
+ end
+
+ alias_method :visit_unless_node, :visit_if_node
+
+ def visit_statements_node(node)
+ node.body.each do |child|
+ child.set_newline_flag(@newline_marked)
+ end
+ super(node)
+ end
+ end
+
+ private_constant :Newlines
+
+ # Walk the tree and mark nodes that are on a new line.
+ def mark_newlines!
+ value.accept(Newlines.new(Array.new(1 + source.offsets.size, false)))
+ end
+ end
+end
diff --git a/lib/prism/pattern.rb b/lib/prism/pattern.rb
new file mode 100644
index 0000000000..f7519137e4
--- /dev/null
+++ b/lib/prism/pattern.rb
@@ -0,0 +1,239 @@
+# frozen_string_literal: true
+
+module YARP
+ # A pattern is an object that wraps a Ruby pattern matching expression. The
+ # expression would normally be passed to an `in` clause within a `case`
+ # expression or a rightward assignment expression. For example, in the
+ # following snippet:
+ #
+ # case node
+ # in ConstantPathNode[ConstantReadNode[name: :YARP], ConstantReadNode[name: :Pattern]]
+ # end
+ #
+ # the pattern is the `ConstantPathNode[...]` expression.
+ #
+ # The pattern gets compiled into an object that responds to #call by running
+ # the #compile method. This method itself will run back through YARP to
+ # parse the expression into a tree, then walk the tree to generate the
+ # necessary callable objects. For example, if you wanted to compile the
+ # expression above into a callable, you would:
+ #
+ # callable = YARP::Pattern.new("ConstantPathNode[ConstantReadNode[name: :YARP], ConstantReadNode[name: :Pattern]]").compile
+ # callable.call(node)
+ #
+ # The callable object returned by #compile is guaranteed to respond to #call
+ # with a single argument, which is the node to match against. It also is
+ # guaranteed to respond to #===, which means it itself can be used in a `case`
+ # expression, as in:
+ #
+ # case node
+ # when callable
+ # end
+ #
+ # If the query given to the initializer cannot be compiled into a valid
+ # matcher (either because of a syntax error or because it is using syntax we
+ # do not yet support) then a YARP::Pattern::CompilationError will be
+ # raised.
+ class Pattern
+ # Raised when the query given to a pattern is either invalid Ruby syntax or
+ # is using syntax that we don't yet support.
+ class CompilationError < StandardError
+ def initialize(repr)
+ super(<<~ERROR)
+ YARP was unable to compile the pattern you provided into a usable
+          expression. It failed to understand the node represented by:
+
+ #{repr}
+
+ Note that not all syntax supported by Ruby's pattern matching syntax
+ is also supported by YARP's patterns. If you're using some syntax
+ that you believe should be supported, please open an issue on
+ GitHub at https://github.com/ruby/yarp/issues/new.
+ ERROR
+ end
+ end
+
+ attr_reader :query
+
+ def initialize(query)
+ @query = query
+ @compiled = nil
+ end
+
+ def compile
+ result = YARP.parse("case nil\nin #{query}\nend")
+ compile_node(result.value.statements.body.last.conditions.last.pattern)
+ end
+
+ def scan(root)
+ return to_enum(__method__, root) unless block_given?
+
+ @compiled ||= compile
+ queue = [root]
+
+ while (node = queue.shift)
+ yield node if @compiled.call(node)
+ queue.concat(node.compact_child_nodes)
+ end
+ end
+
+ private
+
+ # Shortcut for combining two procs into one that returns true if both return
+ # true.
+ def combine_and(left, right)
+ ->(other) { left.call(other) && right.call(other) }
+ end
+
+ # Shortcut for combining two procs into one that returns true if either
+ # returns true.
+ def combine_or(left, right)
+ ->(other) { left.call(other) || right.call(other) }
+ end
+
+ # Raise an error because the given node is not supported.
+ def compile_error(node)
+ raise CompilationError, node.inspect
+ end
+
+ # in [foo, bar, baz]
+ def compile_array_pattern_node(node)
+ compile_error(node) if !node.rest.nil? || node.posts.any?
+
+ constant = node.constant
+ compiled_constant = compile_node(constant) if constant
+
+ preprocessed = node.requireds.map { |required| compile_node(required) }
+
+ compiled_requireds = ->(other) do
+ deconstructed = other.deconstruct
+
+ deconstructed.length == preprocessed.length &&
+ preprocessed
+ .zip(deconstructed)
+ .all? { |(matcher, value)| matcher.call(value) }
+ end
+
+ if compiled_constant
+ combine_and(compiled_constant, compiled_requireds)
+ else
+ compiled_requireds
+ end
+ end
+
+ # in foo | bar
+ def compile_alternation_pattern_node(node)
+ combine_or(compile_node(node.left), compile_node(node.right))
+ end
+
+ # in YARP::ConstantReadNode
+ def compile_constant_path_node(node)
+ parent = node.parent
+
+ if parent.is_a?(ConstantReadNode) && parent.slice == "YARP"
+ compile_node(node.child)
+ else
+ compile_error(node)
+ end
+ end
+
+ # in ConstantReadNode
+ # in String
+ def compile_constant_read_node(node)
+ value = node.slice
+
+ if YARP.const_defined?(value, false)
+ clazz = YARP.const_get(value)
+
+ ->(other) { clazz === other }
+ elsif Object.const_defined?(value, false)
+ clazz = Object.const_get(value)
+
+ ->(other) { clazz === other }
+ else
+ compile_error(node)
+ end
+ end
+
+ # in InstanceVariableReadNode[name: Symbol]
+ # in { name: Symbol }
+ def compile_hash_pattern_node(node)
+ compile_error(node) unless node.kwrest.nil?
+ compiled_constant = compile_node(node.constant) if node.constant
+
+ preprocessed =
+ node.assocs.to_h do |assoc|
+ [assoc.key.unescaped.to_sym, compile_node(assoc.value)]
+ end
+
+ compiled_keywords = ->(other) do
+ deconstructed = other.deconstruct_keys(preprocessed.keys)
+
+ preprocessed.all? do |keyword, matcher|
+ deconstructed.key?(keyword) && matcher.call(deconstructed[keyword])
+ end
+ end
+
+ if compiled_constant
+ combine_and(compiled_constant, compiled_keywords)
+ else
+ compiled_keywords
+ end
+ end
+
+ # in nil
+ def compile_nil_node(node)
+ ->(attribute) { attribute.nil? }
+ end
+
+ # in /foo/
+ def compile_regular_expression_node(node)
+ regexp = Regexp.new(node.unescaped, node.closing[1..])
+
+ ->(attribute) { regexp === attribute }
+ end
+
+ # in ""
+ # in "foo"
+ def compile_string_node(node)
+ string = node.unescaped
+
+ ->(attribute) { string === attribute }
+ end
+
+ # in :+
+ # in :foo
+ def compile_symbol_node(node)
+ symbol = node.unescaped.to_sym
+
+ ->(attribute) { symbol === attribute }
+ end
+
+ # Compile any kind of node. Dispatch out to the individual compilation
+ # methods based on the type of node.
+ def compile_node(node)
+ case node
+ when AlternationPatternNode
+ compile_alternation_pattern_node(node)
+ when ArrayPatternNode
+ compile_array_pattern_node(node)
+ when ConstantPathNode
+ compile_constant_path_node(node)
+ when ConstantReadNode
+ compile_constant_read_node(node)
+ when HashPatternNode
+ compile_hash_pattern_node(node)
+ when NilNode
+ compile_nil_node(node)
+ when RegularExpressionNode
+ compile_regular_expression_node(node)
+ when StringNode
+ compile_string_node(node)
+ when SymbolNode
+ compile_symbol_node(node)
+ else
+ compile_error(node)
+ end
+ end
+ end
+end
diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec
new file mode 100644
index 0000000000..d1a7bbbbcf
--- /dev/null
+++ b/lib/prism/prism.gemspec
@@ -0,0 +1,113 @@
+# frozen_string_literal: true
+
+Gem::Specification.new do |spec|
+ spec.name = "yarp"
+ spec.version = "0.12.0"
+ spec.authors = ["Shopify"]
+ spec.email = ["ruby@shopify.com"]
+
+ spec.summary = "Yet Another Ruby Parser"
+ spec.homepage = "https://github.com/ruby/yarp"
+ spec.license = "MIT"
+
+ spec.required_ruby_version = ">= 3.0.0"
+
+ spec.require_paths = ["lib"]
+ spec.files = [
+ "CHANGELOG.md",
+ "CODE_OF_CONDUCT.md",
+ "CONTRIBUTING.md",
+ "LICENSE.md",
+ "Makefile",
+ "README.md",
+ "config.yml",
+ "docs/build_system.md",
+ "docs/building.md",
+ "docs/configuration.md",
+ "docs/design.md",
+ "docs/encoding.md",
+ "docs/fuzzing.md",
+ "docs/heredocs.md",
+ "docs/mapping.md",
+ "docs/ripper.md",
+ "docs/ruby_api.md",
+ "docs/serialization.md",
+ "docs/testing.md",
+ "ext/yarp/api_node.c",
+ "ext/yarp/api_pack.c",
+ "ext/yarp/extension.c",
+ "ext/yarp/extension.h",
+ "include/yarp.h",
+ "include/yarp/ast.h",
+ "include/yarp/defines.h",
+ "include/yarp/diagnostic.h",
+ "include/yarp/enc/yp_encoding.h",
+ "include/yarp/node.h",
+ "include/yarp/pack.h",
+ "include/yarp/parser.h",
+ "include/yarp/regexp.h",
+ "include/yarp/unescape.h",
+ "include/yarp/util/yp_buffer.h",
+ "include/yarp/util/yp_char.h",
+ "include/yarp/util/yp_constant_pool.h",
+ "include/yarp/util/yp_list.h",
+ "include/yarp/util/yp_memchr.h",
+ "include/yarp/util/yp_newline_list.h",
+ "include/yarp/util/yp_state_stack.h",
+ "include/yarp/util/yp_string.h",
+ "include/yarp/util/yp_string_list.h",
+ "include/yarp/util/yp_strpbrk.h",
+ "include/yarp/version.h",
+ "lib/yarp.rb",
+ "lib/yarp/compiler.rb",
+ "lib/yarp/debug.rb",
+ "lib/yarp/desugar_compiler.rb",
+ "lib/yarp/dispatcher.rb",
+ "lib/yarp/dsl.rb",
+ "lib/yarp/ffi.rb",
+ "lib/yarp/lex_compat.rb",
+ "lib/yarp/mutation_compiler.rb",
+ "lib/yarp/node.rb",
+ "lib/yarp/node_ext.rb",
+ "lib/yarp/node_inspector.rb",
+ "lib/yarp/pack.rb",
+ "lib/yarp/parse_result.rb",
+ "lib/yarp/pattern.rb",
+ "lib/yarp/ripper_compat.rb",
+ "lib/yarp/serialize.rb",
+ "lib/yarp/parse_result/comments.rb",
+ "lib/yarp/parse_result/newlines.rb",
+ "lib/yarp/visitor.rb",
+ "src/diagnostic.c",
+ "src/enc/yp_big5.c",
+ "src/enc/yp_euc_jp.c",
+ "src/enc/yp_gbk.c",
+ "src/enc/yp_shift_jis.c",
+ "src/enc/yp_tables.c",
+ "src/enc/yp_unicode.c",
+ "src/enc/yp_windows_31j.c",
+ "src/node.c",
+ "src/pack.c",
+ "src/prettyprint.c",
+ "src/regexp.c",
+ "src/serialize.c",
+ "src/token_type.c",
+ "src/unescape.c",
+ "src/util/yp_buffer.c",
+ "src/util/yp_char.c",
+ "src/util/yp_constant_pool.c",
+ "src/util/yp_list.c",
+ "src/util/yp_memchr.c",
+ "src/util/yp_newline_list.c",
+ "src/util/yp_state_stack.c",
+ "src/util/yp_string.c",
+ "src/util/yp_string_list.c",
+ "src/util/yp_strncasecmp.c",
+ "src/util/yp_strpbrk.c",
+ "src/yarp.c",
+ "yarp.gemspec",
+ ]
+
+ spec.extensions = ["ext/yarp/extconf.rb"]
+ spec.metadata["allowed_push_host"] = "https://rubygems.org"
+end
diff --git a/lib/prism/ripper_compat.rb b/lib/prism/ripper_compat.rb
new file mode 100644
index 0000000000..c76f3fd07a
--- /dev/null
+++ b/lib/prism/ripper_compat.rb
@@ -0,0 +1,174 @@
+# frozen_string_literal: true
+
+require "ripper"
+
+module YARP
+ # This class is meant to provide a compatibility layer between YARP and
+ # Ripper. It functions by parsing the entire tree first and then walking it
+ # and executing each of the Ripper callbacks as it goes.
+ #
+ # This class is going to necessarily be slower than the native Ripper API. It
+ # is meant as a stopgap until developers migrate to using YARP. It is also
+ # meant as a test harness for the YARP parser.
+ class RipperCompat
+ # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
+ # returns the arrays of [type, *children].
+ class SexpBuilder < RipperCompat
+ private
+
+ Ripper::PARSER_EVENTS.each do |event|
+ define_method(:"on_#{event}") do |*args|
+ [event, *args]
+ end
+ end
+
+ Ripper::SCANNER_EVENTS.each do |event|
+ define_method(:"on_#{event}") do |value|
+ [:"@#{event}", value, [lineno, column]]
+ end
+ end
+ end
+
+ # This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
+ # returns the same values as ::Ripper::SexpBuilder except with a couple of
+ # niceties that flatten linked lists into arrays.
+ class SexpBuilderPP < SexpBuilder
+ private
+
+ def _dispatch_event_new
+ []
+ end
+
+ def _dispatch_event_push(list, item)
+ list << item
+ list
+ end
+
+ Ripper::PARSER_EVENT_TABLE.each do |event, arity|
+ case event
+ when /_new\z/
+ alias_method :"on_#{event}", :_dispatch_event_new if arity == 0
+ when /_add\z/
+ alias_method :"on_#{event}", :_dispatch_event_push
+ end
+ end
+ end
+
+ attr_reader :source, :lineno, :column
+
+ def initialize(source)
+ @source = source
+ @result = nil
+ @lineno = nil
+ @column = nil
+ end
+
+ ############################################################################
+ # Public interface
+ ############################################################################
+
+ def error?
+ result.errors.any?
+ end
+
+ def parse
+ result.value.accept(self) unless error?
+ end
+
+ ############################################################################
+ # Visitor methods
+ ############################################################################
+
+ def visit(node)
+ node&.accept(self)
+ end
+
+ def visit_call_node(node)
+ if !node.opening_loc && node.arguments.arguments.length == 1
+ bounds(node.receiver.location)
+ left = visit(node.receiver)
+
+ bounds(node.arguments.arguments.first.location)
+ right = visit(node.arguments.arguments.first)
+
+ on_binary(left, source[node.message_loc.start_offset...node.message_loc.end_offset].to_sym, right)
+ else
+ raise NotImplementedError
+ end
+ end
+
+ def visit_integer_node(node)
+ bounds(node.location)
+ on_int(source[node.location.start_offset...node.location.end_offset])
+ end
+
+ def visit_statements_node(node)
+ bounds(node.location)
+ node.body.inject(on_stmts_new) do |stmts, stmt|
+ on_stmts_add(stmts, visit(stmt))
+ end
+ end
+
+ def visit_token(node)
+ bounds(node.location)
+
+ case node.type
+ when :MINUS
+ on_op(node.value)
+ when :PLUS
+ on_op(node.value)
+ else
+ raise NotImplementedError, "Unknown token: #{node.type}"
+ end
+ end
+
+ def visit_program_node(node)
+ bounds(node.location)
+ on_program(visit(node.statements))
+ end
+
+ ############################################################################
+ # Entrypoints for subclasses
+ ############################################################################
+
+ # This is a convenience method that runs the SexpBuilder subclass parser.
+ def self.sexp_raw(source)
+ SexpBuilder.new(source).parse
+ end
+
+ # This is a convenience method that runs the SexpBuilderPP subclass parser.
+ def self.sexp(source)
+ SexpBuilderPP.new(source).parse
+ end
+
+ private
+
+ # This method is responsible for updating lineno and column information
+ # to reflect the current node.
+ #
+ # This method could be drastically improved with some caching on the start
+ # of every line, but for now it's good enough.
+ def bounds(location)
+ start_offset = location.start_offset
+
+ @lineno = source[0..start_offset].count("\n") + 1
+ @column = start_offset - (source.rindex("\n", start_offset) || 0)
+ end
+
+ def result
+ @result ||= YARP.parse(source)
+ end
+
+ def _dispatch0; end
+ def _dispatch1(_); end
+ def _dispatch2(_, _); end
+ def _dispatch3(_, _, _); end
+ def _dispatch4(_, _, _, _); end
+ def _dispatch5(_, _, _, _, _); end
+ def _dispatch7(_, _, _, _, _, _, _); end
+
+ (Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity|
+ alias_method :"on_#{event}", :"_dispatch#{arity}"
+ end
+ end
+end
diff --git a/lib/prism/version.rb b/lib/prism/version.rb
new file mode 100644
index 0000000000..e450bfb526
--- /dev/null
+++ b/lib/prism/version.rb
@@ -0,0 +1,5 @@
+# frozen_string_literal: true
+
+module YARP
+ VERSION = "0.8.0"
+end