diff options
author | Kevin Newton <kddnewton@gmail.com> | 2023-09-27 12:22:36 -0400 |
---|---|---|
committer | Kevin Newton <kddnewton@gmail.com> | 2023-09-27 13:57:38 -0400 |
commit | 8ab56869a64fdccc094f4a83c6367fb23b72d38b (patch) | |
tree | 46ef2bd5c51d5b7f923eda6a60edefc7a08200db /lib/prism | |
parent | 7e0971eb5d679bb6219abb0ec238139aa6502c5a (diff) |
Rename YARP filepaths to prism filepaths
Diffstat (limited to 'lib/prism')
-rw-r--r-- | lib/prism/debug.rb | 157 | ||||
-rw-r--r-- | lib/prism/desugar_compiler.rb | 206 | ||||
-rw-r--r-- | lib/prism/ffi.rb | 251 | ||||
-rw-r--r-- | lib/prism/language_server.rb | 166 | ||||
-rw-r--r-- | lib/prism/lex_compat.rb | 838 | ||||
-rw-r--r-- | lib/prism/node_ext.rb | 55 | ||||
-rw-r--r-- | lib/prism/node_inspector.rb | 68 | ||||
-rw-r--r-- | lib/prism/pack.rb | 185 | ||||
-rw-r--r-- | lib/prism/parse_result.rb | 266 | ||||
-rw-r--r-- | lib/prism/parse_result/comments.rb | 172 | ||||
-rw-r--r-- | lib/prism/parse_result/newlines.rb | 60 | ||||
-rw-r--r-- | lib/prism/pattern.rb | 239 | ||||
-rw-r--r-- | lib/prism/prism.gemspec | 113 | ||||
-rw-r--r-- | lib/prism/ripper_compat.rb | 174 | ||||
-rw-r--r-- | lib/prism/version.rb | 5 |
15 files changed, 2955 insertions, 0 deletions
diff --git a/lib/prism/debug.rb b/lib/prism/debug.rb new file mode 100644 index 0000000000..39df1e838c --- /dev/null +++ b/lib/prism/debug.rb @@ -0,0 +1,157 @@ +# frozen_string_literal: true + +module YARP + # This module is used for testing and debugging and is not meant to be used by + # consumers of this library. + module Debug + class ISeq + attr_reader :parts + + def initialize(parts) + @parts = parts + end + + def type + parts[0] + end + + def local_table + parts[10] + end + + def instructions + parts[13] + end + + def each_child + instructions.each do |instruction| + # Only look at arrays. Other instructions are line numbers or + # tracepoint events. + next unless instruction.is_a?(Array) + + instruction.each do |opnd| + # Only look at arrays. Other operands are literals. + next unless opnd.is_a?(Array) + + # Only look at instruction sequences. Other operands are literals. + next unless opnd[0] == "YARVInstructionSequence/SimpleDataFormat" + + yield ISeq.new(opnd) + end + end + end + end + + # For the given source, compiles with CRuby and returns a list of all of the + # sets of local variables that were encountered. + def self.cruby_locals(source) + verbose = $VERBOSE + $VERBOSE = nil + + begin + locals = [] + stack = [ISeq.new(RubyVM::InstructionSequence.compile(source).to_a)] + + while (iseq = stack.pop) + if iseq.type != :once + names = iseq.local_table + + # CRuby will push on a special local variable when there are keyword + # arguments. We get rid of that here. + names = names.grep_v(Integer) + + # For some reason, CRuby occasionally pushes this special local + # variable when there are splat arguments. We get rid of that here. + names = names.grep_v(:"#arg_rest") + + # Now push them onto the list of locals. 
+ locals << names + end + + iseq.each_child { |child| stack << child } + end + + locals + ensure + $VERBOSE = verbose + end + end + + # For the given source, parses with YARP and returns a list of all of the + # sets of local variables that were encountered. + def self.yarp_locals(source) + locals = [] + stack = [YARP.parse(source).value] + + while (node = stack.pop) + case node + when BlockNode, DefNode, LambdaNode + names = node.locals + + params = node.parameters + params = params&.parameters unless node.is_a?(DefNode) + + # YARP places parameters in the same order that they appear in the + # source. CRuby places them in the order that they need to appear + # according to their own internal calling convention. We mimic that + # order here so that we can compare properly. + if params + sorted = [ + *params.requireds.grep(RequiredParameterNode).map(&:name), + *params.optionals.map(&:name), + *((params.rest.name || :*) if params.rest && params.rest.operator != ","), + *params.posts.grep(RequiredParameterNode).map(&:name), + *params.keywords.reject(&:value).map(&:name), + *params.keywords.select(&:value).map(&:name) + ] + + # TODO: When we get a ... parameter, we should be pushing * and & + # onto the local list. We don't do that yet, so we need to add them + # in here. + if params.keyword_rest.is_a?(ForwardingParameterNode) + sorted.push(:*, :&, :"...") + end + + # Recurse down the parameter tree to find any destructured + # parameters and add them after the other parameters. 
+ param_stack = params.requireds.concat(params.posts).grep(RequiredDestructuredParameterNode).reverse + while (param = param_stack.pop) + case param + when RequiredDestructuredParameterNode + param_stack.concat(param.parameters.reverse) + when RequiredParameterNode + sorted << param.name + when SplatNode + sorted << param.expression.name if param.expression + end + end + + names = sorted.concat(names - sorted) + end + + locals << names + when ClassNode, ModuleNode, ProgramNode, SingletonClassNode + locals << node.locals + when ForNode + locals << [] + when PostExecutionNode + locals.push([], []) + when InterpolatedRegularExpressionNode + locals << [] if node.once? + end + + stack.concat(node.compact_child_nodes) + end + + locals + end + + def self.newlines(source) + YARP.parse(source).source.offsets + end + + def self.parse_serialize_file(filepath) + parse_serialize_file_metadata(filepath, [filepath.bytesize, filepath.b, 0].pack("LA*L")) + end + end +end diff --git a/lib/prism/desugar_compiler.rb b/lib/prism/desugar_compiler.rb new file mode 100644 index 0000000000..b86e8518c6 --- /dev/null +++ b/lib/prism/desugar_compiler.rb @@ -0,0 +1,206 @@ +# frozen_string_literal: true + +module YARP + # DesugarCompiler is a compiler that desugars Ruby code into a more primitive + # form. This is useful for consumers that want to deal with fewer node types. + class DesugarCompiler < MutationCompiler + # @@foo &&= bar + # + # becomes + # + # @@foo && @@foo = bar + def visit_class_variable_and_write_node(node) + desugar_and_write_node(node, ClassVariableReadNode, ClassVariableWriteNode, node.name) + end + + # @@foo ||= bar + # + # becomes + # + # defined?(@@foo) ? 
@@foo : @@foo = bar + def visit_class_variable_or_write_node(node) + desugar_or_write_defined_node(node, ClassVariableReadNode, ClassVariableWriteNode, node.name) + end + + # @@foo += bar + # + # becomes + # + # @@foo = @@foo + bar + def visit_class_variable_operator_write_node(node) + desugar_operator_write_node(node, ClassVariableReadNode, ClassVariableWriteNode, node.name) + end + + # Foo &&= bar + # + # becomes + # + # Foo && Foo = bar + def visit_constant_and_write_node(node) + desugar_and_write_node(node, ConstantReadNode, ConstantWriteNode, node.name) + end + + # Foo ||= bar + # + # becomes + # + # defined?(Foo) ? Foo : Foo = bar + def visit_constant_or_write_node(node) + desugar_or_write_defined_node(node, ConstantReadNode, ConstantWriteNode, node.name) + end + + # Foo += bar + # + # becomes + # + # Foo = Foo + bar + def visit_constant_operator_write_node(node) + desugar_operator_write_node(node, ConstantReadNode, ConstantWriteNode, node.name) + end + + # $foo &&= bar + # + # becomes + # + # $foo && $foo = bar + def visit_global_variable_and_write_node(node) + desugar_and_write_node(node, GlobalVariableReadNode, GlobalVariableWriteNode, node.name) + end + + # $foo ||= bar + # + # becomes + # + # defined?($foo) ? 
$foo : $foo = bar + def visit_global_variable_or_write_node(node) + desugar_or_write_defined_node(node, GlobalVariableReadNode, GlobalVariableWriteNode, node.name) + end + + # $foo += bar + # + # becomes + # + # $foo = $foo + bar + def visit_global_variable_operator_write_node(node) + desugar_operator_write_node(node, GlobalVariableReadNode, GlobalVariableWriteNode, node.name) + end + + # @foo &&= bar + # + # becomes + # + # @foo && @foo = bar + def visit_instance_variable_and_write_node(node) + desugar_and_write_node(node, InstanceVariableReadNode, InstanceVariableWriteNode, node.name) + end + + # @foo ||= bar + # + # becomes + # + # @foo || @foo = bar + def visit_instance_variable_or_write_node(node) + desugar_or_write_node(node, InstanceVariableReadNode, InstanceVariableWriteNode, node.name) + end + + # @foo += bar + # + # becomes + # + # @foo = @foo + bar + def visit_instance_variable_operator_write_node(node) + desugar_operator_write_node(node, InstanceVariableReadNode, InstanceVariableWriteNode, node.name) + end + + # foo &&= bar + # + # becomes + # + # foo && foo = bar + def visit_local_variable_and_write_node(node) + desugar_and_write_node(node, LocalVariableReadNode, LocalVariableWriteNode, node.name, node.depth) + end + + # foo ||= bar + # + # becomes + # + # foo || foo = bar + def visit_local_variable_or_write_node(node) + desugar_or_write_node(node, LocalVariableReadNode, LocalVariableWriteNode, node.name, node.depth) + end + + # foo += bar + # + # becomes + # + # foo = foo + bar + def visit_local_variable_operator_write_node(node) + desugar_operator_write_node(node, LocalVariableReadNode, LocalVariableWriteNode, node.name, node.depth) + end + + private + + # Desugar `x &&= y` to `x && x = y` + def desugar_and_write_node(node, read_class, write_class, *arguments) + AndNode.new( + read_class.new(*arguments, node.name_loc), + write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location), + node.operator_loc, + node.location + ) 
+ end + + # Desugar `x += y` to `x = x + y` + def desugar_operator_write_node(node, read_class, write_class, *arguments) + write_class.new( + *arguments, + node.name_loc, + CallNode.new( + read_class.new(*arguments, node.name_loc), + nil, + node.operator_loc.copy(length: node.operator_loc.length - 1), + nil, + ArgumentsNode.new([node.value], node.value.location), + nil, + nil, + 0, + node.operator_loc.slice.chomp("="), + node.location + ), + node.operator_loc.copy(start_offset: node.operator_loc.end_offset - 1, length: 1), + node.location + ) + end + + # Desugar `x ||= y` to `x || x = y` + def desugar_or_write_node(node, read_class, write_class, *arguments) + OrNode.new( + read_class.new(*arguments, node.name_loc), + write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location), + node.operator_loc, + node.location + ) + end + + # Desugar `x ||= y` to `defined?(x) ? x : x = y` + def desugar_or_write_defined_node(node, read_class, write_class, *arguments) + IfNode.new( + node.operator_loc, + DefinedNode.new(nil, read_class.new(*arguments, node.name_loc), nil, node.operator_loc, node.name_loc), + StatementsNode.new([read_class.new(*arguments, node.name_loc)], node.location), + ElseNode.new( + node.operator_loc, + StatementsNode.new( + [write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location)], + node.location + ), + node.operator_loc, + node.location + ), + node.operator_loc, + node.location + ) + end + end +end diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb new file mode 100644 index 0000000000..82643be808 --- /dev/null +++ b/lib/prism/ffi.rb @@ -0,0 +1,251 @@ +# frozen_string_literal: true + +# This file is responsible for mirroring the API provided by the C extension by +# using FFI to call into the shared library. + +require "rbconfig" +require "ffi" + +module YARP + BACKEND = :FFI + + module LibRubyParser + extend FFI::Library + + # Define the library that we will be pulling functions from. 
Note that this + # must align with the build shared library from make/rake. + ffi_lib File.expand_path("../../build/librubyparser.#{RbConfig::CONFIG["SOEXT"]}", __dir__) + + # Convert a native C type declaration into a symbol that FFI understands. + # For example: + # + # const char * -> :pointer + # bool -> :bool + # size_t -> :size_t + # void -> :void + # + def self.resolve_type(type) + type = type.strip.delete_prefix("const ") + type.end_with?("*") ? :pointer : type.to_sym + end + + # Read through the given header file and find the declaration of each of the + # given functions. For each one, define a function with the same name and + # signature as the C function. + def self.load_exported_functions_from(header, *functions) + File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line| + # We only want to attempt to load exported functions. + next unless line.start_with?("YP_EXPORTED_FUNCTION ") + + # We only want to load the functions that we are interested in. + next unless functions.any? { |function| line.include?(function) } + + # Parse the function declaration. + unless /^YP_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line + raise "Could not parse #{line}" + end + + # Delete the function from the list of functions we are looking for to + # mark it as having been found. + functions.delete(name) + + # Split up the argument types into an array, ensure we handle the case + # where there are no arguments (by explicit void). + arg_types = arg_types.split(",").map(&:strip) + arg_types = [] if arg_types == %w[void] + + # Resolve the type of the argument by dropping the name of the argument + # first if it is present. + arg_types.map! { |type| resolve_type(type.sub(/\w+$/, "")) } + + # Attach the function using the FFI library. + attach_function name, arg_types, resolve_type(return_type) + end + + # If we didn't find all of the functions, raise an error. 
+ raise "Could not find functions #{functions.inspect}" unless functions.empty? + end + + load_exported_functions_from( + "yarp.h", + "yp_version", + "yp_parse_serialize", + "yp_lex_serialize", + "yp_parse_lex_serialize" + ) + + load_exported_functions_from( + "yarp/util/yp_buffer.h", + "yp_buffer_sizeof", + "yp_buffer_init", + "yp_buffer_value", + "yp_buffer_length", + "yp_buffer_free" + ) + + load_exported_functions_from( + "yarp/util/yp_string.h", + "yp_string_mapped_init", + "yp_string_free", + "yp_string_source", + "yp_string_length", + "yp_string_sizeof" + ) + + # This object represents a yp_buffer_t. We only use it as an opaque pointer, + # so it doesn't need to know the fields of yp_buffer_t. + class YPBuffer + SIZEOF = LibRubyParser.yp_buffer_sizeof + + attr_reader :pointer + + def initialize(pointer) + @pointer = pointer + end + + def value + LibRubyParser.yp_buffer_value(pointer) + end + + def length + LibRubyParser.yp_buffer_length(pointer) + end + + def read + value.read_string(length) + end + + # Initialize a new buffer and yield it to the block. The buffer will be + # automatically freed when the block returns. + def self.with(&block) + pointer = FFI::MemoryPointer.new(SIZEOF) + + begin + raise unless LibRubyParser.yp_buffer_init(pointer) + yield new(pointer) + ensure + LibRubyParser.yp_buffer_free(pointer) + pointer.free + end + end + end + + # This object represents a yp_string_t. We only use it as an opaque pointer, + # so it doesn't have to be an FFI::Struct. + class YPString + SIZEOF = LibRubyParser.yp_string_sizeof + + attr_reader :pointer + + def initialize(pointer) + @pointer = pointer + end + + def source + LibRubyParser.yp_string_source(pointer) + end + + def length + LibRubyParser.yp_string_length(pointer) + end + + def read + source.read_string(length) + end + + # Yields a yp_string_t pointer to the given block. 
+ def self.with(filepath, &block) + pointer = FFI::MemoryPointer.new(SIZEOF) + + begin + raise unless LibRubyParser.yp_string_mapped_init(pointer, filepath) + yield new(pointer) + ensure + LibRubyParser.yp_string_free(pointer) + pointer.free + end + end + end + + def self.dump_internal(source, source_size, filepath) + YPBuffer.with do |buffer| + metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath + yp_parse_serialize(source, source_size, buffer.pointer, metadata) + buffer.read + end + end + end + + # Mark the LibRubyParser module as private as it should only be called through + # the YARP module. + private_constant :LibRubyParser + + # The version constant is set by reading the result of calling yp_version. + VERSION = LibRubyParser.yp_version.read_string + + # Mirror the YARP.dump API by using the serialization API. + def self.dump(code, filepath = nil) + LibRubyParser.dump_internal(code, code.bytesize, filepath) + end + + # Mirror the YARP.dump_file API by using the serialization API. + def self.dump_file(filepath) + LibRubyParser::YPString.with(filepath) do |string| + LibRubyParser.dump_internal(string.source, string.length, filepath) + end + end + + # Mirror the YARP.lex API by using the serialization API. + def self.lex(code, filepath = nil) + LibRubyParser::YPBuffer.with do |buffer| + LibRubyParser.yp_lex_serialize(code, code.bytesize, filepath, buffer.pointer) + Serialize.load_tokens(Source.new(code), buffer.read) + end + end + + # Mirror the YARP.lex_file API by using the serialization API. + def self.lex_file(filepath) + LibRubyParser::YPString.with(filepath) do |string| + lex(string.read, filepath) + end + end + + # Mirror the YARP.parse API by using the serialization API. + def self.parse(code, filepath = nil) + YARP.load(code, dump(code, filepath)) + end + + # Mirror the YARP.parse_file API by using the serialization API. This uses + # native strings instead of Ruby strings because it allows us to use mmap when + # it is available. 
+ def self.parse_file(filepath) + LibRubyParser::YPString.with(filepath) do |string| + parse(string.read, filepath) + end + end + + # Mirror the YARP.parse_lex API by using the serialization API. + def self.parse_lex(code, filepath = nil) + LibRubyParser::YPBuffer.with do |buffer| + metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath + LibRubyParser.yp_parse_lex_serialize(code, code.bytesize, buffer.pointer, metadata) + + source = Source.new(code) + loader = Serialize::Loader.new(source, buffer.read) + + tokens = loader.load_tokens + node, comments, errors, warnings = loader.load_nodes + + tokens.each { |token,| token.value.force_encoding(loader.encoding) } + + ParseResult.new([node, tokens], comments, errors, warnings, source) + end + end + + # Mirror the YARP.parse_lex_file API by using the serialization API. + def self.parse_lex_file(filepath) + LibRubyParser::YPString.with(filepath) do |string| + parse_lex(string.read, filepath) + end + end +end diff --git a/lib/prism/language_server.rb b/lib/prism/language_server.rb new file mode 100644 index 0000000000..5a10d484a1 --- /dev/null +++ b/lib/prism/language_server.rb @@ -0,0 +1,166 @@ +# frozen_string_literal: true + +require "cgi" +require "json" +require "uri" + +module YARP + # YARP additionally ships with a language server conforming to the + # language server protocol. It can be invoked by running the yarp-lsp + # bin script (bin/yarp-lsp) + class LanguageServer + GITHUB_TEMPLATE = <<~TEMPLATE + Reporting issue with error `%{error}`. + + ## Expected behavior + <!-- TODO: Briefly explain what the expected behavior should be on this example. --> + + ## Actual behavior + <!-- TODO: Describe here what actually happened. --> + + ## Steps to reproduce the problem + <!-- TODO: Describe how we can reproduce the problem. --> + + ## Additional information + <!-- TODO: Include any additional information, such as screenshots. 
--> + + TEMPLATE + + attr_reader :input, :output + + def initialize( + input: $stdin, + output: $stdout + ) + @input = input.binmode + @output = output.binmode + end + + # rubocop:disable Layout/LineLength + def run + store = + Hash.new do |hash, uri| + filepath = CGI.unescape(URI.parse(uri).path) + File.exist?(filepath) ? (hash[uri] = File.read(filepath)) : nil + end + + while (headers = input.gets("\r\n\r\n")) + source = input.read(headers[/Content-Length: (\d+)/i, 1].to_i) + request = JSON.parse(source, symbolize_names: true) + + # stree-ignore + case request + in { method: "initialize", id: } + store.clear + write(id: id, result: { capabilities: capabilities }) + in { method: "initialized" } + # ignored + in { method: "shutdown" } # tolerate missing ID to be a good citizen + store.clear + write(id: request[:id], result: {}) + in { method: "exit"} + return + in { method: "textDocument/didChange", params: { textDocument: { uri: }, contentChanges: [{ text: }, *] } } + store[uri] = text + in { method: "textDocument/didOpen", params: { textDocument: { uri:, text: } } } + store[uri] = text + in { method: "textDocument/didClose", params: { textDocument: { uri: } } } + store.delete(uri) + in { method: "textDocument/diagnostic", id:, params: { textDocument: { uri: } } } + contents = store[uri] + write(id: id, result: contents ? diagnostics(contents) : nil) + in { method: "textDocument/codeAction", id:, params: { textDocument: { uri: }, context: { diagnostics: }}} + contents = store[uri] + write(id: id, result: contents ? 
code_actions(contents, diagnostics) : nil) + in { method: %r{\$/.+} } + # ignored + end + end + end + # rubocop:enable Layout/LineLength + + private + + def capabilities + { + codeActionProvider: { + codeActionKinds: [ + 'quickfix', + ], + }, + diagnosticProvider: { + interFileDependencies: false, + workspaceDiagnostics: false, + }, + textDocumentSync: { + change: 1, + openClose: true + }, + } + end + + def code_actions(source, diagnostics) + diagnostics.map do |diagnostic| + message = diagnostic[:message] + issue_content = URI.encode_www_form_component(GITHUB_TEMPLATE % {error: message}) + issue_link = "https://github.com/ruby/yarp/issues/new?&labels=Bug&body=#{issue_content}" + + { + title: "Report incorrect error: `#{diagnostic[:message]}`", + kind: "quickfix", + diagnostics: [diagnostic], + command: { + title: "Report incorrect error", + command: "vscode.open", + arguments: [issue_link] + } + } + end + end + + def diagnostics(source) + offsets = Hash.new do |hash, key| + slice = source.byteslice(...key) + lineno = slice.count("\n") + + char = slice.length + newline = source.rindex("\n", [char - 1, 0].max) || -1 + hash[key] = { line: lineno, character: char - newline - 1 } + end + + parse_output = YARP.parse(source) + + { + kind: "full", + items: [ + *parse_output.errors.map do |error| + { + range: { + start: offsets[error.location.start_offset], + end: offsets[error.location.end_offset], + }, + message: error.message, + severity: 1, + } + end, + *parse_output.warnings.map do |warning| + { + range: { + start: offsets[warning.location.start_offset], + end: offsets[warning.location.end_offset], + }, + message: warning.message, + severity: 2, + } + end, + ] + } + end + + def write(value) + response = value.merge(jsonrpc: "2.0").to_json + output.print("Content-Length: #{response.bytesize}\r\n\r\n#{response}") + output.flush + end + end +end diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb new file mode 100644 index 0000000000..720ac2b59b --- /dev/null 
+++ b/lib/prism/lex_compat.rb @@ -0,0 +1,838 @@ +# frozen_string_literal: true + +require "delegate" + +module YARP + # This class is responsible for lexing the source using YARP and then + # converting those tokens to be compatible with Ripper. In the vast majority + # of cases, this is a one-to-one mapping of the token type. Everything else + # generally lines up. However, there are a few cases that require special + # handling. + class LexCompat + # This is a mapping of YARP token types to Ripper token types. This is a + # many-to-one mapping because we split up our token types, whereas Ripper + # tends to group them. + RIPPER = { + AMPERSAND: :on_op, + AMPERSAND_AMPERSAND: :on_op, + AMPERSAND_AMPERSAND_EQUAL: :on_op, + AMPERSAND_DOT: :on_op, + AMPERSAND_EQUAL: :on_op, + BACK_REFERENCE: :on_backref, + BACKTICK: :on_backtick, + BANG: :on_op, + BANG_EQUAL: :on_op, + BANG_TILDE: :on_op, + BRACE_LEFT: :on_lbrace, + BRACE_RIGHT: :on_rbrace, + BRACKET_LEFT: :on_lbracket, + BRACKET_LEFT_ARRAY: :on_lbracket, + BRACKET_LEFT_RIGHT: :on_op, + BRACKET_LEFT_RIGHT_EQUAL: :on_op, + BRACKET_RIGHT: :on_rbracket, + CARET: :on_op, + CARET_EQUAL: :on_op, + CHARACTER_LITERAL: :on_CHAR, + CLASS_VARIABLE: :on_cvar, + COLON: :on_op, + COLON_COLON: :on_op, + COMMA: :on_comma, + COMMENT: :on_comment, + CONSTANT: :on_const, + DOT: :on_period, + DOT_DOT: :on_op, + DOT_DOT_DOT: :on_op, + EMBDOC_BEGIN: :on_embdoc_beg, + EMBDOC_END: :on_embdoc_end, + EMBDOC_LINE: :on_embdoc, + EMBEXPR_BEGIN: :on_embexpr_beg, + EMBEXPR_END: :on_embexpr_end, + EMBVAR: :on_embvar, + EOF: :on_eof, + EQUAL: :on_op, + EQUAL_EQUAL: :on_op, + EQUAL_EQUAL_EQUAL: :on_op, + EQUAL_GREATER: :on_op, + EQUAL_TILDE: :on_op, + FLOAT: :on_float, + FLOAT_IMAGINARY: :on_imaginary, + FLOAT_RATIONAL: :on_rational, + FLOAT_RATIONAL_IMAGINARY: :on_imaginary, + GREATER: :on_op, + GREATER_EQUAL: :on_op, + GREATER_GREATER: :on_op, + GREATER_GREATER_EQUAL: :on_op, + GLOBAL_VARIABLE: :on_gvar, + HEREDOC_END: :on_heredoc_end, + 
HEREDOC_START: :on_heredoc_beg, + IDENTIFIER: :on_ident, + IGNORED_NEWLINE: :on_ignored_nl, + INTEGER: :on_int, + INTEGER_IMAGINARY: :on_imaginary, + INTEGER_RATIONAL: :on_rational, + INTEGER_RATIONAL_IMAGINARY: :on_imaginary, + INSTANCE_VARIABLE: :on_ivar, + INVALID: :INVALID, + KEYWORD___ENCODING__: :on_kw, + KEYWORD___LINE__: :on_kw, + KEYWORD___FILE__: :on_kw, + KEYWORD_ALIAS: :on_kw, + KEYWORD_AND: :on_kw, + KEYWORD_BEGIN: :on_kw, + KEYWORD_BEGIN_UPCASE: :on_kw, + KEYWORD_BREAK: :on_kw, + KEYWORD_CASE: :on_kw, + KEYWORD_CLASS: :on_kw, + KEYWORD_DEF: :on_kw, + KEYWORD_DEFINED: :on_kw, + KEYWORD_DO: :on_kw, + KEYWORD_DO_LOOP: :on_kw, + KEYWORD_ELSE: :on_kw, + KEYWORD_ELSIF: :on_kw, + KEYWORD_END: :on_kw, + KEYWORD_END_UPCASE: :on_kw, + KEYWORD_ENSURE: :on_kw, + KEYWORD_FALSE: :on_kw, + KEYWORD_FOR: :on_kw, + KEYWORD_IF: :on_kw, + KEYWORD_IF_MODIFIER: :on_kw, + KEYWORD_IN: :on_kw, + KEYWORD_MODULE: :on_kw, + KEYWORD_NEXT: :on_kw, + KEYWORD_NIL: :on_kw, + KEYWORD_NOT: :on_kw, + KEYWORD_OR: :on_kw, + KEYWORD_REDO: :on_kw, + KEYWORD_RESCUE: :on_kw, + KEYWORD_RESCUE_MODIFIER: :on_kw, + KEYWORD_RETRY: :on_kw, + KEYWORD_RETURN: :on_kw, + KEYWORD_SELF: :on_kw, + KEYWORD_SUPER: :on_kw, + KEYWORD_THEN: :on_kw, + KEYWORD_TRUE: :on_kw, + KEYWORD_UNDEF: :on_kw, + KEYWORD_UNLESS: :on_kw, + KEYWORD_UNLESS_MODIFIER: :on_kw, + KEYWORD_UNTIL: :on_kw, + KEYWORD_UNTIL_MODIFIER: :on_kw, + KEYWORD_WHEN: :on_kw, + KEYWORD_WHILE: :on_kw, + KEYWORD_WHILE_MODIFIER: :on_kw, + KEYWORD_YIELD: :on_kw, + LABEL: :on_label, + LABEL_END: :on_label_end, + LAMBDA_BEGIN: :on_tlambeg, + LESS: :on_op, + LESS_EQUAL: :on_op, + LESS_EQUAL_GREATER: :on_op, + LESS_LESS: :on_op, + LESS_LESS_EQUAL: :on_op, + METHOD_NAME: :on_ident, + MINUS: :on_op, + MINUS_EQUAL: :on_op, + MINUS_GREATER: :on_tlambda, + NEWLINE: :on_nl, + NUMBERED_REFERENCE: :on_backref, + PARENTHESIS_LEFT: :on_lparen, + PARENTHESIS_LEFT_PARENTHESES: :on_lparen, + PARENTHESIS_RIGHT: :on_rparen, + PERCENT: :on_op, + PERCENT_EQUAL: :on_op, + 
PERCENT_LOWER_I: :on_qsymbols_beg, + PERCENT_LOWER_W: :on_qwords_beg, + PERCENT_LOWER_X: :on_backtick, + PERCENT_UPPER_I: :on_symbols_beg, + PERCENT_UPPER_W: :on_words_beg, + PIPE: :on_op, + PIPE_EQUAL: :on_op, + PIPE_PIPE: :on_op, + PIPE_PIPE_EQUAL: :on_op, + PLUS: :on_op, + PLUS_EQUAL: :on_op, + QUESTION_MARK: :on_op, + RATIONAL_FLOAT: :on_rational, + RATIONAL_INTEGER: :on_rational, + REGEXP_BEGIN: :on_regexp_beg, + REGEXP_END: :on_regexp_end, + SEMICOLON: :on_semicolon, + SLASH: :on_op, + SLASH_EQUAL: :on_op, + STAR: :on_op, + STAR_EQUAL: :on_op, + STAR_STAR: :on_op, + STAR_STAR_EQUAL: :on_op, + STRING_BEGIN: :on_tstring_beg, + STRING_CONTENT: :on_tstring_content, + STRING_END: :on_tstring_end, + SYMBOL_BEGIN: :on_symbeg, + TILDE: :on_op, + UAMPERSAND: :on_op, + UCOLON_COLON: :on_op, + UDOT_DOT: :on_op, + UDOT_DOT_DOT: :on_op, + UMINUS: :on_op, + UMINUS_NUM: :on_op, + UPLUS: :on_op, + USTAR: :on_op, + USTAR_STAR: :on_op, + WORDS_SEP: :on_words_sep, + "__END__": :on___end__ + }.freeze + + # When we produce tokens, we produce the same arrays that Ripper does. + # However, we add a couple of convenience methods onto them to make them a + # little easier to work with. We delegate all other methods to the array. + class Token < SimpleDelegator + def location + self[0] + end + + def event + self[1] + end + + def value + self[2] + end + + def state + self[3] + end + end + + # Ripper doesn't include the rest of the token in the event, so we need to + # trim it down to just the content on the first line when comparing. 
+ class EndContentToken < Token + def ==(other) + [self[0], self[1], self[2][0..self[2].index("\n")], self[3]] == other + end + end + + # Tokens where state should be ignored + # used for :on_comment, :on_heredoc_end, :on_embexpr_end + class IgnoreStateToken < Token + def ==(other) + self[0...-1] == other[0...-1] + end + end + + # Ident tokens for the most part are exactly the same, except sometimes we + # know an ident is a local when ripper doesn't (when they are introduced + # through named captures in regular expressions). In that case we don't + # compare the state. + class IdentToken < Token + def ==(other) + (self[0...-1] == other[0...-1]) && ( + (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) || + (other[3] & Ripper::EXPR_ARG_ANY != 0) + ) + end + end + + # Ignored newlines can occasionally have a LABEL state attached to them, so + # we compare the state differently here. + class IgnoredNewlineToken < Token + def ==(other) + return false unless self[0...-1] == other[0...-1] + + if self[4] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED + other[4] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED > 0 + else + self[4] == other[4] + end + end + end + + # If we have an identifier that follows a method name like: + # + # def foo bar + # + # then Ripper will mark bar as END|LABEL if there is a local in a parent + # scope named bar because it hasn't pushed the local table yet. We do this + # more accurately, so we need to allow comparing against both END and + # END|LABEL. + class ParamToken < Token + def ==(other) + (self[0...-1] == other[0...-1]) && ( + (other[3] == Ripper::EXPR_END) || + (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL) + ) + end + end + + # A heredoc in this case is a list of tokens that belong to the body of the + # heredoc that should be appended onto the list of tokens when the heredoc + # closes. + module Heredoc + # Heredocs that are no dash or tilde heredocs are just a list of tokens. 
+ # We need to keep them around so that we can insert them in the correct + # order back into the token stream and set the state of the last token to + # the state that the heredoc was opened in. + class PlainHeredoc + attr_reader :tokens + + def initialize + @tokens = [] + end + + def <<(token) + tokens << token + end + + def to_a + tokens + end + end + + # Dash heredocs are a little more complicated. They are a list of tokens + # that need to be split on "\\\n" to mimic Ripper's behavior. We also need + # to keep track of the state that the heredoc was opened in. + class DashHeredoc + attr_reader :split, :tokens + + def initialize(split) + @split = split + @tokens = [] + end + + def <<(token) + tokens << token + end + + def to_a + embexpr_balance = 0 + + tokens.each_with_object([]) do |token, results| + case token.event + when :on_embexpr_beg + embexpr_balance += 1 + results << token + when :on_embexpr_end + embexpr_balance -= 1 + results << token + when :on_tstring_content + if embexpr_balance == 0 + lineno = token[0][0] + column = token[0][1] + + if split + # Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind + # to keep the delimiter in the result. + token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index| + column = 0 if index > 0 + results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) + lineno += value.count("\n") + end + else + results << token + end + else + results << token + end + else + results << token + end + end + end + end + + # Heredocs that are dedenting heredocs are a little more complicated. + # Ripper outputs on_ignored_sp tokens for the whitespace that is being + # removed from the output. YARP only modifies the node itself and keeps + # the token the same. This simplifies YARP, but makes comparing against + # Ripper much harder because there is a length mismatch. 
#
# Fortunately, we already have to pull out the heredoc tokens in order to
# insert them into the stream in the correct order. As such, we can do
# some extra manipulation on the tokens to make them match Ripper's
# output by mirroring the dedent logic that Ripper uses.
# (Nested inside YARP::LexCompat::Heredoc in the original tree.)
class DedentingHeredoc
  # Ripper expands tabs to the next multiple of 8 columns when measuring
  # leading whitespace; mirror that here.
  TAB_WIDTH = 8

  attr_reader :tokens, :dedent_next, :dedent, :embexpr_balance

  def initialize
    @tokens = []
    @dedent_next = true
    @dedent = nil
    @embexpr_balance = 0
  end

  # As tokens are coming in, we track the minimum amount of common leading
  # whitespace on plain string content tokens. This allows us to later
  # remove that amount of whitespace from the beginning of each line.
  def <<(token)
    case token.event
    when :on_embexpr_beg, :on_heredoc_beg
      @embexpr_balance += 1
    when :on_embexpr_end, :on_heredoc_end
      @embexpr_balance -= 1
    when :on_tstring_content
      # Only measure plain content outside of interpolations/nested
      # heredocs, and only at the start of a line.
      if embexpr_balance == 0
        token.value.split(/(?<=\n)/).each_with_index do |line, index|
          # Entirely-blank lines do not participate in the common leading
          # whitespace calculation.
          next if line.strip.empty? && line.end_with?("\n")
          next if !(dedent_next || index > 0)

          leading = line[/\A(\s*)\n?/, 1]
          next_dedent = 0

          leading.each_char do |char|
            if char == "\t"
              next_dedent = next_dedent - (next_dedent % TAB_WIDTH) + TAB_WIDTH
            else
              next_dedent += 1
            end
          end

          # compact drops the initial nil so the first measurement wins
          # outright; afterwards we keep the minimum.
          @dedent = [dedent, next_dedent].compact.min
        end
      end
    end

    # The next chunk starts a fresh line only if this token was plain
    # string content at the top level.
    @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0
    tokens << token
  end

  def to_a
    # If every line in the heredoc is blank, we still need to split up the
    # string content token into multiple tokens.
    if dedent.nil?
      results = []
      embexpr_balance = 0

      tokens.each do |token|
        case token.event
        when :on_embexpr_beg, :on_heredoc_beg
          embexpr_balance += 1
          results << token
        when :on_embexpr_end, :on_heredoc_end
          embexpr_balance -= 1
          results << token
        when :on_tstring_content
          if embexpr_balance == 0
            lineno = token[0][0]
            column = token[0][1]

            token.value.split(/(?<=\n)/).each_with_index do |value, index|
              column = 0 if index > 0
              results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
              # NOTE(review): assumes each split piece spans exactly one
              # line (the lookbehind split keeps one "\n" per piece except
              # possibly the last) — confirm against Ripper's output.
              lineno += 1
            end
          else
            results << token
          end
        else
          results << token
        end
      end

      return results
    end

    # Otherwise, we're going to run through each token in the list and
    # insert on_ignored_sp tokens for the amount of dedent that we need to
    # perform. We also need to remove the dedent from the beginning of
    # each line of plain string content tokens.
    results = []
    dedent_next = true
    embexpr_balance = 0

    tokens.each do |token|
      # Notice that the structure of this conditional largely matches the
      # whitespace calculation we performed above. This is because
      # checking if the subsequent token needs to be dedented is common to
      # both the dedent calculation and the ignored_sp insertion.
      case token.event
      when :on_embexpr_beg
        embexpr_balance += 1
        results << token
      when :on_embexpr_end
        embexpr_balance -= 1
        results << token
      when :on_tstring_content
        if embexpr_balance == 0
          # Here we're going to split the string on newlines, but maintain
          # the newlines in the resulting array. We'll do that with a look
          # behind assertion.
          splits = token.value.split(/(?<=\n)/)
          index = 0

          while index < splits.length
            line = splits[index]
            lineno = token[0][0] + index
            column = token[0][1]

            # Blank lines do not count toward common leading whitespace
            # calculation and do not need to be dedented.
            if dedent_next || index > 0
              column = 0
            end

            # If the dedent is 0 and we're not supposed to dedent the next
            # line or this line doesn't start with whitespace, then we
            # should concatenate the rest of the string to match ripper.
            if dedent == 0 && (!dedent_next || !line.start_with?(/\s/))
              line = splits[index..].join
              index = splits.length
            end

            # If we are supposed to dedent this line or if this is not the
            # first line of the string and this line isn't entirely blank,
            # then we need to insert an on_ignored_sp token and remove the
            # dedent from the beginning of the line.
            if (dedent > 0) && (dedent_next || index > 0)
              deleting = 0
              deleted_chars = []

              # Gather up all of the characters that we're going to
              # delete, stopping when you hit a character that would put
              # you over the dedent amount.
              line.each_char.with_index do |char, i|
                case char
                when "\r"
                  # A "\r\n" pair terminates the line; a lone "\r" counts
                  # as an ordinary (deletable) character below.
                  if line.chars[i + 1] == "\n"
                    break
                  end
                when "\n"
                  break
                when "\t"
                  deleting = deleting - (deleting % TAB_WIDTH) + TAB_WIDTH
                else
                  deleting += 1
                end

                break if deleting > dedent
                deleted_chars << char
              end

              # If we have something to delete, then delete it from the
              # string and insert an on_ignored_sp token.
              if deleted_chars.any?
                ignored = deleted_chars.join
                line.delete_prefix!(ignored)

                results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]])
                column = ignored.length
              end
            end

            results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty?
            index += 1
          end
        else
          results << token
        end
      else
        results << token
      end

      dedent_next =
        ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) &&
        embexpr_balance == 0
    end

    results
  end
end

# Here we will split between the two types of heredocs and return the
# object that will store their tokens.
+ def self.build(opening) + case opening.value[2] + when "~" + DedentingHeredoc.new + when "-" + DashHeredoc.new(opening.value[3] != "'") + else + PlainHeredoc.new + end + end + end + + attr_reader :source, :filepath + + def initialize(source, filepath = "") + @source = source + @filepath = filepath || "" + end + + def result + tokens = [] + + state = :default + heredoc_stack = [[]] + + result = YARP.lex(source, @filepath) + result_value = result.value + previous_state = nil + + # In previous versions of Ruby, Ripper wouldn't flush the bom before the + # first token, so we had to have a hack in place to account for that. This + # checks for that behavior. + bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0 + bom = source.byteslice(0..2) == "\xEF\xBB\xBF" + + result_value.each_with_index do |(token, lex_state), index| + lineno = token.location.start_line + column = token.location.start_column + + # If there's a UTF-8 byte-order mark as the start of the file, then for + # certain tokens ripper sets the first token back by 3 bytes. It also + # keeps the byte order mark in the first token's value. This is weird, + # and I don't want to mirror that in our parser. So instead, we'll match + # up the columns and values here. 
+ if bom && lineno == 1 + column -= 3 + + if index == 0 && column == 0 && !bom_flushed + flushed = + case token.type + when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE, + :GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I, + :PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I, + :PERCENT_UPPER_W, :STRING_BEGIN + true + when :REGEXP_BEGIN, :SYMBOL_BEGIN + token.value.start_with?("%") + else + false + end + + unless flushed + column -= 3 + value = token.value + value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding)) + end + end + end + + event = RIPPER.fetch(token.type) + value = token.value + lex_state = Ripper::Lexer::State.new(lex_state) + + token = + case event + when :on___end__ + EndContentToken.new([[lineno, column], event, value, lex_state]) + when :on_comment + IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + when :on_heredoc_end + # Heredoc end tokens can be emitted in an odd order, so we don't + # want to bother comparing the state on them. + IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + when :on_ident + if lex_state == Ripper::EXPR_END + # If we have an identifier that follows a method name like: + # + # def foo bar + # + # then Ripper will mark bar as END|LABEL if there is a local in a + # parent scope named bar because it hasn't pushed the local table + # yet. We do this more accurately, so we need to allow comparing + # against both END and END|LABEL. + ParamToken.new([[lineno, column], event, value, lex_state]) + elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL + # In the event that we're comparing identifiers, we're going to + # allow a little divergence. Ripper doesn't account for local + # variables introduced through named captures in regexes, and we + # do, which accounts for this difference. 
+ IdentToken.new([[lineno, column], event, value, lex_state]) + else + Token.new([[lineno, column], event, value, lex_state]) + end + when :on_embexpr_end + IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + when :on_ignored_nl + # Ignored newlines can occasionally have a LABEL state attached to + # them which doesn't actually impact anything. We don't mirror that + # state so we ignored it. + IgnoredNewlineToken.new([[lineno, column], event, value, lex_state]) + when :on_regexp_end + # On regex end, Ripper scans and then sets end state, so the ripper + # lexed output is begin, when it should be end. YARP sets lex state + # correctly to end state, but we want to be able to compare against + # Ripper's lexed state. So here, if it's a regexp end token, we + # output the state as the previous state, solely for the sake of + # comparison. + previous_token = result_value[index - 1][0] + lex_state = + if RIPPER.fetch(previous_token.type) == :on_embexpr_end + # If the previous token is embexpr_end, then we have to do even + # more processing. The end of an embedded expression sets the + # state to the state that it had at the beginning of the + # embedded expression. So we have to go and find that state and + # set it here. + counter = 1 + current_index = index - 1 + + until counter == 0 + current_index -= 1 + current_event = RIPPER.fetch(result_value[current_index][0].type) + counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0 + end + + Ripper::Lexer::State.new(result_value[current_index][1]) + else + previous_state + end + + Token.new([[lineno, column], event, value, lex_state]) + when :on_eof + previous_token = result_value[index - 1][0] + + # If we're at the end of the file and the previous token was a + # comment and there is still whitespace after the comment, then + # Ripper will append a on_nl token (even though there isn't + # necessarily a newline). We mirror that here. 
+ start_offset = previous_token.location.end_offset + end_offset = token.location.start_offset + + if previous_token.type == :COMMENT && start_offset < end_offset + if bom + start_offset += 3 + end_offset += 3 + end + + tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state]) + end + + Token.new([[lineno, column], event, value, lex_state]) + else + Token.new([[lineno, column], event, value, lex_state]) + end + + previous_state = lex_state + + # The order in which tokens appear in our lexer is different from the + # order that they appear in Ripper. When we hit the declaration of a + # heredoc in YARP, we skip forward and lex the rest of the content of + # the heredoc before going back and lexing at the end of the heredoc + # identifier. + # + # To match up to ripper, we keep a small state variable around here to + # track whether we're in the middle of a heredoc or not. In this way we + # can shuffle around the token to match Ripper's output. + case state + when :default + # The default state is when there are no heredocs at all. In this + # state we can append the token to the list of tokens and move on. + tokens << token + + # If we get the declaration of a heredoc, then we open a new heredoc + # and move into the heredoc_opened state. + if event == :on_heredoc_beg + state = :heredoc_opened + heredoc_stack.last << Heredoc.build(token) + end + when :heredoc_opened + # The heredoc_opened state is when we've seen the declaration of a + # heredoc and are now lexing the body of the heredoc. In this state we + # push tokens onto the most recently created heredoc. + heredoc_stack.last.last << token + + case event + when :on_heredoc_beg + # If we receive a heredoc declaration while lexing the body of a + # heredoc, this means we have nested heredocs. In this case we'll + # push a new heredoc onto the stack and stay in the heredoc_opened + # state since we're now lexing the body of the new heredoc. 
+ heredoc_stack << [Heredoc.build(token)] + when :on_heredoc_end + # If we receive the end of a heredoc, then we're done lexing the + # body of the heredoc. In this case we now have a completed heredoc + # but need to wait for the next newline to push it into the token + # stream. + state = :heredoc_closed + end + when :heredoc_closed + if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n")) + if heredoc_stack.size > 1 + flushing = heredoc_stack.pop + heredoc_stack.last.last << token + + flushing.each do |heredoc| + heredoc.to_a.each do |flushed_token| + heredoc_stack.last.last << flushed_token + end + end + + state = :heredoc_opened + next + end + elsif event == :on_heredoc_beg + tokens << token + state = :heredoc_opened + heredoc_stack.last << Heredoc.build(token) + next + elsif heredoc_stack.size > 1 + heredoc_stack[-2].last << token + next + end + + heredoc_stack.last.each do |heredoc| + tokens.concat(heredoc.to_a) + end + + heredoc_stack.last.clear + state = :default + + tokens << token + end + end + + # Drop the EOF token from the list + tokens = tokens[0...-1] + + # We sort by location to compare against Ripper's output + tokens.sort_by!(&:location) + + if result_value.size - 1 > tokens.size + raise StandardError, "Lost tokens when performing lex_compat" + end + + ParseResult.new(tokens, result.comments, result.errors, result.warnings, []) + end + end + + # This is a class that wraps the Ripper lexer to produce almost exactly the + # same tokens. 
+ class LexRipper + attr_reader :source + + def initialize(source) + @source = source + end + + def result + previous = [] + results = [] + + Ripper.lex(source, raise_errors: true).each do |token| + case token[1] + when :on_sp + # skip + when :on_tstring_content + if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@")) + previous[2] << token[2] + else + results << token + previous = token + end + when :on_words_sep + if previous[1] == :on_words_sep + previous[2] << token[2] + else + results << token + previous = token + end + else + results << token + previous = token + end + end + + results + end + end +end diff --git a/lib/prism/node_ext.rb b/lib/prism/node_ext.rb new file mode 100644 index 0000000000..760b3d75df --- /dev/null +++ b/lib/prism/node_ext.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +# Here we are reopening the YARP module to provide methods on nodes that aren't +# templated and are meant as convenience methods. +module YARP + class FloatNode < Node + # Returns the value of the node as a Ruby Float. + def value + Float(slice) + end + end + + class ImaginaryNode < Node + # Returns the value of the node as a Ruby Complex. + def value + Complex(0, numeric.value) + end + end + + class IntegerNode < Node + # Returns the value of the node as a Ruby Integer. + def value + Integer(slice) + end + end + + class InterpolatedRegularExpressionNode < Node + # Returns a numeric value that represents the flags that were used to create + # the regular expression. 
+ def options + o = flags & (RegularExpressionFlags::IGNORE_CASE | RegularExpressionFlags::EXTENDED | RegularExpressionFlags::MULTI_LINE) + o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8) + o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT) + o + end + end + + class RationalNode < Node + # Returns the value of the node as a Ruby Rational. + def value + Rational(slice.chomp("r")) + end + end + + class RegularExpressionNode < Node + # Returns a numeric value that represents the flags that were used to create + # the regular expression. + def options + o = flags & (RegularExpressionFlags::IGNORE_CASE | RegularExpressionFlags::EXTENDED | RegularExpressionFlags::MULTI_LINE) + o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8) + o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT) + o + end + end +end diff --git a/lib/prism/node_inspector.rb b/lib/prism/node_inspector.rb new file mode 100644 index 0000000000..c09840a471 --- /dev/null +++ b/lib/prism/node_inspector.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +module YARP + # This object is responsible for generating the output for the inspect method + # implementations of child nodes. + class NodeInspector + attr_reader :prefix, :output + + def initialize(prefix = "") + @prefix = prefix + @output = +"" + end + + # Appends a line to the output with the current prefix. + def <<(line) + output << "#{prefix}#{line}" + end + + # This generates a string that is used as the header of the inspect output + # for any given node. 
+ def header(node) + output = +"@ #{node.class.name.split("::").last} (" + output << "location: (#{node.location.start_line},#{node.location.start_column})-(#{node.location.end_line},#{node.location.end_column})" + output << ", newline: true" if node.newline? + output << ")\n" + output + end + + # Generates a string that represents a list of nodes. It handles properly + # using the box drawing characters to make the output look nice. + def list(prefix, nodes) + output = +"(length: #{nodes.length})\n" + last_index = nodes.length - 1 + + nodes.each_with_index do |node, index| + pointer, preadd = (index == last_index) ? ["└── ", " "] : ["├── ", "│ "] + node_prefix = "#{prefix}#{preadd}" + output << node.inspect(NodeInspector.new(node_prefix)).sub(node_prefix, "#{prefix}#{pointer}") + end + + output + end + + # Generates a string that represents a location field on a node. + def location(value) + if value + "(#{value.start_line},#{value.start_column})-(#{value.end_line},#{value.end_column}) = #{value.slice.inspect}" + else + "∅" + end + end + + # Generates a string that represents a child node. + def child_node(node, append) + node.inspect(child_inspector(append)).delete_prefix(prefix) + end + + # Returns a new inspector that can be used to inspect a child node. + def child_inspector(append) + NodeInspector.new("#{prefix}#{append}") + end + + # Returns the output as a string. 
+ def to_str + output + end + end +end diff --git a/lib/prism/pack.rb b/lib/prism/pack.rb new file mode 100644 index 0000000000..83f5569923 --- /dev/null +++ b/lib/prism/pack.rb @@ -0,0 +1,185 @@ +# frozen_string_literal: true + +module YARP + module Pack + %i[ + SPACE + COMMENT + INTEGER + UTF8 + BER + FLOAT + STRING_SPACE_PADDED + STRING_NULL_PADDED + STRING_NULL_TERMINATED + STRING_MSB + STRING_LSB + STRING_HEX_HIGH + STRING_HEX_LOW + STRING_UU + STRING_MIME + STRING_BASE64 + STRING_FIXED + STRING_POINTER + MOVE + BACK + NULL + + UNSIGNED + SIGNED + SIGNED_NA + + AGNOSTIC_ENDIAN + LITTLE_ENDIAN + BIG_ENDIAN + NATIVE_ENDIAN + ENDIAN_NA + + SIZE_SHORT + SIZE_INT + SIZE_LONG + SIZE_LONG_LONG + SIZE_8 + SIZE_16 + SIZE_32 + SIZE_64 + SIZE_P + SIZE_NA + + LENGTH_FIXED + LENGTH_MAX + LENGTH_RELATIVE + LENGTH_NA + ].each do |const| + const_set(const, const) + end + + class Directive + attr_reader :version, :variant, :source, :type, :signed, :endian, :size, :length_type, :length + + def initialize(version, variant, source, type, signed, endian, size, length_type, length) + @version = version + @variant = variant + @source = source + @type = type + @signed = signed + @endian = endian + @size = size + @length_type = length_type + @length = length + end + + ENDIAN_DESCRIPTIONS = { + AGNOSTIC_ENDIAN: 'agnostic', + LITTLE_ENDIAN: 'little-endian (VAX)', + BIG_ENDIAN: 'big-endian (network)', + NATIVE_ENDIAN: 'native-endian', + ENDIAN_NA: 'n/a' + } + + SIGNED_DESCRIPTIONS = { + UNSIGNED: 'unsigned', + SIGNED: 'signed', + SIGNED_NA: 'n/a' + } + + SIZE_DESCRIPTIONS = { + SIZE_SHORT: 'short', + SIZE_INT: 'int-width', + SIZE_LONG: 'long', + SIZE_LONG_LONG: 'long long', + SIZE_8: '8-bit', + SIZE_16: '16-bit', + SIZE_32: '32-bit', + SIZE_64: '64-bit', + SIZE_P: 'pointer-width' + } + + def describe + case type + when SPACE + 'whitespace' + when COMMENT + 'comment' + when INTEGER + if size == SIZE_8 + base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer" + else + 
base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer" + end + case length_type + when LENGTH_FIXED + if length > 1 + base + ", x#{length}" + else + base + end + when LENGTH_MAX + base + ', as many as possible' + end + when UTF8 + 'UTF-8 character' + when BER + 'BER-compressed integer' + when FLOAT + "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float" + when STRING_SPACE_PADDED + 'arbitrary binary string (space padded)' + when STRING_NULL_PADDED + 'arbitrary binary string (null padded, count is width)' + when STRING_NULL_TERMINATED + 'arbitrary binary string (null padded, count is width), except that null is added with *' + when STRING_MSB + 'bit string (MSB first)' + when STRING_LSB + 'bit string (LSB first)' + when STRING_HEX_HIGH + 'hex string (high nibble first)' + when STRING_HEX_LOW + 'hex string (low nibble first)' + when STRING_UU + 'UU-encoded string' + when STRING_MIME + 'quoted printable, MIME encoding' + when STRING_BASE64 + 'base64 encoded string' + when STRING_FIXED + 'pointer to a structure (fixed-length string)' + when STRING_POINTER + 'pointer to a null-terminated string' + when MOVE + 'move to absolute position' + when BACK + 'back up a byte' + when NULL + 'null byte' + else + raise + end + end + end + + class Format + attr_reader :directives, :encoding + + def initialize(directives, encoding) + @directives = directives + @encoding = encoding + end + + def describe + source_width = directives.map { |d| d.source.inspect.length }.max + directive_lines = directives.map do |directive| + if directive.type == SPACE + source = directive.source.inspect + else + source = directive.source + end + " #{source.ljust(source_width)} #{directive.describe}" + end + + (['Directives:'] + directive_lines + ['Encoding:', " #{encoding}"]).join("\n") + end + end + end +end diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb new file mode 100644 index 0000000000..2d9d855b86 --- /dev/null +++ 
# frozen_string_literal: true

module YARP
  # This represents a source of Ruby code that has been parsed. It is used in
  # conjunction with locations to allow them to resolve line numbers and source
  # ranges.
  class Source
    attr_reader :source, :offsets

    # source is the raw source string; offsets is the list of byte offsets of
    # the start of every line (computed from the source by default).
    def initialize(source, offsets = compute_offsets(source))
      @source = source
      @offsets = offsets
    end

    # Returns the byteslice of the source for the given byte offset/length.
    def slice(offset, length)
      source.byteslice(offset, length)
    end

    # Returns the 1-indexed line number containing the given byte offset.
    # Offsets at or past the end of the source map to the last line.
    def line(value)
      offsets.bsearch_index { |offset| offset > value } || offsets.length
    end

    # Returns the byte offset of the start of the line containing the given
    # byte offset.
    def line_offset(value)
      offsets[line(value) - 1]
    end

    # Returns the byte column (0-indexed) of the given byte offset within its
    # line.
    def column(value)
      value - offsets[line(value) - 1]
    end

    private

    # Scans the source (as binary, so offsets are byte offsets) and records
    # the offset just past each newline as a line start.
    def compute_offsets(code)
      offsets = [0]
      code.b.scan("\n") { offsets << $~.end(0) }
      offsets
    end
  end

  # This represents a location in the source.
  class Location
    # A Source object that is used to determine more information from the given
    # offset and length.
    protected attr_reader :source

    # The byte offset from the beginning of the source where this location
    # starts.
    attr_reader :start_offset

    # The length of this location in bytes.
    attr_reader :length

    # The list of comments attached to this location
    attr_reader :comments

    def initialize(source, start_offset, length)
      @source = source
      @start_offset = start_offset
      @length = length
      @comments = []
    end

    # Create a new location object with the given options.
    def copy(**options)
      Location.new(
        options.fetch(:source) { source },
        options.fetch(:start_offset) { start_offset },
        options.fetch(:length) { length }
      )
    end

    # Returns a string representation of this location.
    def inspect
      "#<YARP::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
    end

    # The source code that this location represents.
    def slice
      source.slice(start_offset, length)
    end

    # The byte offset from the beginning of the source where this location ends.
    def end_offset
      start_offset + length
    end

    # The line number where this location starts.
    def start_line
      source.line(start_offset)
    end

    # The content of the line where this location starts before this location.
    def start_line_slice
      offset = source.line_offset(start_offset)
      source.slice(offset, start_offset - offset)
    end

    # The line number where this location ends.
    def end_line
      source.line(end_offset - 1)
    end

    # The column number in bytes where this location starts from the start of
    # the line.
    def start_column
      source.column(start_offset)
    end

    # The column number in bytes where this location ends from the start of the
    # line.
    def end_column
      source.column(end_offset)
    end

    def deconstruct_keys(keys)
      { start_offset: start_offset, end_offset: end_offset }
    end

    def pretty_print(q)
      q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column}))")
    end

    # Locations are value-compared by their byte range only; the source is
    # deliberately not part of the comparison.
    def ==(other)
      other.is_a?(Location) &&
        other.start_offset == start_offset &&
        other.end_offset == end_offset
    end

    # Returns a new location that stretches from this location to the given
    # other location. Raises an error if this location is not before the other
    # location or if they don't share the same source.
    def join(other)
      raise "Incompatible sources" if source != other.source
      raise "Incompatible locations" if start_offset > other.start_offset

      Location.new(source, start_offset, other.end_offset - start_offset)
    end

    # A null location for when no real location is available. The previous
    # implementation called new(0, 0), which always raised an ArgumentError
    # because #initialize takes three arguments (source, start_offset,
    # length); pass a nil source and a zero-width span instead.
    def self.null
      new(nil, 0, 0)
    end
  end
end

# This represents a comment that was encountered during parsing.
# This represents a comment that was encountered during parsing.
# (Nested inside module YARP in the original tree.)
class Comment
  TYPES = %i[inline embdoc __END__]

  attr_reader :type, :location

  def initialize(type, location)
    @type = type
    @location = location
  end

  def deconstruct_keys(keys) = { type: type, location: location }

  # Returns true if the comment happens on the same line as other code and false if the comment is by itself
  def trailing?
    return false unless type == :inline

    !location.start_line_slice.strip.empty?
  end

  def inspect
    "#<YARP::Comment @type=#{@type.inspect} @location=#{@location.inspect}>"
  end
end

# This represents an error that was encountered during parsing.
class ParseError
  attr_reader :message, :location

  def initialize(message, location)
    @message = message
    @location = location
  end

  def deconstruct_keys(keys) = { message: message, location: location }

  def inspect
    "#<YARP::ParseError @message=#{@message.inspect} @location=#{@location.inspect}>"
  end
end

# This represents a warning that was encountered during parsing.
class ParseWarning
  attr_reader :message, :location

  def initialize(message, location)
    @message = message
    @location = location
  end

  def deconstruct_keys(keys) = { message: message, location: location }

  def inspect
    "#<YARP::ParseWarning @message=#{@message.inspect} @location=#{@location.inspect}>"
  end
end

# This represents the result of a call to ::parse or ::parse_file. It contains
# the AST, any comments that were encounters, and any errors that were
# encountered.
class ParseResult
  attr_reader :value, :comments, :errors, :warnings, :source

  def initialize(value, comments, errors, warnings, source)
    @value = value
    @comments = comments
    @errors = errors
    @warnings = warnings
    @source = source
  end

  def deconstruct_keys(keys)
    { value: value, comments: comments, errors: errors, warnings: warnings }
  end

  # A parse is considered successful when no errors were produced.
  def success? = errors.empty?

  def failure? = !success?
end

# This represents a token from the Ruby source.
class Token
  attr_reader :type, :value, :location

  def initialize(type, value, location)
    @type = type
    @value = value
    @location = location
  end

  def deconstruct_keys(keys) = { type: type, value: value, location: location }

  def pretty_print(q)
    q.group do
      q.text(type.to_s)
      location.pretty_print(q)
      q.text("(")
      q.nest(2) do
        q.breakable("")
        q.pp(value)
      end
      q.breakable("")
      q.text(")")
    end
  end

  # Tokens are value-compared by type and value; the location is
  # deliberately not part of the comparison.
  def ==(other)
    return false unless other.is_a?(Token)

    type == other.type && value == other.value
  end
end

# When we've parsed the source, we have both the syntax tree and the list of
# comments that we found in the source. ParseResult::Comments (opened in the
# following section of the file) walks the tree and finds the nearest
# location to attach each comment, favoring preceding locations for trailing
# comments and following locations otherwise.
+ class NodeTarget + attr_reader :node + + def initialize(node) + @node = node + end + + def start_offset + node.location.start_offset + end + + def end_offset + node.location.end_offset + end + + def encloses?(comment) + start_offset <= comment.location.start_offset && + comment.location.end_offset <= end_offset + end + + def <<(comment) + node.location.comments << comment + end + end + + # A target for attaching comments that is based on a location field on a + # node. For example, the `end` token of a ClassNode. + class LocationTarget + attr_reader :location + + def initialize(location) + @location = location + end + + def start_offset + location.start_offset + end + + def end_offset + location.end_offset + end + + def encloses?(comment) + false + end + + def <<(comment) + location.comments << comment + end + end + + attr_reader :parse_result + + def initialize(parse_result) + @parse_result = parse_result + end + + def attach! + parse_result.comments.each do |comment| + preceding, enclosing, following = nearest_targets(parse_result.value, comment) + target = + if comment.trailing? + preceding || following || enclosing || NodeTarget.new(parse_result.value) + else + # If a comment exists on its own line, prefer a leading comment. + following || preceding || enclosing || NodeTarget.new(parse_result.value) + end + + target << comment + end + end + + private + + # Responsible for finding the nearest targets to the given comment within + # the context of the given encapsulating node. 
+ def nearest_targets(node, comment) + comment_start = comment.location.start_offset + comment_end = comment.location.end_offset + + targets = [] + node.comment_targets.map do |value| + case value + when StatementsNode + targets.concat(value.body.map { |node| NodeTarget.new(node) }) + when Node + targets << NodeTarget.new(value) + when Location + targets << LocationTarget.new(value) + end + end + + targets.sort_by!(&:start_offset) + preceding = nil + following = nil + + left = 0 + right = targets.length + + # This is a custom binary search that finds the nearest nodes to the + # given comment. When it finds a node that completely encapsulates the + # comment, it recurses downward into the tree. + while left < right + middle = (left + right) / 2 + target = targets[middle] + + target_start = target.start_offset + target_end = target.end_offset + + if target.encloses?(comment) + # The comment is completely contained by this target. Abandon the + # binary search at this level. + return nearest_targets(target.node, comment) + end + + if target_end <= comment_start + # This target falls completely before the comment. Because we will + # never consider this target or any targets before it again, this + # target must be the closest preceding target we have encountered so + # far. + preceding = target + left = middle + 1 + next + end + + if comment_end <= target_start + # This target falls completely after the comment. Because we will + # never consider this target or any targets after it again, this + # target must be the closest following target we have encountered so + # far. + following = target + right = middle + next + end + + # This should only happen if there is a bug in this parser. + raise "Comment location overlaps with a target location" + end + + [preceding, NodeTarget.new(node), following] + end + end + + private_constant :Comments + + # Attach the list of comments to their respective locations in the tree. + def attach_comments! + Comments.new(self).attach! 
+ end
+ end
+end
diff --git a/lib/prism/parse_result/newlines.rb b/lib/prism/parse_result/newlines.rb
new file mode 100644
index 0000000000..d16600afd0
--- /dev/null
+++ b/lib/prism/parse_result/newlines.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+module YARP
+ class ParseResult
+ # The :line tracepoint event gets fired whenever the Ruby VM encounters an
+ # expression on a new line. The types of expressions that can trigger this
+ # event are:
+ #
+ # * if statements
+ # * unless statements
+ # * nodes that are children of statements lists
+ #
+ # In order to keep track of the newlines, we have a list of offsets that
+ # come back from the parser. We assign these offsets to the first nodes that
+ # we find in the tree that are on those lines.
+ #
+ # Note that the logic in this file should be kept in sync with the Java
+ # MarkNewlinesVisitor, since that visitor is responsible for marking the
+ # newlines for JRuby/TruffleRuby.
+ class Newlines < Visitor
+ # newline_marked is an array of booleans indexed by line number; true
+ # means a node on that line has already claimed the newline flag.
+ def initialize(newline_marked)
+ @newline_marked = newline_marked
+ end
+
+ # Blocks get a fresh, all-false set of marks while their bodies are
+ # visited, so a line inside a block can be flagged even if that line
+ # number was already marked outside of the block.
+ def visit_block_node(node)
+ old_newline_marked = @newline_marked
+ @newline_marked = Array.new(old_newline_marked.size, false)
+
+ begin
+ super(node)
+ ensure
+ # Always restore the enclosing scope's marks, even if a visit raises.
+ @newline_marked = old_newline_marked
+ end
+ end
+
+ alias_method :visit_lambda_node, :visit_block_node
+
+ def visit_if_node(node)
+ node.set_newline_flag(@newline_marked)
+ super(node)
+ end
+
+ alias_method :visit_unless_node, :visit_if_node
+
+ # Every direct child of a statements list is a candidate for a newline
+ # flag on its line.
+ def visit_statements_node(node)
+ node.body.each do |child|
+ child.set_newline_flag(@newline_marked)
+ end
+ super(node)
+ end
+ end
+
+ private_constant :Newlines
+
+ # Walk the tree and mark nodes that are on a new line.
+ def mark_newlines!
+ # Lines are 1-indexed, so allocate offsets.size + 1 slots, all false.
+ value.accept(Newlines.new(Array.new(1 + source.offsets.size, false)))
+ end
+ end
+end
diff --git a/lib/prism/pattern.rb b/lib/prism/pattern.rb
new file mode 100644
index 0000000000..f7519137e4
--- /dev/null
+++ b/lib/prism/pattern.rb
@@ -0,0 +1,239 @@
+# frozen_string_literal: true
+
+module YARP
+ # A pattern is an object that wraps a Ruby pattern matching expression. The
+ # expression would normally be passed to an `in` clause within a `case`
+ # expression or a rightward assignment expression. For example, in the
+ # following snippet:
+ #
+ # case node
+ # in ConstantPathNode[ConstantReadNode[name: :YARP], ConstantReadNode[name: :Pattern]]
+ # end
+ #
+ # the pattern is the `ConstantPathNode[...]` expression.
+ #
+ # The pattern gets compiled into an object that responds to #call by running
+ # the #compile method. This method itself will run back through YARP to
+ # parse the expression into a tree, then walk the tree to generate the
+ # necessary callable objects. For example, if you wanted to compile the
+ # expression above into a callable, you would:
+ #
+ # callable = YARP::Pattern.new("ConstantPathNode[ConstantReadNode[name: :YARP], ConstantReadNode[name: :Pattern]]").compile
+ # callable.call(node)
+ #
+ # The callable object returned by #compile is guaranteed to respond to #call
+ # with a single argument, which is the node to match against. It also is
+ # guaranteed to respond to #===, which means it itself can be used in a `case`
+ # expression, as in:
+ #
+ # case node
+ # when callable
+ # end
+ #
+ # If the query given to the initializer cannot be compiled into a valid
+ # matcher (either because of a syntax error or because it is using syntax we
+ # do not yet support) then a YARP::Pattern::CompilationError will be
+ # raised.
+ class Pattern
+ # Raised when the query given to a pattern is either invalid Ruby syntax or
+ # is using syntax that we don't yet support.
+ class CompilationError < StandardError
+ # repr is the #inspect output of the unsupported node.
+ # NOTE(review): "It failed on to understand" below reads like a typo for
+ # "It failed to understand" — this is runtime output, so it is left
+ # untouched here; confirm and fix at the source.
+ def initialize(repr)
+ super(<<~ERROR)
+ YARP was unable to compile the pattern you provided into a usable
+ expression. It failed on to understand the node represented by:
+
+ #{repr}
+
+ Note that not all syntax supported by Ruby's pattern matching syntax
+ is also supported by YARP's patterns. If you're using some syntax
+ that you believe should be supported, please open an issue on
+ GitHub at https://github.com/ruby/yarp/issues/new.
+ ERROR
+ end
+ end
+
+ attr_reader :query
+
+ def initialize(query)
+ @query = query
+ @compiled = nil
+ end
+
+ # Compile the query into a callable by parsing it inside a dummy
+ # `case nil\nin ...\nend` expression and walking the resulting pattern.
+ # NOTE(review): result.errors is never checked here, so a query with a
+ # syntax error will raise NoMethodError while digging into the tree
+ # rather than CompilationError — confirm whether that is intended.
+ def compile
+ result = YARP.parse("case nil\nin #{query}\nend")
+ compile_node(result.value.statements.body.last.conditions.last.pattern)
+ end
+
+ # Breadth-first search of the tree rooted at root, yielding every node
+ # that matches the compiled pattern. Returns an Enumerator when no block
+ # is given.
+ def scan(root)
+ return to_enum(__method__, root) unless block_given?
+
+ @compiled ||= compile
+ queue = [root]
+
+ while (node = queue.shift)
+ yield node if @compiled.call(node)
+ queue.concat(node.compact_child_nodes)
+ end
+ end
+
+ private
+
+ # Shortcut for combining two procs into one that returns true if both return
+ # true.
+ def combine_and(left, right)
+ ->(other) { left.call(other) && right.call(other) }
+ end
+
+ # Shortcut for combining two procs into one that returns true if either
+ # returns true.
+ def combine_or(left, right)
+ ->(other) { left.call(other) || right.call(other) }
+ end
+
+ # Raise an error because the given node is not supported.
+ def compile_error(node)
+ raise CompilationError, node.inspect
+ end
+
+ # in [foo, bar, baz]
+ # Splats (rest) and post-splat elements are not supported.
+ def compile_array_pattern_node(node)
+ compile_error(node) if !node.rest.nil? || node.posts.any?
+
+ constant = node.constant
+ compiled_constant = compile_node(constant) if constant
+
+ preprocessed = node.requireds.map { |required| compile_node(required) }
+
+ # Matches when the deconstructed value has exactly as many elements as
+ # required matchers and every element satisfies its matcher.
+ compiled_requireds = ->(other) do
+ deconstructed = other.deconstruct
+
+ deconstructed.length == preprocessed.length &&
+ preprocessed
+ .zip(deconstructed)
+ .all?
 { |(matcher, value)| matcher.call(value) }
+ end
+
+ if compiled_constant
+ combine_and(compiled_constant, compiled_requireds)
+ else
+ compiled_requireds
+ end
+ end
+
+ # in foo | bar
+ def compile_alternation_pattern_node(node)
+ combine_or(compile_node(node.left), compile_node(node.right))
+ end
+
+ # in YARP::ConstantReadNode
+ # Only constant paths whose parent is the literal YARP namespace are
+ # supported; anything else is a compilation error.
+ def compile_constant_path_node(node)
+ parent = node.parent
+
+ if parent.is_a?(ConstantReadNode) && parent.slice == "YARP"
+ compile_node(node.child)
+ else
+ compile_error(node)
+ end
+ end
+
+ # in ConstantReadNode
+ # in String
+ # Looks the constant up in YARP's namespace first, then the global
+ # namespace. The second argument of false keeps ancestors out of the
+ # const_defined? lookup.
+ def compile_constant_read_node(node)
+ value = node.slice
+
+ if YARP.const_defined?(value, false)
+ clazz = YARP.const_get(value)
+
+ ->(other) { clazz === other }
+ elsif Object.const_defined?(value, false)
+ clazz = Object.const_get(value)
+
+ ->(other) { clazz === other }
+ else
+ compile_error(node)
+ end
+ end
+
+ # in InstanceVariableReadNode[name: Symbol]
+ # in { name: Symbol }
+ # Keyword-rest (**) is not supported. Keys are symbolized from their
+ # unescaped source text.
+ def compile_hash_pattern_node(node)
+ compile_error(node) unless node.kwrest.nil?
+ compiled_constant = compile_node(node.constant) if node.constant
+
+ preprocessed =
+ node.assocs.to_h do |assoc|
+ [assoc.key.unescaped.to_sym, compile_node(assoc.value)]
+ end
+
+ # Matches when every listed key is present in the deconstructed hash
+ # and its value satisfies the corresponding matcher.
+ compiled_keywords = ->(other) do
+ deconstructed = other.deconstruct_keys(preprocessed.keys)
+
+ preprocessed.all? do |keyword, matcher|
+ deconstructed.key?(keyword) && matcher.call(deconstructed[keyword])
+ end
+ end
+
+ if compiled_constant
+ combine_and(compiled_constant, compiled_keywords)
+ else
+ compiled_keywords
+ end
+ end
+
+ # in nil
+ def compile_nil_node(node)
+ ->(attribute) { attribute.nil?
 }
+ end
+
+ # in /foo/
+ # The regexp flags are taken from whatever follows the closing delimiter
+ # in the source (node.closing[1..]).
+ def compile_regular_expression_node(node)
+ regexp = Regexp.new(node.unescaped, node.closing[1..])
+
+ ->(attribute) { regexp === attribute }
+ end
+
+ # in ""
+ # in "foo"
+ def compile_string_node(node)
+ string = node.unescaped
+
+ ->(attribute) { string === attribute }
+ end
+
+ # in :+
+ # in :foo
+ def compile_symbol_node(node)
+ symbol = node.unescaped.to_sym
+
+ ->(attribute) { symbol === attribute }
+ end
+
+ # Compile any kind of node. Dispatch out to the individual compilation
+ # methods based on the type of node. Unsupported node types raise
+ # CompilationError via compile_error.
+ def compile_node(node)
+ case node
+ when AlternationPatternNode
+ compile_alternation_pattern_node(node)
+ when ArrayPatternNode
+ compile_array_pattern_node(node)
+ when ConstantPathNode
+ compile_constant_path_node(node)
+ when ConstantReadNode
+ compile_constant_read_node(node)
+ when HashPatternNode
+ compile_hash_pattern_node(node)
+ when NilNode
+ compile_nil_node(node)
+ when RegularExpressionNode
+ compile_regular_expression_node(node)
+ when StringNode
+ compile_string_node(node)
+ when SymbolNode
+ compile_symbol_node(node)
+ else
+ compile_error(node)
+ end
+ end
+ end
+end
diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec
new file mode 100644
index 0000000000..d1a7bbbbcf
--- /dev/null
+++ b/lib/prism/prism.gemspec
@@ -0,0 +1,113 @@
+# frozen_string_literal: true
+
+Gem::Specification.new do |spec|
+ spec.name = "yarp"
+ # NOTE(review): this commit only renames file paths, so the gem is still
+ # published as "yarp" at 0.12.0 — but lib/prism/version.rb in this same
+ # change declares VERSION = "0.8.0". Confirm which is authoritative.
+ spec.version = "0.12.0"
+ spec.authors = ["Shopify"]
+ spec.email = ["ruby@shopify.com"]
+
+ spec.summary = "Yet Another Ruby Parser"
+ spec.homepage = "https://github.com/ruby/yarp"
+ spec.license = "MIT"
+
+ spec.required_ruby_version = ">= 3.0.0"
+
+ spec.require_paths = ["lib"]
+ spec.files = [
+ "CHANGELOG.md",
+ "CODE_OF_CONDUCT.md",
+ "CONTRIBUTING.md",
+ "LICENSE.md",
+ "Makefile",
+ "README.md",
+ "config.yml",
+ # Documentation shipped with the gem.
+ "docs/build_system.md",
+ "docs/building.md",
+ "docs/configuration.md",
+ "docs/design.md",
+ "docs/encoding.md",
+ "docs/fuzzing.md",
+ 
"docs/heredocs.md",
+ "docs/mapping.md",
+ "docs/ripper.md",
+ "docs/ruby_api.md",
+ "docs/serialization.md",
+ "docs/testing.md",
+ # C extension entry points.
+ "ext/yarp/api_node.c",
+ "ext/yarp/api_pack.c",
+ "ext/yarp/extension.c",
+ "ext/yarp/extension.h",
+ # Public C headers.
+ "include/yarp.h",
+ "include/yarp/ast.h",
+ "include/yarp/defines.h",
+ "include/yarp/diagnostic.h",
+ "include/yarp/enc/yp_encoding.h",
+ "include/yarp/node.h",
+ "include/yarp/pack.h",
+ "include/yarp/parser.h",
+ "include/yarp/regexp.h",
+ "include/yarp/unescape.h",
+ "include/yarp/util/yp_buffer.h",
+ "include/yarp/util/yp_char.h",
+ "include/yarp/util/yp_constant_pool.h",
+ "include/yarp/util/yp_list.h",
+ "include/yarp/util/yp_memchr.h",
+ "include/yarp/util/yp_newline_list.h",
+ "include/yarp/util/yp_state_stack.h",
+ "include/yarp/util/yp_string.h",
+ "include/yarp/util/yp_string_list.h",
+ "include/yarp/util/yp_strpbrk.h",
+ "include/yarp/version.h",
+ # Ruby library files.
+ "lib/yarp.rb",
+ "lib/yarp/compiler.rb",
+ "lib/yarp/debug.rb",
+ "lib/yarp/desugar_compiler.rb",
+ "lib/yarp/dispatcher.rb",
+ "lib/yarp/dsl.rb",
+ "lib/yarp/ffi.rb",
+ "lib/yarp/lex_compat.rb",
+ "lib/yarp/mutation_compiler.rb",
+ "lib/yarp/node.rb",
+ "lib/yarp/node_ext.rb",
+ "lib/yarp/node_inspector.rb",
+ "lib/yarp/pack.rb",
+ "lib/yarp/parse_result.rb",
+ "lib/yarp/pattern.rb",
+ "lib/yarp/ripper_compat.rb",
+ "lib/yarp/serialize.rb",
+ "lib/yarp/parse_result/comments.rb",
+ "lib/yarp/parse_result/newlines.rb",
+ "lib/yarp/visitor.rb",
+ # Core C implementation.
+ "src/diagnostic.c",
+ "src/enc/yp_big5.c",
+ "src/enc/yp_euc_jp.c",
+ "src/enc/yp_gbk.c",
+ "src/enc/yp_shift_jis.c",
+ "src/enc/yp_tables.c",
+ "src/enc/yp_unicode.c",
+ "src/enc/yp_windows_31j.c",
+ "src/node.c",
+ "src/pack.c",
+ "src/prettyprint.c",
+ "src/regexp.c",
+ "src/serialize.c",
+ "src/token_type.c",
+ "src/unescape.c",
+ "src/util/yp_buffer.c",
+ "src/util/yp_char.c",
+ "src/util/yp_constant_pool.c",
+ "src/util/yp_list.c",
+ "src/util/yp_memchr.c",
+ "src/util/yp_newline_list.c",
+ "src/util/yp_state_stack.c",
+ 
"src/util/yp_string.c",
+ "src/util/yp_string_list.c",
+ "src/util/yp_strncasecmp.c",
+ "src/util/yp_strpbrk.c",
+ "src/yarp.c",
+ "yarp.gemspec",
+ ]
+
+ spec.extensions = ["ext/yarp/extconf.rb"]
+ spec.metadata["allowed_push_host"] = "https://rubygems.org"
+end
diff --git a/lib/prism/ripper_compat.rb b/lib/prism/ripper_compat.rb
new file mode 100644
index 0000000000..c76f3fd07a
--- /dev/null
+++ b/lib/prism/ripper_compat.rb
@@ -0,0 +1,174 @@
+# frozen_string_literal: true
+
+require "ripper"
+
+module YARP
+ # This class is meant to provide a compatibility layer between YARP and
+ # Ripper. It functions by parsing the entire tree first and then walking it
+ # and executing each of the Ripper callbacks as it goes.
+ #
+ # This class is going to necessarily be slower than the native Ripper API. It
+ # is meant as a stopgap until developers migrate to using YARP. It is also
+ # meant as a test harness for the YARP parser.
+ class RipperCompat
+ # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
+ # returns the arrays of [type, *children].
+ class SexpBuilder < RipperCompat
+ private
+
+ # Handlers defined below via define_method pick up the private
+ # visibility declared above; they replace the no-op _dispatch aliases
+ # installed at the bottom of RipperCompat.
+ Ripper::PARSER_EVENTS.each do |event|
+ define_method(:"on_#{event}") do |*args|
+ [event, *args]
+ end
+ end
+
+ # Scanner events carry the token value plus its [lineno, column]
+ # position, mirroring Ripper's [:@event, value, [lineno, column]].
+ Ripper::SCANNER_EVENTS.each do |event|
+ define_method(:"on_#{event}") do |value|
+ [:"@#{event}", value, [lineno, column]]
+ end
+ end
+ end
+
+ # This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
+ # returns the same values as ::Ripper::SexpBuilder except with a couple of
+ # niceties that flatten linked lists into arrays.
+ class SexpBuilderPP < SexpBuilder
+ private
+
+ # Starts a fresh array where SexpBuilder would start a linked list
+ # (the xxx_new family of events).
+ def _dispatch_event_new
+ []
+ end
+
+ # Appends to the array where SexpBuilder would chain a linked list
+ # (the xxx_add family of events).
+ def _dispatch_event_push(list, item)
+ list << item
+ list
+ end
+
+ # Rewire every zero-arity *_new event and every *_add event to the
+ # flattening helpers above.
+ Ripper::PARSER_EVENT_TABLE.each do |event, arity|
+ case event
+ when /_new\z/
+ alias_method :"on_#{event}", :_dispatch_event_new if arity == 0
+ when /_add\z/
+ alias_method :"on_#{event}", :_dispatch_event_push
+ end
+ end
+ end
+
+ attr_reader :source, :lineno, :column
+
+ def initialize(source)
+ @source = source
+ @result = nil
+ @lineno = nil
+ @column = nil
+ end
+
+ ############################################################################
+ # Public interface
+ ############################################################################
+
+ def error?
+ result.errors.any?
+ end
+
+ # Parse the source and walk the tree, firing the on_* callbacks as we go.
+ # Does nothing (returns nil) when the source has errors.
+ def parse
+ result.value.accept(self) unless error?
+ end
+
+ ############################################################################
+ # Visitor methods
+ ############################################################################
+
+ def visit(node)
+ node&.accept(self)
+ end
+
+ # Only paren-less calls with exactly one argument (i.e. binary operator
+ # expressions like `1 + 2`) are handled; everything else raises
+ # NotImplementedError.
+ # NOTE(review): when a call has no arguments at all, node.arguments is
+ # nil and `node.arguments.arguments` raises NoMethodError before the
+ # NotImplementedError fallback is reached — confirm and guard with &.
+ def visit_call_node(node)
+ if !node.opening_loc && node.arguments.arguments.length == 1
+ bounds(node.receiver.location)
+ left = visit(node.receiver)
+
+ bounds(node.arguments.arguments.first.location)
+ right = visit(node.arguments.arguments.first)
+
+ on_binary(left, source[node.message_loc.start_offset...node.message_loc.end_offset].to_sym, right)
+ else
+ raise NotImplementedError
+ end
+ end
+
+ # Integers are re-sliced from the raw source so formatting (underscores,
+ # radix prefixes) is preserved, as Ripper does.
+ def visit_integer_node(node)
+ bounds(node.location)
+ on_int(source[node.location.start_offset...node.location.end_offset])
+ end
+
+ # Rebuilds Ripper's stmts_new/stmts_add chain from the statements list.
+ def visit_statements_node(node)
+ bounds(node.location)
+ node.body.inject(on_stmts_new) do |stmts, stmt|
+ on_stmts_add(stmts, visit(stmt))
+ end
+ end
+
+ def visit_token(node)
+ bounds(node.location)
+
+ case node.type
+ when :MINUS
+ on_op(node.value)
+ when :PLUS
+ on_op(node.value)
+ else
+ raise NotImplementedError, "Unknown token: #{node.type}"
+ end
+ end
+
+ def visit_program_node(node)
+ 
bounds(node.location)
+ on_program(visit(node.statements))
+ end
+
+ ############################################################################
+ # Entrypoints for subclasses
+ ############################################################################
+
+ # This is a convenience method that runs the SexpBuilder subclass parser.
+ def self.sexp_raw(source)
+ SexpBuilder.new(source).parse
+ end
+
+ # This is a convenience method that runs the SexpBuilderPP subclass parser.
+ def self.sexp(source)
+ SexpBuilderPP.new(source).parse
+ end
+
+ private
+
+ # This method is responsible for updating lineno and column information
+ # to reflect the current node.
+ #
+ # This method could be drastically improved with some caching on the start
+ # of every line, but for now it's good enough.
+ # NOTE(review): lineno counts the newlines in source[0..start_offset]
+ # (inclusive of the character at start_offset), and column is measured
+ # from the preceding "\n" character itself rather than the character
+ # after it — so columns on line 1 come out 0-based while columns on
+ # later lines come out 1-based. Confirm against Ripper, which reports
+ # 0-based columns everywhere.
+ def bounds(location)
+ start_offset = location.start_offset
+
+ @lineno = source[0..start_offset].count("\n") + 1
+ @column = start_offset - (source.rindex("\n", start_offset) || 0)
+ end
+
+ # Memoized parse of the source; parsing happens at most once per instance.
+ def result
+ @result ||= YARP.parse(source)
+ end
+
+ # Default no-op handlers, one per arity; every scanner and parser event
+ # is aliased to the stub matching its arity below, so subclasses only
+ # override the events they care about.
+ def _dispatch0; end
+ def _dispatch1(_); end
+ def _dispatch2(_, _); end
+ def _dispatch3(_, _, _); end
+ def _dispatch4(_, _, _, _); end
+ def _dispatch5(_, _, _, _, _); end
+ def _dispatch7(_, _, _, _, _, _, _); end
+
+ (Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity|
+ alias_method :"on_#{event}", :"_dispatch#{arity}"
+ end
+ end
+end
diff --git a/lib/prism/version.rb b/lib/prism/version.rb
new file mode 100644
index 0000000000..e450bfb526
--- /dev/null
+++ b/lib/prism/version.rb
@@ -0,0 +1,5 @@
+# frozen_string_literal: true
+
+module YARP
+ # NOTE(review): this disagrees with spec.version = "0.12.0" in
+ # lib/prism/prism.gemspec from this same change — confirm which is right.
+ VERSION = "0.8.0"
+end
|