diff options
Diffstat (limited to 'lib/prism')
-rw-r--r-- | lib/prism/desugar_compiler.rb | 354 | ||||
-rw-r--r-- | lib/prism/ffi.rb | 437 | ||||
-rw-r--r-- | lib/prism/lex_compat.rb | 927 | ||||
-rw-r--r-- | lib/prism/node_ext.rb | 443 | ||||
-rw-r--r-- | lib/prism/pack.rb | 228 | ||||
-rw-r--r-- | lib/prism/parse_result.rb | 698 | ||||
-rw-r--r-- | lib/prism/parse_result/comments.rb | 187 | ||||
-rw-r--r-- | lib/prism/parse_result/newlines.rb | 153 | ||||
-rw-r--r-- | lib/prism/pattern.rb | 268 | ||||
-rw-r--r-- | lib/prism/polyfill/byteindex.rb | 13 | ||||
-rw-r--r-- | lib/prism/polyfill/unpack1.rb | 14 | ||||
-rw-r--r-- | lib/prism/prism.gemspec | 161 | ||||
-rw-r--r-- | lib/prism/translation.rb | 13 | ||||
-rw-r--r-- | lib/prism/translation/parser.rb | 307 | ||||
-rw-r--r-- | lib/prism/translation/parser/compiler.rb | 2111 | ||||
-rw-r--r-- | lib/prism/translation/parser/lexer.rb | 416 | ||||
-rw-r--r-- | lib/prism/translation/parser/rubocop.rb | 73 | ||||
-rw-r--r-- | lib/prism/translation/parser33.rb | 12 | ||||
-rw-r--r-- | lib/prism/translation/parser34.rb | 12 | ||||
-rw-r--r-- | lib/prism/translation/ripper.rb | 3452 | ||||
-rw-r--r-- | lib/prism/translation/ripper/sexp.rb | 125 | ||||
-rw-r--r-- | lib/prism/translation/ripper/shim.rb | 5 | ||||
-rw-r--r-- | lib/prism/translation/ruby_parser.rb | 1594 |
23 files changed, 12003 insertions, 0 deletions
diff --git a/lib/prism/desugar_compiler.rb b/lib/prism/desugar_compiler.rb new file mode 100644 index 0000000000..de02445149 --- /dev/null +++ b/lib/prism/desugar_compiler.rb @@ -0,0 +1,354 @@ +# frozen_string_literal: true + +module Prism + class DesugarAndWriteNode # :nodoc: + attr_reader :node, :source, :read_class, :write_class, :arguments + + def initialize(node, source, read_class, write_class, *arguments) + @node = node + @source = source + @read_class = read_class + @write_class = write_class + @arguments = arguments + end + + # Desugar `x &&= y` to `x && x = y` + def compile + AndNode.new( + source, + read_class.new(source, *arguments, node.name_loc), + write_class.new(source, *arguments, node.name_loc, node.value, node.operator_loc, node.location), + node.operator_loc, + node.location + ) + end + end + + class DesugarOrWriteDefinedNode # :nodoc: + attr_reader :node, :source, :read_class, :write_class, :arguments + + def initialize(node, source, read_class, write_class, *arguments) + @node = node + @source = source + @read_class = read_class + @write_class = write_class + @arguments = arguments + end + + # Desugar `x ||= y` to `defined?(x) ? x : x = y` + def compile + IfNode.new( + source, + node.operator_loc, + DefinedNode.new(source, nil, read_class.new(source, *arguments, node.name_loc), nil, node.operator_loc, node.name_loc), + node.operator_loc, + StatementsNode.new(source, [read_class.new(source, *arguments, node.name_loc)], node.location), + ElseNode.new( + source, + node.operator_loc, + StatementsNode.new( + source, + [write_class.new(source, *arguments, node.name_loc, node.value, node.operator_loc, node.location)], + node.location + ), + node.operator_loc, + node.location + ), + node.operator_loc, + node.location + ) + end + end + + class DesugarOperatorWriteNode # :nodoc: + attr_reader :node, :source, :read_class, :write_class, :arguments + + def initialize(node, source, read_class, write_class, *arguments) + @node = node + @source = source + @read_class = read_class + @write_class = write_class + @arguments = arguments + end + + # Desugar `x += y` to `x = x + y` + def compile + binary_operator_loc = node.binary_operator_loc.chop + + write_class.new( + source, + *arguments, + node.name_loc, + CallNode.new( + source, + 0, + read_class.new(source, *arguments, node.name_loc), + nil, + binary_operator_loc.slice.to_sym, + binary_operator_loc, + nil, + ArgumentsNode.new(source, 0, [node.value], node.value.location), + nil, + nil, + node.location + ), + node.binary_operator_loc.copy(start_offset: node.binary_operator_loc.end_offset - 1, length: 1), + node.location + ) + end + end + + class DesugarOrWriteNode # :nodoc: + attr_reader :node, :source, :read_class, :write_class, :arguments + + def initialize(node, source, read_class, write_class, *arguments) + @node = node + @source = source + @read_class = read_class + @write_class = write_class + @arguments = arguments + end + + # Desugar `x ||= y` to `x || x = y` + def compile + OrNode.new( + source, + read_class.new(source, *arguments, node.name_loc), + write_class.new(source, *arguments, node.name_loc, node.value, node.operator_loc, node.location), + node.operator_loc, + node.location + ) + end + end + + private_constant :DesugarAndWriteNode, :DesugarOrWriteNode, :DesugarOrWriteDefinedNode, :DesugarOperatorWriteNode + + class ClassVariableAndWriteNode + def desugar # :nodoc: + DesugarAndWriteNode.new(self, source, ClassVariableReadNode, ClassVariableWriteNode, name).compile + end + end + + class ClassVariableOrWriteNode + def desugar # :nodoc: + DesugarOrWriteDefinedNode.new(self, source, ClassVariableReadNode, ClassVariableWriteNode, name).compile + end + end + + class ClassVariableOperatorWriteNode + def desugar # :nodoc: + DesugarOperatorWriteNode.new(self, source, ClassVariableReadNode, ClassVariableWriteNode, name).compile + end + end + + class ConstantAndWriteNode + def desugar # :nodoc: + DesugarAndWriteNode.new(self, source, ConstantReadNode, ConstantWriteNode, name).compile + end + end + + class ConstantOrWriteNode + def desugar # :nodoc: + DesugarOrWriteDefinedNode.new(self, source, ConstantReadNode, ConstantWriteNode, name).compile + end + end + + class ConstantOperatorWriteNode + def desugar # :nodoc: + DesugarOperatorWriteNode.new(self, source, ConstantReadNode, ConstantWriteNode, name).compile + end + end + + class GlobalVariableAndWriteNode + def desugar # :nodoc: + DesugarAndWriteNode.new(self, source, GlobalVariableReadNode, GlobalVariableWriteNode, name).compile + end + end + + class GlobalVariableOrWriteNode + def desugar # :nodoc: + DesugarOrWriteDefinedNode.new(self, source, GlobalVariableReadNode, GlobalVariableWriteNode, name).compile + end + end + + class GlobalVariableOperatorWriteNode + def desugar # :nodoc: + DesugarOperatorWriteNode.new(self, source, GlobalVariableReadNode, GlobalVariableWriteNode, name).compile + end + end + + class InstanceVariableAndWriteNode + def desugar # :nodoc: + DesugarAndWriteNode.new(self, source, InstanceVariableReadNode, InstanceVariableWriteNode, name).compile + end + end + + class InstanceVariableOrWriteNode + def desugar # :nodoc: + DesugarOrWriteNode.new(self, source, InstanceVariableReadNode, InstanceVariableWriteNode, name).compile + end + end + + class InstanceVariableOperatorWriteNode + def desugar # :nodoc: + DesugarOperatorWriteNode.new(self, source, InstanceVariableReadNode, InstanceVariableWriteNode, name).compile + end + end + + class LocalVariableAndWriteNode + def desugar # :nodoc: + DesugarAndWriteNode.new(self, source, LocalVariableReadNode, LocalVariableWriteNode, name, depth).compile + end + end + + class LocalVariableOrWriteNode + def desugar # :nodoc: + DesugarOrWriteNode.new(self, source, LocalVariableReadNode, LocalVariableWriteNode, name, depth).compile + end + end + + class LocalVariableOperatorWriteNode + def desugar # :nodoc: + DesugarOperatorWriteNode.new(self, source, LocalVariableReadNode, LocalVariableWriteNode, name, depth).compile + end + end + + # DesugarCompiler is a compiler that desugars Ruby code into a more primitive + # form. This is useful for consumers that want to deal with fewer node types. + class DesugarCompiler < MutationCompiler + # @@foo &&= bar + # + # becomes + # + # @@foo && @@foo = bar + def visit_class_variable_and_write_node(node) + node.desugar + end + + # @@foo ||= bar + # + # becomes + # + # defined?(@@foo) ? @@foo : @@foo = bar + def visit_class_variable_or_write_node(node) + node.desugar + end + + # @@foo += bar + # + # becomes + # + # @@foo = @@foo + bar + def visit_class_variable_operator_write_node(node) + node.desugar + end + + # Foo &&= bar + # + # becomes + # + # Foo && Foo = bar + def visit_constant_and_write_node(node) + node.desugar + end + + # Foo ||= bar + # + # becomes + # + # defined?(Foo) ? Foo : Foo = bar + def visit_constant_or_write_node(node) + node.desugar + end + + # Foo += bar + # + # becomes + # + # Foo = Foo + bar + def visit_constant_operator_write_node(node) + node.desugar + end + + # $foo &&= bar + # + # becomes + # + # $foo && $foo = bar + def visit_global_variable_and_write_node(node) + node.desugar + end + + # $foo ||= bar + # + # becomes + # + # defined?($foo) ? $foo : $foo = bar + def visit_global_variable_or_write_node(node) + node.desugar + end + + # $foo += bar + # + # becomes + # + # $foo = $foo + bar + def visit_global_variable_operator_write_node(node) + node.desugar + end + + # @foo &&= bar + # + # becomes + # + # @foo && @foo = bar + def visit_instance_variable_and_write_node(node) + node.desugar + end + + # @foo ||= bar + # + # becomes + # + # @foo || @foo = bar + def visit_instance_variable_or_write_node(node) + node.desugar + end + + # @foo += bar + # + # becomes + # + # @foo = @foo + bar + def visit_instance_variable_operator_write_node(node) + node.desugar + end + + # foo &&= bar + # + # becomes + # + # foo && foo = bar + def visit_local_variable_and_write_node(node) + node.desugar + end + + # foo ||= bar + # + # becomes + # + # foo || foo = bar + def visit_local_variable_or_write_node(node) + node.desugar + end + + # foo += bar + # + # becomes + # + # foo = foo + bar + def visit_local_variable_operator_write_node(node) + node.desugar + end + end +end diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb new file mode 100644 index 0000000000..df7e183310 --- /dev/null +++ b/lib/prism/ffi.rb @@ -0,0 +1,437 @@ +# frozen_string_literal: true +# typed: ignore + +# This file is responsible for mirroring the API provided by the C extension by +# using FFI to call into the shared library. + +require "rbconfig" +require "ffi" + +module Prism + module LibRubyParser # :nodoc: + extend FFI::Library + + # Define the library that we will be pulling functions from. Note that this + # must align with the build shared library from make/rake. + ffi_lib File.expand_path("../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__) + + # Convert a native C type declaration into a symbol that FFI understands. + # For example: + # + # const char * -> :pointer + # bool -> :bool + # size_t -> :size_t + # void -> :void + # + def self.resolve_type(type, callbacks) + type = type.strip + + if !type.end_with?("*") + type.delete_prefix("const ").to_sym + else + type = type.delete_suffix("*").rstrip + callbacks.include?(type.to_sym) ? type.to_sym : :pointer + end + end + + # Read through the given header file and find the declaration of each of the + # given functions. For each one, define a function with the same name and + # signature as the C function. + def self.load_exported_functions_from(header, *functions, callbacks) + File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line| + # We only want to attempt to load exported functions. + next unless line.start_with?("PRISM_EXPORTED_FUNCTION ") + + # We only want to load the functions that we are interested in. + next unless functions.any? { |function| line.include?(function) } + + # Parse the function declaration. + unless /^PRISM_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line + raise "Could not parse #{line}" + end + + # Delete the function from the list of functions we are looking for to + # mark it as having been found. + functions.delete(name) + + # Split up the argument types into an array, ensure we handle the case + # where there are no arguments (by explicit void). + arg_types = arg_types.split(",").map(&:strip) + arg_types = [] if arg_types == %w[void] + + # Resolve the type of the argument by dropping the name of the argument + # first if it is present. + arg_types.map! { |type| resolve_type(type.sub(/\w+$/, ""), callbacks) } + + # Attach the function using the FFI library. + attach_function name, arg_types, resolve_type(return_type, []) + end + + # If we didn't find all of the functions, raise an error. + raise "Could not find functions #{functions.inspect}" unless functions.empty? + end + + callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer + + load_exported_functions_from( + "prism.h", + "pm_version", + "pm_serialize_parse", + "pm_serialize_parse_stream", + "pm_serialize_parse_comments", + "pm_serialize_lex", + "pm_serialize_parse_lex", + "pm_parse_success_p", + [:pm_parse_stream_fgets_t] + ) + + load_exported_functions_from( + "prism/util/pm_buffer.h", + "pm_buffer_sizeof", + "pm_buffer_init", + "pm_buffer_value", + "pm_buffer_length", + "pm_buffer_free", + [] + ) + + load_exported_functions_from( + "prism/util/pm_string.h", + "pm_string_mapped_init", + "pm_string_free", + "pm_string_source", + "pm_string_length", + "pm_string_sizeof", + [] + ) + + # This object represents a pm_buffer_t. We only use it as an opaque pointer, + # so it doesn't need to know the fields of pm_buffer_t. + class PrismBuffer # :nodoc: + SIZEOF = LibRubyParser.pm_buffer_sizeof + + attr_reader :pointer + + def initialize(pointer) + @pointer = pointer + end + + def value + LibRubyParser.pm_buffer_value(pointer) + end + + def length + LibRubyParser.pm_buffer_length(pointer) + end + + def read + value.read_string(length) + end + + # Initialize a new buffer and yield it to the block. The buffer will be + # automatically freed when the block returns. + def self.with + FFI::MemoryPointer.new(SIZEOF) do |pointer| + raise unless LibRubyParser.pm_buffer_init(pointer) + return yield new(pointer) + ensure + LibRubyParser.pm_buffer_free(pointer) + end + end + end + + # This object represents a pm_string_t. We only use it as an opaque pointer, + # so it doesn't have to be an FFI::Struct. + class PrismString # :nodoc: + SIZEOF = LibRubyParser.pm_string_sizeof + + attr_reader :pointer, :length + + def initialize(pointer, length, from_string) + @pointer = pointer + @length = length + @from_string = from_string + end + + def read + raise "should use the original String instead" if @from_string + @pointer.read_string(@length) + end + + # Yields a pm_string_t pointer to the given block. + def self.with_string(string) + raise TypeError unless string.is_a?(String) + + length = string.bytesize + # + 1 to never get an address of 0, which pm_parser_init() asserts + FFI::MemoryPointer.new(:char, length + 1, false) do |pointer| + pointer.write_string(string) + # since we have the extra byte we might as well \0-terminate + pointer.put_char(length, 0) + return yield new(pointer, length, true) + end + end + + # Yields a pm_string_t pointer to the given block. + def self.with_file(filepath) + raise TypeError unless filepath.is_a?(String) + + FFI::MemoryPointer.new(SIZEOF) do |pm_string| + if LibRubyParser.pm_string_mapped_init(pm_string, filepath) + pointer = LibRubyParser.pm_string_source(pm_string) + length = LibRubyParser.pm_string_length(pm_string) + return yield new(pointer, length, false) + else + raise SystemCallError.new(filepath, FFI.errno) + end + ensure + LibRubyParser.pm_string_free(pm_string) + end + end + end + end + + # Mark the LibRubyParser module as private as it should only be called through + # the prism module. + private_constant :LibRubyParser + + # The version constant is set by reading the result of calling pm_version. + VERSION = LibRubyParser.pm_version.read_string + + class << self + # Mirror the Prism.dump API by using the serialization API. + def dump(code, **options) + LibRubyParser::PrismString.with_string(code) { |string| dump_common(string, options) } + end + + # Mirror the Prism.dump_file API by using the serialization API. + def dump_file(filepath, **options) + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| dump_common(string, options) } + end + + # Mirror the Prism.lex API by using the serialization API. + def lex(code, **options) + LibRubyParser::PrismString.with_string(code) { |string| lex_common(string, code, options) } + end + + # Mirror the Prism.lex_file API by using the serialization API. + def lex_file(filepath, **options) + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| lex_common(string, string.read, options) } + end + + # Mirror the Prism.parse API by using the serialization API. + def parse(code, **options) + LibRubyParser::PrismString.with_string(code) { |string| parse_common(string, code, options) } + end + + # Mirror the Prism.parse_file API by using the serialization API. This uses + # native strings instead of Ruby strings because it allows us to use mmap + # when it is available. + def parse_file(filepath, **options) + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| parse_common(string, string.read, options) } + end + + # Mirror the Prism.parse_stream API by using the serialization API. + def parse_stream(stream, **options) + LibRubyParser::PrismBuffer.with do |buffer| + source = +"" + callback = -> (string, size, _) { + raise "Expected size to be >= 0, got: #{size}" if size <= 0 + + if !(line = stream.gets(size - 1)).nil? + source << line + string.write_string("#{line}\x00", line.bytesize + 1) + end + } + + # In the pm_serialize_parse_stream function it accepts a pointer to the + # IO object as a void* and then passes it through to the callback as the + # third argument, but it never touches it itself. As such, since we have + # access to the IO object already through the closure of the lambda, we + # can pass a null pointer here and not worry. + LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options)) + Prism.load(source, buffer.read) + end + end + + # Mirror the Prism.parse_comments API by using the serialization API. + def parse_comments(code, **options) + LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) } + end + + # Mirror the Prism.parse_file_comments API by using the serialization + # API. This uses native strings instead of Ruby strings because it allows us + # to use mmap when it is available. + def parse_file_comments(filepath, **options) + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| parse_comments_common(string, string.read, options) } + end + + # Mirror the Prism.parse_lex API by using the serialization API. + def parse_lex(code, **options) + LibRubyParser::PrismString.with_string(code) { |string| parse_lex_common(string, code, options) } + end + + # Mirror the Prism.parse_lex_file API by using the serialization API. + def parse_lex_file(filepath, **options) + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| parse_lex_common(string, string.read, options) } + end + + # Mirror the Prism.parse_success? API by using the serialization API. + def parse_success?(code, **options) + LibRubyParser::PrismString.with_string(code) { |string| parse_file_success_common(string, options) } + end + + # Mirror the Prism.parse_failure? API by using the serialization API. + def parse_failure?(code, **options) + !parse_success?(code, **options) + end + + # Mirror the Prism.parse_file_success? API by using the serialization API. + def parse_file_success?(filepath, **options) + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| parse_file_success_common(string, options) } + end + + # Mirror the Prism.parse_file_failure? API by using the serialization API. + def parse_file_failure?(filepath, **options) + !parse_file_success?(filepath, **options) + end + + private + + def dump_common(string, options) # :nodoc: + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + buffer.read + end + end + + def lex_common(string, code, options) # :nodoc: + serialized = LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) + buffer.read + end + + Serialize.load_tokens(Source.for(code), serialized) + end + + def parse_common(string, code, options) # :nodoc: + serialized = dump_common(string, options) + Prism.load(code, serialized) + end + + def parse_comments_common(string, code, options) # :nodoc: + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options)) + + source = Source.for(code) + loader = Serialize::Loader.new(source, buffer.read) + + loader.load_header + loader.load_encoding + loader.load_start_line + loader.load_comments + end + end + + def parse_lex_common(string, code, options) # :nodoc: + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) + + source = Source.for(code) + loader = Serialize::Loader.new(source, buffer.read) + + tokens = loader.load_tokens + node, comments, magic_comments, data_loc, errors, warnings = loader.load_nodes + tokens.each { |token,| token.value.force_encoding(loader.encoding) } + + ParseLexResult.new([node, tokens], comments, magic_comments, data_loc, errors, warnings, source) + end + end + + def parse_file_success_common(string, options) # :nodoc: + LibRubyParser.pm_parse_success_p(string.pointer, string.length, dump_options(options)) + end + + # Return the value that should be dumped for the command_line option. + def dump_options_command_line(options) + command_line = options.fetch(:command_line, "") + raise ArgumentError, "command_line must be a string" unless command_line.is_a?(String) + + command_line.each_char.inject(0) do |value, char| + case char + when "a" then value | 0b000001 + when "e" then value | 0b000010 + when "l" then value | 0b000100 + when "n" then value | 0b001000 + when "p" then value | 0b010000 + when "x" then value | 0b100000 + else raise ArgumentError, "invalid command_line option: #{char}" + end + end + end + + # Convert the given options into a serialized options string. + def dump_options(options) + template = +"" + values = [] + + template << "L" + if (filepath = options[:filepath]) + values.push(filepath.bytesize, filepath.b) + template << "A*" + else + values << 0 + end + + template << "l" + values << options.fetch(:line, 1) + + template << "L" + if (encoding = options[:encoding]) + name = encoding.is_a?(Encoding) ? encoding.name : encoding + values.push(name.bytesize, name.b) + template << "A*" + else + values << 0 + end + + template << "C" + values << (options.fetch(:frozen_string_literal, false) ? 1 : 0) + + template << "C" + values << dump_options_command_line(options) + + template << "C" + values << { nil => 0, "3.3.0" => 1, "3.3.1" => 1, "3.4.0" => 0, "latest" => 0 }.fetch(options[:version]) + + template << "L" + if (scopes = options[:scopes]) + values << scopes.length + + scopes.each do |scope| + template << "L" + values << scope.length + + scope.each do |local| + name = local.name + template << "L" + values << name.bytesize + + template << "A*" + values << name.b + end + end + else + values << 0 + end + + values.pack(template) + end + end +end diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb new file mode 100644 index 0000000000..4f8e443a3b --- /dev/null +++ b/lib/prism/lex_compat.rb @@ -0,0 +1,927 @@ +# frozen_string_literal: true + +require "delegate" +require "ripper" + +module Prism + # This class is responsible for lexing the source using prism and then + # converting those tokens to be compatible with Ripper. In the vast majority + # of cases, this is a one-to-one mapping of the token type. Everything else + # generally lines up. However, there are a few cases that require special + # handling. + class LexCompat # :nodoc: + # A result class specialized for holding tokens produced by the lexer. + class Result < Prism::Result + # The list of tokens that were produced by the lexer. + attr_reader :value + + # Create a new lex compat result object with the given values. + def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + @value = value + super(comments, magic_comments, data_loc, errors, warnings, source) + end + + # Implement the hash pattern matching interface for Result. + def deconstruct_keys(keys) + super.merge!(value: value) + end + end + + # This is a mapping of prism token types to Ripper token types. This is a + # many-to-one mapping because we split up our token types, whereas Ripper + # tends to group them. + RIPPER = { + AMPERSAND: :on_op, + AMPERSAND_AMPERSAND: :on_op, + AMPERSAND_AMPERSAND_EQUAL: :on_op, + AMPERSAND_DOT: :on_op, + AMPERSAND_EQUAL: :on_op, + BACK_REFERENCE: :on_backref, + BACKTICK: :on_backtick, + BANG: :on_op, + BANG_EQUAL: :on_op, + BANG_TILDE: :on_op, + BRACE_LEFT: :on_lbrace, + BRACE_RIGHT: :on_rbrace, + BRACKET_LEFT: :on_lbracket, + BRACKET_LEFT_ARRAY: :on_lbracket, + BRACKET_LEFT_RIGHT: :on_op, + BRACKET_LEFT_RIGHT_EQUAL: :on_op, + BRACKET_RIGHT: :on_rbracket, + CARET: :on_op, + CARET_EQUAL: :on_op, + CHARACTER_LITERAL: :on_CHAR, + CLASS_VARIABLE: :on_cvar, + COLON: :on_op, + COLON_COLON: :on_op, + COMMA: :on_comma, + COMMENT: :on_comment, + CONSTANT: :on_const, + DOT: :on_period, + DOT_DOT: :on_op, + DOT_DOT_DOT: :on_op, + EMBDOC_BEGIN: :on_embdoc_beg, + EMBDOC_END: :on_embdoc_end, + EMBDOC_LINE: :on_embdoc, + EMBEXPR_BEGIN: :on_embexpr_beg, + EMBEXPR_END: :on_embexpr_end, + EMBVAR: :on_embvar, + EOF: :on_eof, + EQUAL: :on_op, + EQUAL_EQUAL: :on_op, + EQUAL_EQUAL_EQUAL: :on_op, + EQUAL_GREATER: :on_op, + EQUAL_TILDE: :on_op, + FLOAT: :on_float, + FLOAT_IMAGINARY: :on_imaginary, + FLOAT_RATIONAL: :on_rational, + FLOAT_RATIONAL_IMAGINARY: :on_imaginary, + GREATER: :on_op, + GREATER_EQUAL: :on_op, + GREATER_GREATER: :on_op, + GREATER_GREATER_EQUAL: :on_op, + GLOBAL_VARIABLE: :on_gvar, + HEREDOC_END: :on_heredoc_end, + HEREDOC_START: :on_heredoc_beg, + IDENTIFIER: :on_ident, + IGNORED_NEWLINE: :on_ignored_nl, + INTEGER: :on_int, + INTEGER_IMAGINARY: :on_imaginary, + INTEGER_RATIONAL: :on_rational, + INTEGER_RATIONAL_IMAGINARY: :on_imaginary, + INSTANCE_VARIABLE: :on_ivar, + INVALID: :INVALID, + KEYWORD___ENCODING__: :on_kw, + KEYWORD___LINE__: :on_kw, + KEYWORD___FILE__: :on_kw, + KEYWORD_ALIAS: :on_kw, + KEYWORD_AND: :on_kw, + KEYWORD_BEGIN: :on_kw, + KEYWORD_BEGIN_UPCASE: :on_kw, + KEYWORD_BREAK: :on_kw, + KEYWORD_CASE: :on_kw, + KEYWORD_CLASS: :on_kw, + KEYWORD_DEF: :on_kw, + KEYWORD_DEFINED: :on_kw, + KEYWORD_DO: :on_kw, + KEYWORD_DO_LOOP: :on_kw, + KEYWORD_ELSE: :on_kw, + KEYWORD_ELSIF: :on_kw, + KEYWORD_END: :on_kw, + KEYWORD_END_UPCASE: :on_kw, + KEYWORD_ENSURE: :on_kw, + KEYWORD_FALSE: :on_kw, + KEYWORD_FOR: :on_kw, + KEYWORD_IF: :on_kw, + KEYWORD_IF_MODIFIER: :on_kw, + KEYWORD_IN: :on_kw, + KEYWORD_MODULE: :on_kw, + KEYWORD_NEXT: :on_kw, + KEYWORD_NIL: :on_kw, + KEYWORD_NOT: :on_kw, + KEYWORD_OR: :on_kw, + KEYWORD_REDO: :on_kw, + KEYWORD_RESCUE: :on_kw, + KEYWORD_RESCUE_MODIFIER: :on_kw, + KEYWORD_RETRY: :on_kw, + KEYWORD_RETURN: :on_kw, + KEYWORD_SELF: :on_kw, + KEYWORD_SUPER: :on_kw, + KEYWORD_THEN: :on_kw, + KEYWORD_TRUE: :on_kw, + KEYWORD_UNDEF: :on_kw, + KEYWORD_UNLESS: :on_kw, + KEYWORD_UNLESS_MODIFIER: :on_kw, + KEYWORD_UNTIL: :on_kw, + KEYWORD_UNTIL_MODIFIER: :on_kw, + KEYWORD_WHEN: :on_kw, + KEYWORD_WHILE: :on_kw, + KEYWORD_WHILE_MODIFIER: :on_kw, + KEYWORD_YIELD: :on_kw, + LABEL: :on_label, + LABEL_END: :on_label_end, + LAMBDA_BEGIN: :on_tlambeg, + LESS: :on_op, + LESS_EQUAL: :on_op, + LESS_EQUAL_GREATER: :on_op, + LESS_LESS: :on_op, + LESS_LESS_EQUAL: :on_op, + METHOD_NAME: :on_ident, + MINUS: :on_op, + MINUS_EQUAL: :on_op, + MINUS_GREATER: :on_tlambda, + NEWLINE: :on_nl, + NUMBERED_REFERENCE: :on_backref, + PARENTHESIS_LEFT: :on_lparen, + PARENTHESIS_LEFT_PARENTHESES: :on_lparen, + PARENTHESIS_RIGHT: :on_rparen, + PERCENT: :on_op, + PERCENT_EQUAL: :on_op, + PERCENT_LOWER_I: :on_qsymbols_beg, + PERCENT_LOWER_W: :on_qwords_beg, + PERCENT_LOWER_X: :on_backtick, + PERCENT_UPPER_I: :on_symbols_beg, + PERCENT_UPPER_W: :on_words_beg, + PIPE: :on_op, + PIPE_EQUAL: :on_op, + PIPE_PIPE: :on_op, + PIPE_PIPE_EQUAL: :on_op, + PLUS: :on_op, + PLUS_EQUAL: :on_op, + QUESTION_MARK: :on_op, + RATIONAL_FLOAT: :on_rational, + RATIONAL_INTEGER: :on_rational, + REGEXP_BEGIN: :on_regexp_beg, + REGEXP_END: :on_regexp_end, + SEMICOLON: :on_semicolon, + SLASH: :on_op, + SLASH_EQUAL: :on_op, + STAR: :on_op, + STAR_EQUAL: :on_op, + STAR_STAR: :on_op, + STAR_STAR_EQUAL: :on_op, + STRING_BEGIN: :on_tstring_beg, + STRING_CONTENT: :on_tstring_content, + STRING_END: :on_tstring_end, + SYMBOL_BEGIN: :on_symbeg, + TILDE: :on_op, + UAMPERSAND: :on_op, + UCOLON_COLON: :on_op, + UDOT_DOT: :on_op, + UDOT_DOT_DOT: :on_op, + UMINUS: :on_op, + UMINUS_NUM: :on_op, + UPLUS: :on_op, + USTAR: :on_op, + USTAR_STAR: :on_op, + WORDS_SEP: :on_words_sep, + "__END__": :on___end__ + }.freeze + + # When we produce tokens, we produce the same arrays that Ripper does. + # However, we add a couple of convenience methods onto them to make them a + # little easier to work with. We delegate all other methods to the array. + class Token < SimpleDelegator + # @dynamic initialize, each, [] + + # The location of the token in the source. + def location + self[0] + end + + # The type of the token. + def event + self[1] + end + + # The slice of the source that this token represents. + def value + self[2] + end + + # The state of the lexer when this token was produced. + def state + self[3] + end + end + + # Ripper doesn't include the rest of the token in the event, so we need to + # trim it down to just the content on the first line when comparing. + class EndContentToken < Token + def ==(other) # :nodoc: + [self[0], self[1], self[2][0..self[2].index("\n")], self[3]] == other + end + end + + # Tokens where state should be ignored + # used for :on_comment, :on_heredoc_end, :on_embexpr_end + class IgnoreStateToken < Token + def ==(other) # :nodoc: + self[0...-1] == other[0...-1] + end + end + + # Ident tokens for the most part are exactly the same, except sometimes we + # know an ident is a local when ripper doesn't (when they are introduced + # through named captures in regular expressions). In that case we don't + # compare the state. + class IdentToken < Token + def ==(other) # :nodoc: + (self[0...-1] == other[0...-1]) && ( + (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) || + (other[3] & Ripper::EXPR_ARG_ANY != 0) + ) + end + end + + # Ignored newlines can occasionally have a LABEL state attached to them, so + # we compare the state differently here. + class IgnoredNewlineToken < Token + def ==(other) # :nodoc: + return false unless self[0...-1] == other[0...-1] + + if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED + other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0 + else + self[3] == other[3] + end + end + end + + # If we have an identifier that follows a method name like: + # + # def foo bar + # + # then Ripper will mark bar as END|LABEL if there is a local in a parent + # scope named bar because it hasn't pushed the local table yet. We do this + # more accurately, so we need to allow comparing against both END and + # END|LABEL. + class ParamToken < Token + def ==(other) # :nodoc: + (self[0...-1] == other[0...-1]) && ( + (other[3] == Ripper::EXPR_END) || + (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL) + ) + end + end + + # A heredoc in this case is a list of tokens that belong to the body of the + # heredoc that should be appended onto the list of tokens when the heredoc + # closes. + module Heredoc # :nodoc: + # Heredocs that are no dash or tilde heredocs are just a list of tokens. + # We need to keep them around so that we can insert them in the correct + # order back into the token stream and set the state of the last token to + # the state that the heredoc was opened in. + class PlainHeredoc # :nodoc: + attr_reader :tokens + + def initialize + @tokens = [] + end + + def <<(token) + tokens << token + end + + def to_a + tokens + end + end + + # Dash heredocs are a little more complicated. They are a list of tokens + # that need to be split on "\\\n" to mimic Ripper's behavior. We also need + # to keep track of the state that the heredoc was opened in. + class DashHeredoc # :nodoc: + attr_reader :split, :tokens + + def initialize(split) + @split = split + @tokens = [] + end + + def <<(token) + tokens << token + end + + def to_a + embexpr_balance = 0 + + tokens.each_with_object([]) do |token, results| #$ Array[Token] + case token.event + when :on_embexpr_beg + embexpr_balance += 1 + results << token + when :on_embexpr_end + embexpr_balance -= 1 + results << token + when :on_tstring_content + if embexpr_balance == 0 + lineno = token[0][0] + column = token[0][1] + + if split + # Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind + # to keep the delimiter in the result. + token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index| + column = 0 if index > 0 + results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) + lineno += value.count("\n") + end + else + results << token + end + else + results << token + end + else + results << token + end + end + end + end + + # Heredocs that are dedenting heredocs are a little more complicated. + # Ripper outputs on_ignored_sp tokens for the whitespace that is being + # removed from the output. prism only modifies the node itself and keeps + # the token the same. This simplifies prism, but makes comparing against + # Ripper much harder because there is a length mismatch. + # + # Fortunately, we already have to pull out the heredoc tokens in order to + # insert them into the stream in the correct order. As such, we can do + # some extra manipulation on the tokens to make them match Ripper's + # output by mirroring the dedent logic that Ripper uses. + class DedentingHeredoc # :nodoc: + TAB_WIDTH = 8 + + attr_reader :tokens, :dedent_next, :dedent, :embexpr_balance + + def initialize + @tokens = [] + @dedent_next = true + @dedent = nil + @embexpr_balance = 0 + @ended_on_newline = false + end + + # As tokens are coming in, we track the minimum amount of common leading + # whitespace on plain string content tokens. This allows us to later + # remove that amount of whitespace from the beginning of each line. + def <<(token) + case token.event + when :on_embexpr_beg, :on_heredoc_beg + @embexpr_balance += 1 + @dedent = 0 if @dedent_next && @ended_on_newline + when :on_embexpr_end, :on_heredoc_end + @embexpr_balance -= 1 + when :on_tstring_content + if embexpr_balance == 0 + line = token.value + + if dedent_next && !(line.strip.empty? && line.end_with?("\n")) + leading = line[/\A(\s*)\n?/, 1] + next_dedent = 0 + + leading.each_char do |char| + if char == "\t" + next_dedent = next_dedent - (next_dedent % TAB_WIDTH) + TAB_WIDTH + else + next_dedent += 1 + end + end + + @dedent = [dedent, next_dedent].compact.min + @dedent_next = true + @ended_on_newline = line.end_with?("\n") + tokens << token + return + end + end + end + + @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0 + @ended_on_newline = false + tokens << token + end + + def to_a + # If every line in the heredoc is blank, we still need to split up the + # string content token into multiple tokens. + if dedent.nil? + results = [] #: Array[Token] + embexpr_balance = 0 + + tokens.each do |token| + case token.event + when :on_embexpr_beg, :on_heredoc_beg + embexpr_balance += 1 + results << token + when :on_embexpr_end, :on_heredoc_end + embexpr_balance -= 1 + results << token + when :on_tstring_content + if embexpr_balance == 0 + lineno = token[0][0] + column = token[0][1] + + token.value.split(/(?<=\n)/).each_with_index do |value, index| + column = 0 if index > 0 + results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) + lineno += 1 + end + else + results << token + end + else + results << token + end + end + + return results + end + + # If the minimum common whitespace is 0, then we need to concatenate + # string nodes together that are immediately adjacent. + if dedent == 0 + results = [] #: Array[Token] + embexpr_balance = 0 + + index = 0 + max_index = tokens.length + + while index < max_index + token = tokens[index] + results << token + index += 1 + + case token.event + when :on_embexpr_beg, :on_heredoc_beg + embexpr_balance += 1 + when :on_embexpr_end, :on_heredoc_end + embexpr_balance -= 1 + when :on_tstring_content + if embexpr_balance == 0 + while index < max_index && tokens[index].event == :on_tstring_content + token.value << tokens[index].value + index += 1 + end + end + end + end + + return results + end + + # Otherwise, we're going to run through each token in the list and + # insert on_ignored_sp tokens for the amount of dedent that we need to + # perform. We also need to remove the dedent from the beginning of + # each line of plain string content tokens. + results = [] #: Array[Token] + dedent_next = true + embexpr_balance = 0 + + tokens.each do |token| + # Notice that the structure of this conditional largely matches the + # whitespace calculation we performed above. This is because + # checking if the subsequent token needs to be dedented is common to + # both the dedent calculation and the ignored_sp insertion. + case token.event + when :on_embexpr_beg + embexpr_balance += 1 + results << token + when :on_embexpr_end + embexpr_balance -= 1 + results << token + when :on_tstring_content + if embexpr_balance == 0 + # Here we're going to split the string on newlines, but maintain + # the newlines in the resulting array. We'll do that with a look + # behind assertion. + splits = token.value.split(/(?<=\n)/) + index = 0 + + while index < splits.length + line = splits[index] + lineno = token[0][0] + index + column = token[0][1] + + # Blank lines do not count toward common leading whitespace + # calculation and do not need to be dedented. + if dedent_next || index > 0 + column = 0 + end + + # If the dedent is 0 and we're not supposed to dedent the next + # line or this line doesn't start with whitespace, then we + # should concatenate the rest of the string to match ripper. + if dedent == 0 && (!dedent_next || !line.start_with?(/\s/)) + line = splits[index..].join + index = splits.length + end + + # If we are supposed to dedent this line or if this is not the + # first line of the string and this line isn't entirely blank, + # then we need to insert an on_ignored_sp token and remove the + # dedent from the beginning of the line. + if (dedent > 0) && (dedent_next || index > 0) + deleting = 0 + deleted_chars = [] #: Array[String] + + # Gather up all of the characters that we're going to + # delete, stopping when you hit a character that would put + # you over the dedent amount. + line.each_char.with_index do |char, i| + case char + when "\r" + if line[i + 1] == "\n" + break + end + when "\n" + break + when "\t" + deleting = deleting - (deleting % TAB_WIDTH) + TAB_WIDTH + else + deleting += 1 + end + + break if deleting > dedent + deleted_chars << char + end + + # If we have something to delete, then delete it from the + # string and insert an on_ignored_sp token. + if deleted_chars.any? + ignored = deleted_chars.join + line.delete_prefix!(ignored) + + results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]]) + column = ignored.length + end + end + + results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty? + index += 1 + end + else + results << token + end + else + results << token + end + + dedent_next = + ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) && + embexpr_balance == 0 + end + + results + end + end + + # Here we will split between the two types of heredocs and return the + # object that will store their tokens. + def self.build(opening) + case opening.value[2] + when "~" + DedentingHeredoc.new + when "-" + DashHeredoc.new(opening.value[3] != "'") + else + PlainHeredoc.new + end + end + end + + private_constant :Heredoc + + attr_reader :source, :options + + def initialize(source, **options) + @source = source + @options = options + end + + def result + tokens = [] #: Array[LexCompat::Token] + + state = :default + heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]] + + result = Prism.lex(source, **options) + result_value = result.value + previous_state = nil #: Ripper::Lexer::State? + last_heredoc_end = nil #: Integer? + + # In previous versions of Ruby, Ripper wouldn't flush the bom before the + # first token, so we had to have a hack in place to account for that. This + # checks for that behavior. + bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0 + bom = source.byteslice(0..2) == "\xEF\xBB\xBF" + + result_value.each_with_index do |(token, lex_state), index| + lineno = token.location.start_line + column = token.location.start_column + + # If there's a UTF-8 byte-order mark as the start of the file, then for + # certain tokens ripper sets the first token back by 3 bytes. It also + # keeps the byte order mark in the first token's value. This is weird, + # and I don't want to mirror that in our parser. So instead, we'll match + # up the columns and values here. + if bom && lineno == 1 + column -= 3 + + if index == 0 && column == 0 && !bom_flushed + flushed = + case token.type + when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE, + :GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I, + :PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I, + :PERCENT_UPPER_W, :STRING_BEGIN + true + when :REGEXP_BEGIN, :SYMBOL_BEGIN + token.value.start_with?("%") + else + false + end + + unless flushed + column -= 3 + value = token.value + value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding)) + end + end + end + + event = RIPPER.fetch(token.type) + value = token.value + lex_state = Ripper::Lexer::State.new(lex_state) + + token = + case event + when :on___end__ + EndContentToken.new([[lineno, column], event, value, lex_state]) + when :on_comment + IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + when :on_heredoc_end + # Heredoc end tokens can be emitted in an odd order, so we don't + # want to bother comparing the state on them. + last_heredoc_end = token.location.end_offset + IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + when :on_ident + if lex_state == Ripper::EXPR_END + # If we have an identifier that follows a method name like: + # + # def foo bar + # + # then Ripper will mark bar as END|LABEL if there is a local in a + # parent scope named bar because it hasn't pushed the local table + # yet. We do this more accurately, so we need to allow comparing + # against both END and END|LABEL. + ParamToken.new([[lineno, column], event, value, lex_state]) + elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL + # In the event that we're comparing identifiers, we're going to + # allow a little divergence. Ripper doesn't account for local + # variables introduced through named captures in regexes, and we + # do, which accounts for this difference. + IdentToken.new([[lineno, column], event, value, lex_state]) + else + Token.new([[lineno, column], event, value, lex_state]) + end + when :on_embexpr_end + IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + when :on_ignored_nl + # Ignored newlines can occasionally have a LABEL state attached to + # them which doesn't actually impact anything. We don't mirror that + # state so we ignored it. + IgnoredNewlineToken.new([[lineno, column], event, value, lex_state]) + when :on_regexp_end + # On regex end, Ripper scans and then sets end state, so the ripper + # lexed output is begin, when it should be end. prism sets lex state + # correctly to end state, but we want to be able to compare against + # Ripper's lexed state. So here, if it's a regexp end token, we + # output the state as the previous state, solely for the sake of + # comparison. + previous_token = result_value[index - 1][0] + lex_state = + if RIPPER.fetch(previous_token.type) == :on_embexpr_end + # If the previous token is embexpr_end, then we have to do even + # more processing. The end of an embedded expression sets the + # state to the state that it had at the beginning of the + # embedded expression. So we have to go and find that state and + # set it here. + counter = 1 + current_index = index - 1 + + until counter == 0 + current_index -= 1 + current_event = RIPPER.fetch(result_value[current_index][0].type) + counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0 + end + + Ripper::Lexer::State.new(result_value[current_index][1]) + else + previous_state + end + + Token.new([[lineno, column], event, value, lex_state]) + when :on_eof + previous_token = result_value[index - 1][0] + + # If we're at the end of the file and the previous token was a + # comment and there is still whitespace after the comment, then + # Ripper will append a on_nl token (even though there isn't + # necessarily a newline). We mirror that here. + if previous_token.type == :COMMENT + # If the comment is at the start of a heredoc: <<HEREDOC # comment + # then the comment's end_offset is up near the heredoc_beg. + # This is not the correct offset to use for figuring out if + # there is trailing whitespace after the last token. + # Use the greater offset of the two to determine the start of + # the trailing whitespace. + start_offset = [previous_token.location.end_offset, last_heredoc_end].compact.max + end_offset = token.location.start_offset + + if start_offset < end_offset + if bom + start_offset += 3 + end_offset += 3 + end + + tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state]) + end + end + + Token.new([[lineno, column], event, value, lex_state]) + else + Token.new([[lineno, column], event, value, lex_state]) + end + + previous_state = lex_state + + # The order in which tokens appear in our lexer is different from the + # order that they appear in Ripper. When we hit the declaration of a + # heredoc in prism, we skip forward and lex the rest of the content of + # the heredoc before going back and lexing at the end of the heredoc + # identifier. + # + # To match up to ripper, we keep a small state variable around here to + # track whether we're in the middle of a heredoc or not. In this way we + # can shuffle around the token to match Ripper's output. + case state + when :default + # The default state is when there are no heredocs at all. In this + # state we can append the token to the list of tokens and move on. + tokens << token + + # If we get the declaration of a heredoc, then we open a new heredoc + # and move into the heredoc_opened state. + if event == :on_heredoc_beg + state = :heredoc_opened + heredoc_stack.last << Heredoc.build(token) + end + when :heredoc_opened + # The heredoc_opened state is when we've seen the declaration of a + # heredoc and are now lexing the body of the heredoc. In this state we + # push tokens onto the most recently created heredoc. + heredoc_stack.last.last << token + + case event + when :on_heredoc_beg + # If we receive a heredoc declaration while lexing the body of a + # heredoc, this means we have nested heredocs. In this case we'll + # push a new heredoc onto the stack and stay in the heredoc_opened + # state since we're now lexing the body of the new heredoc. + heredoc_stack << [Heredoc.build(token)] + when :on_heredoc_end + # If we receive the end of a heredoc, then we're done lexing the + # body of the heredoc. In this case we now have a completed heredoc + # but need to wait for the next newline to push it into the token + # stream. + state = :heredoc_closed + end + when :heredoc_closed + if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n")) + if heredoc_stack.size > 1 + flushing = heredoc_stack.pop + heredoc_stack.last.last << token + + flushing.each do |heredoc| + heredoc.to_a.each do |flushed_token| + heredoc_stack.last.last << flushed_token + end + end + + state = :heredoc_opened + next + end + elsif event == :on_heredoc_beg + tokens << token + state = :heredoc_opened + heredoc_stack.last << Heredoc.build(token) + next + elsif heredoc_stack.size > 1 + heredoc_stack[-2].last << token + next + end + + heredoc_stack.last.each do |heredoc| + tokens.concat(heredoc.to_a) + end + + heredoc_stack.last.clear + state = :default + + tokens << token + end + end + + # Drop the EOF token from the list + tokens = tokens[0...-1] + + # We sort by location to compare against Ripper's output + tokens.sort_by!(&:location) + + Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.for(source)) + end + end + + private_constant :LexCompat + + # This is a class that wraps the Ripper lexer to produce almost exactly the + # same tokens. + class LexRipper # :nodoc: + attr_reader :source + + def initialize(source) + @source = source + end + + def result + previous = [] #: [[Integer, Integer], Symbol, String, untyped] | [] + results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]] + + lex(source).each do |token| + case token[1] + when :on_sp + # skip + when :on_tstring_content + if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@")) + previous[2] << token[2] + else + results << token + previous = token + end + when :on_words_sep + if previous[1] == :on_words_sep + previous[2] << token[2] + else + results << token + previous = token + end + else + results << token + previous = token + end + end + + results + end + + private + + if Ripper.method(:lex).parameters.assoc(:keyrest) + def lex(source) + Ripper.lex(source, raise_errors: true) + end + else + def lex(source) + ripper = Ripper::Lexer.new(source) + ripper.lex.tap do |result| + raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any? + end + end + end + end + + private_constant :LexRipper +end diff --git a/lib/prism/node_ext.rb b/lib/prism/node_ext.rb new file mode 100644 index 0000000000..cdc4b2bf08 --- /dev/null +++ b/lib/prism/node_ext.rb @@ -0,0 +1,443 @@ +# frozen_string_literal: true + +# Here we are reopening the prism module to provide methods on nodes that aren't +# templated and are meant as convenience methods. +module Prism + class Node + def deprecated(*replacements) # :nodoc: + suggest = replacements.map { |replacement| "#{self.class}##{replacement}" } + warn(<<~MSG, category: :deprecated) + [deprecation]: #{self.class}##{caller_locations(1, 1)[0].label} is deprecated \ + and will be removed in the next major version. Use #{suggest.join("/")} instead. + #{(caller(1, 3) || []).join("\n")} + MSG + end + end + + module RegularExpressionOptions # :nodoc: + # Returns a numeric value that represents the flags that were used to create + # the regular expression. + def options + o = flags & (RegularExpressionFlags::IGNORE_CASE | RegularExpressionFlags::EXTENDED | RegularExpressionFlags::MULTI_LINE) + o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8) + o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT) + o + end + end + + class InterpolatedMatchLastLineNode < Node + include RegularExpressionOptions + end + + class InterpolatedRegularExpressionNode < Node + include RegularExpressionOptions + end + + class MatchLastLineNode < Node + include RegularExpressionOptions + end + + class RegularExpressionNode < Node + include RegularExpressionOptions + end + + private_constant :RegularExpressionOptions + + module HeredocQuery # :nodoc: + # Returns true if this node was represented as a heredoc in the source code. + def heredoc? + opening&.start_with?("<<") + end + end + + class InterpolatedStringNode < Node + include HeredocQuery + end + + class InterpolatedXStringNode < Node + include HeredocQuery + end + + class StringNode < Node + include HeredocQuery + + # Occasionally it's helpful to treat a string as if it were interpolated so + # that there's a consistent interface for working with strings. + def to_interpolated + InterpolatedStringNode.new( + source, + frozen? ? InterpolatedStringNodeFlags::FROZEN : 0, + opening_loc, + [copy(opening_loc: nil, closing_loc: nil, location: content_loc)], + closing_loc, + location + ) + end + end + + class XStringNode < Node + include HeredocQuery + + # Occasionally it's helpful to treat a string as if it were interpolated so + # that there's a consistent interface for working with strings. + def to_interpolated + InterpolatedXStringNode.new( + source, + opening_loc, + [StringNode.new(source, 0, nil, content_loc, nil, unescaped, content_loc)], + closing_loc, + location + ) + end + end + + private_constant :HeredocQuery + + class ImaginaryNode < Node + # Returns the value of the node as a Ruby Complex. + def value + Complex(0, numeric.value) + end + end + + class RationalNode < Node + # Returns the value of the node as a Ruby Rational. + def value + Rational(numerator, denominator) + end + + # Returns the value of the node as an IntegerNode or a FloatNode. This + # method is deprecated in favor of #value or #numerator/#denominator. + def numeric + deprecated("value", "numerator", "denominator") + + if denominator == 1 + IntegerNode.new(source, flags, numerator, location.chop) + else + FloatNode.new(source, numerator.to_f / denominator, location.chop) + end + end + end + + class ConstantReadNode < Node + # Returns the list of parts for the full name of this constant. + # For example: [:Foo] + def full_name_parts + [name] + end + + # Returns the full name of this constant. For example: "Foo" + def full_name + name.to_s + end + end + + class ConstantWriteNode < Node + # Returns the list of parts for the full name of this constant. + # For example: [:Foo] + def full_name_parts + [name] + end + + # Returns the full name of this constant. For example: "Foo" + def full_name + name.to_s + end + end + + class ConstantPathNode < Node + # An error class raised when dynamic parts are found while computing a + # constant path's full name. For example: + # Foo::Bar::Baz -> does not raise because all parts of the constant path are + # simple constants + # var::Bar::Baz -> raises because the first part of the constant path is a + # local variable + class DynamicPartsInConstantPathError < StandardError; end + + # An error class raised when missing nodes are found while computing a + # constant path's full name. For example: + # Foo:: -> raises because the constant path is missing the last part + class MissingNodesInConstantPathError < StandardError; end + + # Returns the list of parts for the full name of this constant path. + # For example: [:Foo, :Bar] + def full_name_parts + parts = [] #: Array[Symbol] + current = self #: node? + + while current.is_a?(ConstantPathNode) + name = current.name + if name.nil? + raise MissingNodesInConstantPathError, "Constant path contains missing nodes. Cannot compute full name" + end + + parts.unshift(name) + current = current.parent + end + + if !current.is_a?(ConstantReadNode) && !current.nil? + raise DynamicPartsInConstantPathError, "Constant path contains dynamic parts. Cannot compute full name" + end + + parts.unshift(current&.name || :"") + end + + # Returns the full name of this constant path. For example: "Foo::Bar" + def full_name + full_name_parts.join("::") + end + + # Previously, we had a child node on this class that contained either a + # constant read or a missing node. To not cause a breaking change, we + # continue to supply that API. + def child + deprecated("name", "name_loc") + name ? ConstantReadNode.new(source, name, name_loc) : MissingNode.new(source, location) + end + end + + class ConstantPathTargetNode < Node + # Returns the list of parts for the full name of this constant path. + # For example: [:Foo, :Bar] + def full_name_parts + parts = + case parent + when ConstantPathNode, ConstantReadNode + parent.full_name_parts + when nil + [:""] + else + # e.g. self::Foo, (var)::Bar = baz + raise ConstantPathNode::DynamicPartsInConstantPathError, "Constant target path contains dynamic parts. Cannot compute full name" + end + + if name.nil? + raise ConstantPathNode::MissingNodesInConstantPathError, "Constant target path contains missing nodes. Cannot compute full name" + end + + parts.push(name) + end + + # Returns the full name of this constant path. For example: "Foo::Bar" + def full_name + full_name_parts.join("::") + end + + # Previously, we had a child node on this class that contained either a + # constant read or a missing node. To not cause a breaking change, we + # continue to supply that API. + def child + deprecated("name", "name_loc") + name ? ConstantReadNode.new(source, name, name_loc) : MissingNode.new(source, location) + end + end + + class ConstantTargetNode < Node + # Returns the list of parts for the full name of this constant. + # For example: [:Foo] + def full_name_parts + [name] + end + + # Returns the full name of this constant. For example: "Foo" + def full_name + name.to_s + end + end + + class ParametersNode < Node + # Mirrors the Method#parameters method. + def signature + names = [] #: Array[[Symbol, Symbol] | [Symbol]] + + requireds.each do |param| + names << (param.is_a?(MultiTargetNode) ? [:req] : [:req, param.name]) + end + + optionals.each { |param| names << [:opt, param.name] } + + if rest && rest.is_a?(RestParameterNode) + names << [:rest, rest.name || :*] + end + + posts.each do |param| + if param.is_a?(MultiTargetNode) + names << [:req] + elsif param.is_a?(NoKeywordsParameterNode) + # Invalid syntax, e.g. "def f(**nil, ...)" moves the NoKeywordsParameterNode to posts + raise "Invalid syntax" + else + names << [:req, param.name] + end + end + + # Regardless of the order in which the keywords were defined, the required + # keywords always come first followed by the optional keywords. + keyopt = [] #: Array[OptionalKeywordParameterNode] + keywords.each do |param| + if param.is_a?(OptionalKeywordParameterNode) + keyopt << param + else + names << [:keyreq, param.name] + end + end + + keyopt.each { |param| names << [:key, param.name] } + + case keyword_rest + when ForwardingParameterNode + names.concat([[:rest, :*], [:keyrest, :**], [:block, :&]]) + when KeywordRestParameterNode + names << [:keyrest, keyword_rest.name || :**] + when NoKeywordsParameterNode + names << [:nokey] + end + + names << [:block, block.name || :&] if block + names + end + end + + class CallNode < Node + # When a call node has the attribute_write flag set, it means that the call + # is using the attribute write syntax. This is either a method call to []= + # or a method call to a method that ends with =. Either way, the = sign is + # present in the source. + # + # Prism returns the message_loc _without_ the = sign attached, because there + # can be any amount of space between the message and the = sign. However, + # sometimes you want the location of the full message including the inner + # space and the = sign. This method provides that. + def full_message_loc + attribute_write? ? message_loc&.adjoin("=") : message_loc + end + end + + class CallOperatorWriteNode < Node + # Returns the binary operator used to modify the receiver. This method is + # deprecated in favor of #binary_operator. + def operator + deprecated("binary_operator") + binary_operator + end + + # Returns the location of the binary operator used to modify the receiver. + # This method is deprecated in favor of #binary_operator_loc. + def operator_loc + deprecated("binary_operator_loc") + binary_operator_loc + end + end + + class ClassVariableOperatorWriteNode < Node + # Returns the binary operator used to modify the receiver. This method is + # deprecated in favor of #binary_operator. + def operator + deprecated("binary_operator") + binary_operator + end + + # Returns the location of the binary operator used to modify the receiver. + # This method is deprecated in favor of #binary_operator_loc. + def operator_loc + deprecated("binary_operator_loc") + binary_operator_loc + end + end + + class ConstantOperatorWriteNode < Node + # Returns the binary operator used to modify the receiver. This method is + # deprecated in favor of #binary_operator. + def operator + deprecated("binary_operator") + binary_operator + end + + # Returns the location of the binary operator used to modify the receiver. + # This method is deprecated in favor of #binary_operator_loc. + def operator_loc + deprecated("binary_operator_loc") + binary_operator_loc + end + end + + class ConstantPathOperatorWriteNode < Node + # Returns the binary operator used to modify the receiver. This method is + # deprecated in favor of #binary_operator. + def operator + deprecated("binary_operator") + binary_operator + end + + # Returns the location of the binary operator used to modify the receiver. + # This method is deprecated in favor of #binary_operator_loc. + def operator_loc + deprecated("binary_operator_loc") + binary_operator_loc + end + end + + class GlobalVariableOperatorWriteNode < Node + # Returns the binary operator used to modify the receiver. This method is + # deprecated in favor of #binary_operator. + def operator + deprecated("binary_operator") + binary_operator + end + + # Returns the location of the binary operator used to modify the receiver. + # This method is deprecated in favor of #binary_operator_loc. + def operator_loc + deprecated("binary_operator_loc") + binary_operator_loc + end + end + + class IndexOperatorWriteNode < Node + # Returns the binary operator used to modify the receiver. This method is + # deprecated in favor of #binary_operator. + def operator + deprecated("binary_operator") + binary_operator + end + + # Returns the location of the binary operator used to modify the receiver. + # This method is deprecated in favor of #binary_operator_loc. + def operator_loc + deprecated("binary_operator_loc") + binary_operator_loc + end + end + + class InstanceVariableOperatorWriteNode < Node + # Returns the binary operator used to modify the receiver. This method is + # deprecated in favor of #binary_operator. + def operator + deprecated("binary_operator") + binary_operator + end + + # Returns the location of the binary operator used to modify the receiver. + # This method is deprecated in favor of #binary_operator_loc. + def operator_loc + deprecated("binary_operator_loc") + binary_operator_loc + end + end + + class LocalVariableOperatorWriteNode < Node + # Returns the binary operator used to modify the receiver. This method is + # deprecated in favor of #binary_operator. + def operator + deprecated("binary_operator") + binary_operator + end + + # Returns the location of the binary operator used to modify the receiver. + # This method is deprecated in favor of #binary_operator_loc. + def operator_loc + deprecated("binary_operator_loc") + binary_operator_loc + end + end +end diff --git a/lib/prism/pack.rb b/lib/prism/pack.rb new file mode 100644 index 0000000000..c0de8ab8b7 --- /dev/null +++ b/lib/prism/pack.rb @@ -0,0 +1,228 @@ +# frozen_string_literal: true +# typed: ignore + +module Prism + # A parser for the pack template language. + module Pack + %i[ + SPACE + COMMENT + INTEGER + UTF8 + BER + FLOAT + STRING_SPACE_PADDED + STRING_NULL_PADDED + STRING_NULL_TERMINATED + STRING_MSB + STRING_LSB + STRING_HEX_HIGH + STRING_HEX_LOW + STRING_UU + STRING_MIME + STRING_BASE64 + STRING_FIXED + STRING_POINTER + MOVE + BACK + NULL + + UNSIGNED + SIGNED + SIGNED_NA + + AGNOSTIC_ENDIAN + LITTLE_ENDIAN + BIG_ENDIAN + NATIVE_ENDIAN + ENDIAN_NA + + SIZE_SHORT + SIZE_INT + SIZE_LONG + SIZE_LONG_LONG + SIZE_8 + SIZE_16 + SIZE_32 + SIZE_64 + SIZE_P + SIZE_NA + + LENGTH_FIXED + LENGTH_MAX + LENGTH_RELATIVE + LENGTH_NA + ].each do |const| + const_set(const, const) + end + + # A directive in the pack template language. + class Directive + # A symbol representing the version of Ruby. + attr_reader :version + + # A symbol representing whether or not we are packing or unpacking. + attr_reader :variant + + # A byteslice of the source string that this directive represents. + attr_reader :source + + # The type of the directive. + attr_reader :type + + # The type of signedness of the directive. + attr_reader :signed + + # The type of endianness of the directive. + attr_reader :endian + + # The size of the directive. + attr_reader :size + + # The length type of this directive (used for integers). + attr_reader :length_type + + # The length of this directive (used for integers). + attr_reader :length + + # Initialize a new directive with the given values. + def initialize(version, variant, source, type, signed, endian, size, length_type, length) + @version = version + @variant = variant + @source = source + @type = type + @signed = signed + @endian = endian + @size = size + @length_type = length_type + @length = length + end + + # The descriptions of the various types of endianness. + ENDIAN_DESCRIPTIONS = { + AGNOSTIC_ENDIAN: "agnostic", + LITTLE_ENDIAN: "little-endian (VAX)", + BIG_ENDIAN: "big-endian (network)", + NATIVE_ENDIAN: "native-endian", + ENDIAN_NA: "n/a" + } + + # The descriptions of the various types of signedness. + SIGNED_DESCRIPTIONS = { + UNSIGNED: "unsigned", + SIGNED: "signed", + SIGNED_NA: "n/a" + } + + # The descriptions of the various types of sizes. + SIZE_DESCRIPTIONS = { + SIZE_SHORT: "short", + SIZE_INT: "int-width", + SIZE_LONG: "long", + SIZE_LONG_LONG: "long long", + SIZE_8: "8-bit", + SIZE_16: "16-bit", + SIZE_32: "32-bit", + SIZE_64: "64-bit", + SIZE_P: "pointer-width" + } + + # Provide a human-readable description of the directive. + def describe + case type + when SPACE + "whitespace" + when COMMENT + "comment" + when INTEGER + if size == SIZE_8 + base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer" + else + base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer" + end + case length_type + when LENGTH_FIXED + if length > 1 + base + ", x#{length}" + else + base + end + when LENGTH_MAX + base + ", as many as possible" + else + raise + end + when UTF8 + "UTF-8 character" + when BER + "BER-compressed integer" + when FLOAT + "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float" + when STRING_SPACE_PADDED + "arbitrary binary string (space padded)" + when STRING_NULL_PADDED + "arbitrary binary string (null padded, count is width)" + when STRING_NULL_TERMINATED + "arbitrary binary string (null padded, count is width), except that null is added with *" + when STRING_MSB + "bit string (MSB first)" + when STRING_LSB + "bit string (LSB first)" + when STRING_HEX_HIGH + "hex string (high nibble first)" + when STRING_HEX_LOW + "hex string (low nibble first)" + when STRING_UU + "UU-encoded string" + when STRING_MIME + "quoted printable, MIME encoding" + when STRING_BASE64 + "base64 encoded string" + when STRING_FIXED + "pointer to a structure (fixed-length string)" + when STRING_POINTER + "pointer to a null-terminated string" + when MOVE + "move to absolute position" + when BACK + "back up a byte" + when NULL + "null byte" + else + raise + end + end + end + + # The result of parsing a pack template. + class Format + # A list of the directives in the template. + attr_reader :directives + + # The encoding of the template. + attr_reader :encoding + + # Create a new Format with the given directives and encoding. + def initialize(directives, encoding) + @directives = directives + @encoding = encoding + end + + # Provide a human-readable description of the format. + def describe + source_width = directives.map { |d| d.source.inspect.length }.max + directive_lines = directives.map do |directive| + if directive.type == SPACE + source = directive.source.inspect + else + source = directive.source + end + # @type var source_width: Integer + " #{source.ljust(source_width)} #{directive.describe}" + end + + (["Directives:"] + directive_lines + ["Encoding:", " #{encoding}"]).join("\n") + end + end + end +end diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb new file mode 100644 index 0000000000..798fde09e5 --- /dev/null +++ b/lib/prism/parse_result.rb @@ -0,0 +1,698 @@ +# frozen_string_literal: true + +module Prism + # This represents a source of Ruby code that has been parsed. It is used in + # conjunction with locations to allow them to resolve line numbers and source + # ranges. + class Source + # Create a new source object with the given source code. This method should + # be used instead of `new` and it will return either a `Source` or a + # specialized and more performant `ASCIISource` if no multibyte characters + # are present in the source code. + def self.for(source, start_line = 1, offsets = []) + source.ascii_only? ? ASCIISource.new(source, start_line, offsets): new(source, start_line, offsets) + end + + # The source code that this source object represents. + attr_reader :source + + # The line number where this source starts. + attr_reader :start_line + + # The list of newline byte offsets in the source code. + attr_reader :offsets + + # Create a new source object with the given source code. + def initialize(source, start_line = 1, offsets = []) + @source = source + @start_line = start_line # set after parsing is done + @offsets = offsets # set after parsing is done + end + + # Returns the encoding of the source code, which is set by parameters to the + # parser or by the encoding magic comment. + def encoding + source.encoding + end + + # Returns the lines of the source code as an array of strings. + def lines + source.lines + end + + # Perform a byteslice on the source code using the given byte offset and + # byte length. + def slice(byte_offset, length) + source.byteslice(byte_offset, length) or raise + end + + # Binary search through the offsets to find the line number for the given + # byte offset. + def line(byte_offset) + start_line + find_line(byte_offset) + end + + # Return the byte offset of the start of the line corresponding to the given + # byte offset. + def line_start(byte_offset) + offsets[find_line(byte_offset)] + end + + # Returns the byte offset of the end of the line corresponding to the given + # byte offset. + def line_end(byte_offset) + offsets[find_line(byte_offset) + 1] || source.bytesize + end + + # Return the column number for the given byte offset. + def column(byte_offset) + byte_offset - line_start(byte_offset) + end + + # Return the character offset for the given byte offset. + def character_offset(byte_offset) + (source.byteslice(0, byte_offset) or raise).length + end + + # Return the column number in characters for the given byte offset. + def character_column(byte_offset) + character_offset(byte_offset) - character_offset(line_start(byte_offset)) + end + + # Returns the offset from the start of the file for the given byte offset + # counting in code units for the given encoding. + # + # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the + # concept of code units that differs from the number of characters in other + # encodings, it is not captured here. + def code_units_offset(byte_offset, encoding) + byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding) + (encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? (byteslice.bytesize / 2) : byteslice.length + end + + # Returns the column number in code units for the given encoding for the + # given byte offset. + def code_units_column(byte_offset, encoding) + code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding) + end + + private + + # Binary search through the offsets to find the line number for the given + # byte offset. + def find_line(byte_offset) + left = 0 + right = offsets.length - 1 + + while left <= right + mid = left + (right - left) / 2 + return mid if (offset = offsets[mid]) == byte_offset + + if offset < byte_offset + left = mid + 1 + else + right = mid - 1 + end + end + + left - 1 + end + end + + # Specialized version of Prism::Source for source code that includes ASCII + # characters only. This class is used to apply performance optimizations that + # cannot be applied to sources that include multibyte characters. Sources that + # include multibyte characters are represented by the Prism::Source class. + class ASCIISource < Source + # Return the character offset for the given byte offset. + def character_offset(byte_offset) + byte_offset + end + + # Return the column number in characters for the given byte offset. + def character_column(byte_offset) + byte_offset - line_start(byte_offset) + end + + # Returns the offset from the start of the file for the given byte offset + # counting in code units for the given encoding. + # + # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the + # concept of code units that differs from the number of characters in other + # encodings, it is not captured here. + def code_units_offset(byte_offset, encoding) + byte_offset + end + + # Specialized version of `code_units_column` that does not depend on + # `code_units_offset`, which is a more expensive operation. This is + # essentialy the same as `Prism::Source#column`. + def code_units_column(byte_offset, encoding) + byte_offset - line_start(byte_offset) + end + end + + # This represents a location in the source. + class Location + # A Source object that is used to determine more information from the given + # offset and length. + attr_reader :source + protected :source + + # The byte offset from the beginning of the source where this location + # starts. + attr_reader :start_offset + + # The length of this location in bytes. + attr_reader :length + + # Create a new location object with the given source, start byte offset, and + # byte length. + def initialize(source, start_offset, length) + @source = source + @start_offset = start_offset + @length = length + + # These are used to store comments that are associated with this location. + # They are initialized to `nil` to save on memory when there are no + # comments to be attached and/or the comment-related APIs are not used. + @leading_comments = nil + @trailing_comments = nil + end + + # These are the comments that are associated with this location that exist + # before the start of this location. + def leading_comments + @leading_comments ||= [] + end + + # Attach a comment to the leading comments of this location. + def leading_comment(comment) + leading_comments << comment + end + + # These are the comments that are associated with this location that exist + # after the end of this location. + def trailing_comments + @trailing_comments ||= [] + end + + # Attach a comment to the trailing comments of this location. + def trailing_comment(comment) + trailing_comments << comment + end + + # Returns all comments that are associated with this location (both leading + # and trailing comments). + def comments + [*@leading_comments, *@trailing_comments] + end + + # Create a new location object with the given options. + def copy(source: self.source, start_offset: self.start_offset, length: self.length) + Location.new(source, start_offset, length) + end + + # Returns a new location that is the result of chopping off the last byte. + def chop + copy(length: length == 0 ? length : length - 1) + end + + # Returns a string representation of this location. + def inspect + "#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>" + end + + # Returns all of the lines of the source code associated with this location. + def source_lines + source.lines + end + + # The source code that this location represents. + def slice + source.slice(start_offset, length) + end + + # The source code that this location represents starting from the beginning + # of the line that this location starts on to the end of the line that this + # location ends on. + def slice_lines + line_start = source.line_start(start_offset) + line_end = source.line_end(end_offset) + source.slice(line_start, line_end - line_start) + end + + # The character offset from the beginning of the source where this location + # starts. + def start_character_offset + source.character_offset(start_offset) + end + + # The offset from the start of the file in code units of the given encoding. + def start_code_units_offset(encoding = Encoding::UTF_16LE) + source.code_units_offset(start_offset, encoding) + end + + # The byte offset from the beginning of the source where this location ends. + def end_offset + start_offset + length + end + + # The character offset from the beginning of the source where this location + # ends. + def end_character_offset + source.character_offset(end_offset) + end + + # The offset from the start of the file in code units of the given encoding. + def end_code_units_offset(encoding = Encoding::UTF_16LE) + source.code_units_offset(end_offset, encoding) + end + + # The line number where this location starts. + def start_line + source.line(start_offset) + end + + # The content of the line where this location starts before this location. + def start_line_slice + offset = source.line_start(start_offset) + source.slice(offset, start_offset - offset) + end + + # The line number where this location ends. + def end_line + source.line(end_offset) + end + + # The column number in bytes where this location starts from the start of + # the line. + def start_column + source.column(start_offset) + end + + # The column number in characters where this location ends from the start of + # the line. + def start_character_column + source.character_column(start_offset) + end + + # The column number in code units of the given encoding where this location + # starts from the start of the line. + def start_code_units_column(encoding = Encoding::UTF_16LE) + source.code_units_column(start_offset, encoding) + end + + # The column number in bytes where this location ends from the start of the + # line. + def end_column + source.column(end_offset) + end + + # The column number in characters where this location ends from the start of + # the line. + def end_character_column + source.character_column(end_offset) + end + + # The column number in code units of the given encoding where this location + # ends from the start of the line. + def end_code_units_column(encoding = Encoding::UTF_16LE) + source.code_units_column(end_offset, encoding) + end + + # Implement the hash pattern matching interface for Location. + def deconstruct_keys(keys) + { start_offset: start_offset, end_offset: end_offset } + end + + # Implement the pretty print interface for Location. + def pretty_print(q) + q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column})") + end + + # Returns true if the given other location is equal to this location. + def ==(other) + Location === other && + other.start_offset == start_offset && + other.end_offset == end_offset + end + + # Returns a new location that stretches from this location to the given + # other location. Raises an error if this location is not before the other + # location or if they don't share the same source. + def join(other) + raise "Incompatible sources" if source != other.source + raise "Incompatible locations" if start_offset > other.start_offset + + Location.new(source, start_offset, other.end_offset - start_offset) + end + + # Join this location with the first occurrence of the string in the source + # that occurs after this location on the same line, and return the new + # location. This will raise an error if the string does not exist. + def adjoin(string) + line_suffix = source.slice(end_offset, source.line_end(end_offset) - end_offset) + + line_suffix_index = line_suffix.byteindex(string) + raise "Could not find #{string}" if line_suffix_index.nil? + + Location.new(source, start_offset, length + line_suffix_index + string.bytesize) + end + end + + # This represents a comment that was encountered during parsing. It is the + # base class for all comment types. + class Comment + # The location of this comment in the source. + attr_reader :location + + # Create a new comment object with the given location. + def initialize(location) + @location = location + end + + # Implement the hash pattern matching interface for Comment. + def deconstruct_keys(keys) + { location: location } + end + + # Returns the content of the comment by slicing it from the source code. + def slice + location.slice + end + end + + # InlineComment objects are the most common. They correspond to comments in + # the source file like this one that start with #. + class InlineComment < Comment + # Returns true if this comment happens on the same line as other code and + # false if the comment is by itself. + def trailing? + !location.start_line_slice.strip.empty? + end + + # Returns a string representation of this comment. + def inspect + "#<Prism::InlineComment @location=#{location.inspect}>" + end + end + + # EmbDocComment objects correspond to comments that are surrounded by =begin + # and =end. + class EmbDocComment < Comment + # This can only be true for inline comments. + def trailing? + false + end + + # Returns a string representation of this comment. + def inspect + "#<Prism::EmbDocComment @location=#{location.inspect}>" + end + end + + # This represents a magic comment that was encountered during parsing. + class MagicComment + # A Location object representing the location of the key in the source. + attr_reader :key_loc + + # A Location object representing the location of the value in the source. + attr_reader :value_loc + + # Create a new magic comment object with the given key and value locations. + def initialize(key_loc, value_loc) + @key_loc = key_loc + @value_loc = value_loc + end + + # Returns the key of the magic comment by slicing it from the source code. + def key + key_loc.slice + end + + # Returns the value of the magic comment by slicing it from the source code. + def value + value_loc.slice + end + + # Implement the hash pattern matching interface for MagicComment. + def deconstruct_keys(keys) + { key_loc: key_loc, value_loc: value_loc } + end + + # Returns a string representation of this magic comment. + def inspect + "#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>" + end + end + + # This represents an error that was encountered during parsing. + class ParseError + # The type of error. This is an _internal_ symbol that is used for + # communicating with translation layers. It is not meant to be public API. + attr_reader :type + + # The message associated with this error. + attr_reader :message + + # A Location object representing the location of this error in the source. + attr_reader :location + + # The level of this error. + attr_reader :level + + # Create a new error object with the given message and location. + def initialize(type, message, location, level) + @type = type + @message = message + @location = location + @level = level + end + + # Implement the hash pattern matching interface for ParseError. + def deconstruct_keys(keys) + { type: type, message: message, location: location, level: level } + end + + # Returns a string representation of this error. + def inspect + "#<Prism::ParseError @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>" + end + end + + # This represents a warning that was encountered during parsing. + class ParseWarning + # The type of warning. This is an _internal_ symbol that is used for + # communicating with translation layers. It is not meant to be public API. + attr_reader :type + + # The message associated with this warning. + attr_reader :message + + # A Location object representing the location of this warning in the source. + attr_reader :location + + # The level of this warning. + attr_reader :level + + # Create a new warning object with the given message and location. + def initialize(type, message, location, level) + @type = type + @message = message + @location = location + @level = level + end + + # Implement the hash pattern matching interface for ParseWarning. + def deconstruct_keys(keys) + { type: type, message: message, location: location, level: level } + end + + # Returns a string representation of this warning. + def inspect + "#<Prism::ParseWarning @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>" + end + end + + # This represents the result of a call to ::parse or ::parse_file. It contains + # the requested structure, any comments that were encounters, and any errors + # that were encountered. + class Result + # The list of comments that were encountered during parsing. + attr_reader :comments + + # The list of magic comments that were encountered during parsing. + attr_reader :magic_comments + + # An optional location that represents the location of the __END__ marker + # and the rest of the content of the file. This content is loaded into the + # DATA constant when the file being parsed is the main file being executed. + attr_reader :data_loc + + # The list of errors that were generated during parsing. + attr_reader :errors + + # The list of warnings that were generated during parsing. + attr_reader :warnings + + # A Source instance that represents the source code that was parsed. + attr_reader :source + + # Create a new result object with the given values. + def initialize(comments, magic_comments, data_loc, errors, warnings, source) + @comments = comments + @magic_comments = magic_comments + @data_loc = data_loc + @errors = errors + @warnings = warnings + @source = source + end + + # Implement the hash pattern matching interface for Result. + def deconstruct_keys(keys) + { comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings } + end + + # Returns the encoding of the source code that was parsed. + def encoding + source.encoding + end + + # Returns true if there were no errors during parsing and false if there + # were. + def success? + errors.empty? + end + + # Returns true if there were errors during parsing and false if there were + # not. + def failure? + !success? + end + end + + # This is a result specific to the `parse` and `parse_file` methods. + class ParseResult < Result + autoload :Comments, "prism/parse_result/comments" + autoload :Newlines, "prism/parse_result/newlines" + + private_constant :Comments + private_constant :Newlines + + # The syntax tree that was parsed from the source code. + attr_reader :value + + # Create a new parse result object with the given values. + def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + @value = value + super(comments, magic_comments, data_loc, errors, warnings, source) + end + + # Implement the hash pattern matching interface for ParseResult. + def deconstruct_keys(keys) + super.merge!(value: value) + end + + # Attach the list of comments to their respective locations in the tree. + def attach_comments! + Comments.new(self).attach! # steep:ignore + end + + # Walk the tree and mark nodes that are on a new line, loosely emulating + # the behavior of CRuby's `:line` tracepoint event. + def mark_newlines! + value.accept(Newlines.new(source.offsets.size)) # steep:ignore + end + end + + # This is a result specific to the `lex` and `lex_file` methods. + class LexResult < Result + # The list of tokens that were parsed from the source code. + attr_reader :value + + # Create a new lex result object with the given values. + def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + @value = value + super(comments, magic_comments, data_loc, errors, warnings, source) + end + + # Implement the hash pattern matching interface for LexResult. + def deconstruct_keys(keys) + super.merge!(value: value) + end + end + + # This is a result specific to the `parse_lex` and `parse_lex_file` methods. + class ParseLexResult < Result + # A tuple of the syntax tree and the list of tokens that were parsed from + # the source code. + attr_reader :value + + # Create a new parse lex result object with the given values. + def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + @value = value + super(comments, magic_comments, data_loc, errors, warnings, source) + end + + # Implement the hash pattern matching interface for ParseLexResult. + def deconstruct_keys(keys) + super.merge!(value: value) + end + end + + # This represents a token from the Ruby source. + class Token + # The Source object that represents the source this token came from. + attr_reader :source + private :source + + # The type of token that this token is. + attr_reader :type + + # A byteslice of the source that this token represents. + attr_reader :value + + # Create a new token object with the given type, value, and location. + def initialize(source, type, value, location) + @source = source + @type = type + @value = value + @location = location + end + + # Implement the hash pattern matching interface for Token. + def deconstruct_keys(keys) + { type: type, value: value, location: location } + end + + # A Location object representing the location of this token in the source. + def location + location = @location + return location if location.is_a?(Location) + @location = Location.new(source, location >> 32, location & 0xFFFFFFFF) + end + + # Implement the pretty print interface for Token. + def pretty_print(q) + q.group do + q.text(type.to_s) + self.location.pretty_print(q) + q.text("(") + q.nest(2) do + q.breakable("") + q.pp(value) + end + q.breakable("") + q.text(")") + end + end + + # Returns true if the given other token is equal to this token. + def ==(other) + Token === other && + other.type == type && + other.value == value + end + end +end diff --git a/lib/prism/parse_result/comments.rb b/lib/prism/parse_result/comments.rb new file mode 100644 index 0000000000..22c4148b2c --- /dev/null +++ b/lib/prism/parse_result/comments.rb @@ -0,0 +1,187 @@ +# frozen_string_literal: true + +module Prism + class ParseResult < Result + # When we've parsed the source, we have both the syntax tree and the list of + # comments that we found in the source. This class is responsible for + # walking the tree and finding the nearest location to attach each comment. + # + # It does this by first finding the nearest locations to each comment. + # Locations can either come from nodes directly or from location fields on + # nodes. For example, a `ClassNode` has an overall location encompassing the + # entire class, but it also has a location for the `class` keyword. + # + # Once the nearest locations are found, it determines which one to attach + # to. If it's a trailing comment (a comment on the same line as other source + # code), it will favor attaching to the nearest location that occurs before + # the comment. Otherwise it will favor attaching to the nearest location + # that is after the comment. + class Comments + # A target for attaching comments that is based on a specific node's + # location. + class NodeTarget # :nodoc: + attr_reader :node + + def initialize(node) + @node = node + end + + def start_offset + node.start_offset + end + + def end_offset + node.end_offset + end + + def encloses?(comment) + start_offset <= comment.location.start_offset && + comment.location.end_offset <= end_offset + end + + def leading_comment(comment) + node.location.leading_comment(comment) + end + + def trailing_comment(comment) + node.location.trailing_comment(comment) + end + end + + # A target for attaching comments that is based on a location field on a + # node. For example, the `end` token of a ClassNode. + class LocationTarget # :nodoc: + attr_reader :location + + def initialize(location) + @location = location + end + + def start_offset + location.start_offset + end + + def end_offset + location.end_offset + end + + def encloses?(comment) + false + end + + def leading_comment(comment) + location.leading_comment(comment) + end + + def trailing_comment(comment) + location.trailing_comment(comment) + end + end + + # The parse result that we are attaching comments to. + attr_reader :parse_result + + # Create a new Comments object that will attach comments to the given + # parse result. + def initialize(parse_result) + @parse_result = parse_result + end + + # Attach the comments to their respective locations in the tree by + # mutating the parse result. + def attach! + parse_result.comments.each do |comment| + preceding, enclosing, following = nearest_targets(parse_result.value, comment) + + if comment.trailing? + if preceding + preceding.trailing_comment(comment) + else + (following || enclosing || NodeTarget.new(parse_result.value)).leading_comment(comment) + end + else + # If a comment exists on its own line, prefer a leading comment. + if following + following.leading_comment(comment) + elsif preceding + preceding.trailing_comment(comment) + else + (enclosing || NodeTarget.new(parse_result.value)).leading_comment(comment) + end + end + end + end + + private + + # Responsible for finding the nearest targets to the given comment within + # the context of the given encapsulating node. + def nearest_targets(node, comment) + comment_start = comment.location.start_offset + comment_end = comment.location.end_offset + + targets = [] #: Array[_Target] + node.comment_targets.map do |value| + case value + when StatementsNode + targets.concat(value.body.map { |node| NodeTarget.new(node) }) + when Node + targets << NodeTarget.new(value) + when Location + targets << LocationTarget.new(value) + end + end + + targets.sort_by!(&:start_offset) + preceding = nil #: _Target? + following = nil #: _Target? + + left = 0 + right = targets.length + + # This is a custom binary search that finds the nearest nodes to the + # given comment. When it finds a node that completely encapsulates the + # comment, it recurses downward into the tree. + while left < right + middle = (left + right) / 2 + target = targets[middle] + + target_start = target.start_offset + target_end = target.end_offset + + if target.encloses?(comment) + # @type var target: NodeTarget + # The comment is completely contained by this target. Abandon the + # binary search at this level. + return nearest_targets(target.node, comment) + end + + if target_end <= comment_start + # This target falls completely before the comment. Because we will + # never consider this target or any targets before it again, this + # target must be the closest preceding target we have encountered so + # far. + preceding = target + left = middle + 1 + next + end + + if comment_end <= target_start + # This target falls completely after the comment. Because we will + # never consider this target or any targets after it again, this + # target must be the closest following target we have encountered so + # far. + following = target + right = middle + next + end + + # This should only happen if there is a bug in this parser. + raise "Comment location overlaps with a target location" + end + + [preceding, NodeTarget.new(node), following] + end + end + end +end diff --git a/lib/prism/parse_result/newlines.rb b/lib/prism/parse_result/newlines.rb new file mode 100644 index 0000000000..cc1343dfda --- /dev/null +++ b/lib/prism/parse_result/newlines.rb @@ -0,0 +1,153 @@ +# frozen_string_literal: true + +module Prism + class ParseResult < Result + # The :line tracepoint event gets fired whenever the Ruby VM encounters an + # expression on a new line. The types of expressions that can trigger this + # event are: + # + # * if statements + # * unless statements + # * nodes that are children of statements lists + # + # In order to keep track of the newlines, we have a list of offsets that + # come back from the parser. We assign these offsets to the first nodes that + # we find in the tree that are on those lines. + # + # Note that the logic in this file should be kept in sync with the Java + # MarkNewlinesVisitor, since that visitor is responsible for marking the + # newlines for JRuby/TruffleRuby. + # + # This file is autoloaded only when `mark_newlines!` is called, so the + # re-opening of the various nodes in this file will only be performed in + # that case. We do that to avoid storing the extra `@newline` instance + # variable on every node if we don't need it. + class Newlines < Visitor + # Create a new Newlines visitor with the given newline offsets. + def initialize(lines) + @lines = Array.new(1 + lines, false) + end + + # Permit block/lambda nodes to mark newlines within themselves. + def visit_block_node(node) + old_lines = @lines + @lines = Array.new(old_lines.size, false) + + begin + super(node) + ensure + @lines = old_lines + end + end + + alias_method :visit_lambda_node, :visit_block_node + + # Mark if/unless nodes as newlines. + def visit_if_node(node) + node.newline!(@lines) + super(node) + end + + alias_method :visit_unless_node, :visit_if_node + + # Permit statements lists to mark newlines within themselves. + def visit_statements_node(node) + node.body.each do |child| + child.newline!(@lines) + end + super(node) + end + end + end + + class Node + def newline? # :nodoc: + @newline ? true : false + end + + def newline!(lines) # :nodoc: + line = location.start_line + unless lines[line] + lines[line] = true + @newline = true + end + end + end + + class BeginNode < Node + def newline!(lines) # :nodoc: + # Never mark BeginNode with a newline flag, mark children instead. + end + end + + class ParenthesesNode < Node + def newline!(lines) # :nodoc: + # Never mark ParenthesesNode with a newline flag, mark children instead. + end + end + + class IfNode < Node + def newline!(lines) # :nodoc: + predicate.newline!(lines) + end + end + + class UnlessNode < Node + def newline!(lines) # :nodoc: + predicate.newline!(lines) + end + end + + class UntilNode < Node + def newline!(lines) # :nodoc: + predicate.newline!(lines) + end + end + + class WhileNode < Node + def newline!(lines) # :nodoc: + predicate.newline!(lines) + end + end + + class RescueModifierNode < Node + def newline!(lines) # :nodoc: + expression.newline!(lines) + end + end + + class InterpolatedMatchLastLineNode < Node + def newline!(lines) # :nodoc: + first = parts.first + first.newline!(lines) if first + end + end + + class InterpolatedRegularExpressionNode < Node + def newline!(lines) # :nodoc: + first = parts.first + first.newline!(lines) if first + end + end + + class InterpolatedStringNode < Node + def newline!(lines) # :nodoc: + first = parts.first + first.newline!(lines) if first + end + end + + class InterpolatedSymbolNode < Node + def newline!(lines) # :nodoc: + first = parts.first + first.newline!(lines) if first + end + end + + class InterpolatedXStringNode < Node + def newline!(lines) # :nodoc: + first = parts.first + first.newline!(lines) if first + end + end +end diff --git a/lib/prism/pattern.rb b/lib/prism/pattern.rb new file mode 100644 index 0000000000..03fec26789 --- /dev/null +++ b/lib/prism/pattern.rb @@ -0,0 +1,268 @@ +# frozen_string_literal: true + +module Prism + # A pattern is an object that wraps a Ruby pattern matching expression. The + # expression would normally be passed to an `in` clause within a `case` + # expression or a rightward assignment expression. For example, in the + # following snippet: + # + # case node + # in ConstantPathNode[ConstantReadNode[name: :Prism], ConstantReadNode[name: :Pattern]] + # end + # + # the pattern is the <tt>ConstantPathNode[...]</tt> expression. + # + # The pattern gets compiled into an object that responds to #call by running + # the #compile method. This method itself will run back through Prism to + # parse the expression into a tree, then walk the tree to generate the + # necessary callable objects. For example, if you wanted to compile the + # expression above into a callable, you would: + # + # callable = Prism::Pattern.new("ConstantPathNode[ConstantReadNode[name: :Prism], ConstantReadNode[name: :Pattern]]").compile + # callable.call(node) + # + # The callable object returned by #compile is guaranteed to respond to #call + # with a single argument, which is the node to match against. It also is + # guaranteed to respond to #===, which means it itself can be used in a `case` + # expression, as in: + # + # case node + # when callable + # end + # + # If the query given to the initializer cannot be compiled into a valid + # matcher (either because of a syntax error or because it is using syntax we + # do not yet support) then a Prism::Pattern::CompilationError will be + # raised. + class Pattern + # Raised when the query given to a pattern is either invalid Ruby syntax or + # is using syntax that we don't yet support. + class CompilationError < StandardError + # Create a new CompilationError with the given representation of the node + # that caused the error. + def initialize(repr) + super(<<~ERROR) + prism was unable to compile the pattern you provided into a usable + expression. It failed on to understand the node represented by: + + #{repr} + + Note that not all syntax supported by Ruby's pattern matching syntax + is also supported by prism's patterns. If you're using some syntax + that you believe should be supported, please open an issue on + GitHub at https://github.com/ruby/prism/issues/new. + ERROR + end + end + + # The query that this pattern was initialized with. + attr_reader :query + + # Create a new pattern with the given query. The query should be a string + # containing a Ruby pattern matching expression. + def initialize(query) + @query = query + @compiled = nil + end + + # Compile the query into a callable object that can be used to match against + # nodes. + def compile + result = Prism.parse("case nil\nin #{query}\nend") + + case_match_node = result.value.statements.body.last + raise CompilationError, case_match_node.inspect unless case_match_node.is_a?(CaseMatchNode) + + in_node = case_match_node.conditions.last + raise CompilationError, in_node.inspect unless in_node.is_a?(InNode) + + compile_node(in_node.pattern) + end + + # Scan the given node and all of its children for nodes that match the + # pattern. If a block is given, it will be called with each node that + # matches the pattern. If no block is given, an enumerator will be returned + # that will yield each node that matches the pattern. + def scan(root) + return to_enum(:scan, root) unless block_given? + + @compiled ||= compile + queue = [root] + + while (node = queue.shift) + yield node if @compiled.call(node) # steep:ignore + queue.concat(node.compact_child_nodes) + end + end + + private + + # Shortcut for combining two procs into one that returns true if both return + # true. + def combine_and(left, right) + ->(other) { left.call(other) && right.call(other) } + end + + # Shortcut for combining two procs into one that returns true if either + # returns true. + def combine_or(left, right) + ->(other) { left.call(other) || right.call(other) } + end + + # Raise an error because the given node is not supported. + def compile_error(node) + raise CompilationError, node.inspect + end + + # in [foo, bar, baz] + def compile_array_pattern_node(node) + compile_error(node) if !node.rest.nil? || node.posts.any? + + constant = node.constant + compiled_constant = compile_node(constant) if constant + + preprocessed = node.requireds.map { |required| compile_node(required) } + + compiled_requireds = ->(other) do + deconstructed = other.deconstruct + + deconstructed.length == preprocessed.length && + preprocessed + .zip(deconstructed) + .all? { |(matcher, value)| matcher.call(value) } + end + + if compiled_constant + combine_and(compiled_constant, compiled_requireds) + else + compiled_requireds + end + end + + # in foo | bar + def compile_alternation_pattern_node(node) + combine_or(compile_node(node.left), compile_node(node.right)) + end + + # in Prism::ConstantReadNode + def compile_constant_path_node(node) + parent = node.parent + + if parent.is_a?(ConstantReadNode) && parent.slice == "Prism" + name = node.name + raise CompilationError, node.inspect if name.nil? + + compile_constant_name(node, name) + else + compile_error(node) + end + end + + # in ConstantReadNode + # in String + def compile_constant_read_node(node) + compile_constant_name(node, node.name) + end + + # Compile a name associated with a constant. + def compile_constant_name(node, name) + if Prism.const_defined?(name, false) + clazz = Prism.const_get(name) + + ->(other) { clazz === other } + elsif Object.const_defined?(name, false) + clazz = Object.const_get(name) + + ->(other) { clazz === other } + else + compile_error(node) + end + end + + # in InstanceVariableReadNode[name: Symbol] + # in { name: Symbol } + def compile_hash_pattern_node(node) + compile_error(node) if node.rest + compiled_constant = compile_node(node.constant) if node.constant + + preprocessed = + node.elements.to_h do |element| + key = element.key + if key.is_a?(SymbolNode) + [key.unescaped.to_sym, compile_node(element.value)] + else + raise CompilationError, element.inspect + end + end + + compiled_keywords = ->(other) do + deconstructed = other.deconstruct_keys(preprocessed.keys) + + preprocessed.all? do |keyword, matcher| + deconstructed.key?(keyword) && matcher.call(deconstructed[keyword]) + end + end + + if compiled_constant + combine_and(compiled_constant, compiled_keywords) + else + compiled_keywords + end + end + + # in nil + def compile_nil_node(node) + ->(attribute) { attribute.nil? } + end + + # in /foo/ + def compile_regular_expression_node(node) + regexp = Regexp.new(node.unescaped, node.closing[1..]) + + ->(attribute) { regexp === attribute } + end + + # in "" + # in "foo" + def compile_string_node(node) + string = node.unescaped + + ->(attribute) { string === attribute } + end + + # in :+ + # in :foo + def compile_symbol_node(node) + symbol = node.unescaped.to_sym + + ->(attribute) { symbol === attribute } + end + + # Compile any kind of node. Dispatch out to the individual compilation + # methods based on the type of node. + def compile_node(node) + case node + when AlternationPatternNode + compile_alternation_pattern_node(node) + when ArrayPatternNode + compile_array_pattern_node(node) + when ConstantPathNode + compile_constant_path_node(node) + when ConstantReadNode + compile_constant_read_node(node) + when HashPatternNode + compile_hash_pattern_node(node) + when NilNode + compile_nil_node(node) + when RegularExpressionNode + compile_regular_expression_node(node) + when StringNode + compile_string_node(node) + when SymbolNode + compile_symbol_node(node) + else + compile_error(node) + end + end + end +end diff --git a/lib/prism/polyfill/byteindex.rb b/lib/prism/polyfill/byteindex.rb new file mode 100644 index 0000000000..98c6089f14 --- /dev/null +++ b/lib/prism/polyfill/byteindex.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +# Polyfill for String#byteindex, which didn't exist until Ruby 3.2. +if !("".respond_to?(:byteindex)) + String.include( + Module.new { + def byteindex(needle, offset = 0) + charindex = index(needle, offset) + slice(0...charindex).bytesize if charindex + end + } + ) +end diff --git a/lib/prism/polyfill/unpack1.rb b/lib/prism/polyfill/unpack1.rb new file mode 100644 index 0000000000..3fa9b5a0c5 --- /dev/null +++ b/lib/prism/polyfill/unpack1.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +# Polyfill for String#unpack1 with the offset parameter. Not all Ruby engines +# have Method#parameters implemented, so we check the arity instead if +# necessary. +if (unpack1 = String.instance_method(:unpack1)).respond_to?(:parameters) ? unpack1.parameters.none? { |_, name| name == :offset } : (unpack1.arity == 1) + String.prepend( + Module.new { + def unpack1(format, offset: 0) # :nodoc: + offset == 0 ? super(format) : self[offset..].unpack1(format) # steep:ignore + end + } + ) +end diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec new file mode 100644 index 0000000000..b4504dbf4b --- /dev/null +++ b/lib/prism/prism.gemspec @@ -0,0 +1,161 @@ +# frozen_string_literal: true + +Gem::Specification.new do |spec| + spec.name = "prism" + spec.version = "0.29.0" + spec.authors = ["Shopify"] + spec.email = ["ruby@shopify.com"] + + spec.summary = "Prism Ruby parser" + spec.homepage = "https://github.com/ruby/prism" + spec.license = "MIT" + + spec.required_ruby_version = ">= 2.7.0" + + spec.require_paths = ["lib"] + spec.files = [ + "BSDmakefile", + "CHANGELOG.md", + "CODE_OF_CONDUCT.md", + "CONTRIBUTING.md", + "LICENSE.md", + "Makefile", + "README.md", + "config.yml", + "docs/build_system.md", + "docs/configuration.md", + "docs/cruby_compilation.md", + "docs/design.md", + "docs/encoding.md", + "docs/fuzzing.md", + "docs/heredocs.md", + "docs/javascript.md", + "docs/local_variable_depth.md", + "docs/mapping.md", + "docs/parser_translation.md", + "docs/parsing_rules.md", + "docs/releasing.md", + "docs/ripper_translation.md", + "docs/ruby_api.md", + "docs/ruby_parser_translation.md", + "docs/serialization.md", + "docs/testing.md", + "ext/prism/api_node.c", + "ext/prism/api_pack.c", + "ext/prism/extension.c", + "ext/prism/extension.h", + "include/prism.h", + "include/prism/ast.h", + "include/prism/defines.h", + "include/prism/diagnostic.h", + "include/prism/encoding.h", + "include/prism/node.h", + "include/prism/options.h", + "include/prism/pack.h", + "include/prism/parser.h", + "include/prism/prettyprint.h", + "include/prism/regexp.h", + "include/prism/static_literals.h", + "include/prism/util/pm_buffer.h", + "include/prism/util/pm_char.h", + "include/prism/util/pm_constant_pool.h", + "include/prism/util/pm_integer.h", + "include/prism/util/pm_list.h", + "include/prism/util/pm_memchr.h", + "include/prism/util/pm_newline_list.h", + "include/prism/util/pm_strncasecmp.h", + "include/prism/util/pm_string.h", + "include/prism/util/pm_string_list.h", + "include/prism/util/pm_strpbrk.h", + "include/prism/version.h", + "lib/prism.rb", + "lib/prism/compiler.rb", + "lib/prism/desugar_compiler.rb", + "lib/prism/dispatcher.rb", + "lib/prism/dot_visitor.rb", + "lib/prism/dsl.rb", + "lib/prism/ffi.rb", + "lib/prism/inspect_visitor.rb", + "lib/prism/lex_compat.rb", + "lib/prism/mutation_compiler.rb", + "lib/prism/node_ext.rb", + "lib/prism/node.rb", + "lib/prism/pack.rb", + "lib/prism/parse_result.rb", + "lib/prism/parse_result/comments.rb", + "lib/prism/parse_result/newlines.rb", + "lib/prism/pattern.rb", + "lib/prism/polyfill/byteindex.rb", + "lib/prism/polyfill/unpack1.rb", + "lib/prism/reflection.rb", + "lib/prism/serialize.rb", + "lib/prism/translation.rb", + "lib/prism/translation/parser.rb", + "lib/prism/translation/parser33.rb", + "lib/prism/translation/parser34.rb", + "lib/prism/translation/parser/compiler.rb", + "lib/prism/translation/parser/lexer.rb", + "lib/prism/translation/parser/rubocop.rb", + "lib/prism/translation/ripper.rb", + "lib/prism/translation/ripper/sexp.rb", + "lib/prism/translation/ripper/shim.rb", + "lib/prism/translation/ruby_parser.rb", + "lib/prism/visitor.rb", + "prism.gemspec", + "rbi/prism.rbi", + "rbi/prism/compiler.rbi", + "rbi/prism/inspect_visitor.rbi", + "rbi/prism/node_ext.rbi", + "rbi/prism/node.rbi", + "rbi/prism/parse_result.rbi", + "rbi/prism/reflection.rbi", + "rbi/prism/translation/parser.rbi", + "rbi/prism/translation/parser33.rbi", + "rbi/prism/translation/parser34.rbi", + "rbi/prism/translation/ripper.rbi", + "rbi/prism/visitor.rbi", + "sig/prism.rbs", + "sig/prism/compiler.rbs", + "sig/prism/dispatcher.rbs", + "sig/prism/dot_visitor.rbs", + "sig/prism/dsl.rbs", + "sig/prism/inspect_visitor.rbs", + "sig/prism/lex_compat.rbs", + "sig/prism/mutation_compiler.rbs", + "sig/prism/node_ext.rbs", + "sig/prism/node.rbs", + "sig/prism/pack.rbs", + "sig/prism/parse_result.rbs", + "sig/prism/pattern.rbs", + "sig/prism/reflection.rbs", + "sig/prism/serialize.rbs", + "sig/prism/visitor.rbs", + "src/diagnostic.c", + "src/encoding.c", + "src/node.c", + "src/options.c", + "src/pack.c", + "src/prettyprint.c", + "src/prism.c", + "src/regexp.c", + "src/serialize.c", + "src/static_literals.c", + "src/token_type.c", + "src/util/pm_buffer.c", + "src/util/pm_char.c", + "src/util/pm_constant_pool.c", + "src/util/pm_integer.c", + "src/util/pm_list.c", + "src/util/pm_memchr.c", + "src/util/pm_newline_list.c", + "src/util/pm_string_list.c", + "src/util/pm_string.c", + "src/util/pm_strncasecmp.c", + "src/util/pm_strpbrk.c" + ] + + spec.extensions = ["ext/prism/extconf.rb"] + spec.metadata["allowed_push_host"] = "https://rubygems.org" + spec.metadata["source_code_uri"] = "https://github.com/ruby/prism" + spec.metadata["changelog_uri"] = "https://github.com/ruby/prism/blob/main/CHANGELOG.md" +end diff --git a/lib/prism/translation.rb b/lib/prism/translation.rb new file mode 100644 index 0000000000..8b75e8a3ab --- /dev/null +++ b/lib/prism/translation.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module Prism + # This module is responsible for converting the prism syntax tree into other + # syntax trees. + module Translation # steep:ignore + autoload :Parser, "prism/translation/parser" + autoload :Parser33, "prism/translation/parser33" + autoload :Parser34, "prism/translation/parser34" + autoload :Ripper, "prism/translation/ripper" + autoload :RubyParser, "prism/translation/ruby_parser" + end +end diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb new file mode 100644 index 0000000000..3748fc896e --- /dev/null +++ b/lib/prism/translation/parser.rb @@ -0,0 +1,307 @@ +# frozen_string_literal: true + +begin + require "parser" +rescue LoadError + warn(%q{Error: Unable to load parser. Add `gem "parser"` to your Gemfile.}) + exit(1) +end + +module Prism + module Translation + # This class is the entry-point for converting a prism syntax tree into the + # whitequark/parser gem's syntax tree. It inherits from the base parser for + # the parser gem, and overrides the parse* methods to parse with prism and + # then translate. + class Parser < ::Parser::Base + Diagnostic = ::Parser::Diagnostic # :nodoc: + private_constant :Diagnostic + + # The parser gem has a list of diagnostics with a hard-coded set of error + # messages. We create our own diagnostic class in order to set our own + # error messages. + class PrismDiagnostic < Diagnostic + # This is the cached message coming from prism. + attr_reader :message + + # Initialize a new diagnostic with the given message and location. + def initialize(message, level, reason, location) + @message = message + super(level, reason, {}, location, []) + end + end + + Racc_debug_parser = false # :nodoc: + + def version # :nodoc: + 34 + end + + # The default encoding for Ruby files is UTF-8. + def default_encoding + Encoding::UTF_8 + end + + def yyerror # :nodoc: + end + + # Parses a source buffer and returns the AST. + def parse(source_buffer) + @source_buffer = source_buffer + source = source_buffer.source + + offset_cache = build_offset_cache(source) + result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]]), offset_cache) + + build_ast(result.value, offset_cache) + ensure + @source_buffer = nil + end + + # Parses a source buffer and returns the AST and the source code comments. + def parse_with_comments(source_buffer) + @source_buffer = source_buffer + source = source_buffer.source + + offset_cache = build_offset_cache(source) + result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]]), offset_cache) + + [ + build_ast(result.value, offset_cache), + build_comments(result.comments, offset_cache) + ] + ensure + @source_buffer = nil + end + + # Parses a source buffer and returns the AST, the source code comments, + # and the tokens emitted by the lexer. + def tokenize(source_buffer, recover = false) + @source_buffer = source_buffer + source = source_buffer.source + + offset_cache = build_offset_cache(source) + result = + begin + unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]]), offset_cache) + rescue ::Parser::SyntaxError + raise if !recover + end + + program, tokens = result.value + ast = build_ast(program, offset_cache) if result.success? + + [ + ast, + build_comments(result.comments, offset_cache), + build_tokens(tokens, offset_cache) + ] + ensure + @source_buffer = nil + end + + # Since prism resolves num params for us, we don't need to support this + # kind of logic here. + def try_declare_numparam(node) + node.children[0].match?(/\A_[1-9]\z/) + end + + private + + # This is a hook to allow consumers to disable some errors if they don't + # want them to block creating the syntax tree. + def valid_error?(error) + true + end + + # This is a hook to allow consumers to disable some warnings if they don't + # want them to block creating the syntax tree. + def valid_warning?(warning) + true + end + + # Build a diagnostic from the given prism parse error. + def error_diagnostic(error, offset_cache) + location = error.location + diagnostic_location = build_range(location, offset_cache) + + case error.type + when :argument_block_multi + Diagnostic.new(:error, :block_and_blockarg, {}, diagnostic_location, []) + when :argument_formal_constant + Diagnostic.new(:error, :argument_const, {}, diagnostic_location, []) + when :argument_formal_class + Diagnostic.new(:error, :argument_cvar, {}, diagnostic_location, []) + when :argument_formal_global + Diagnostic.new(:error, :argument_gvar, {}, diagnostic_location, []) + when :argument_formal_ivar + Diagnostic.new(:error, :argument_ivar, {}, diagnostic_location, []) + when :argument_no_forwarding_amp + Diagnostic.new(:error, :no_anonymous_blockarg, {}, diagnostic_location, []) + when :argument_no_forwarding_star + Diagnostic.new(:error, :no_anonymous_restarg, {}, diagnostic_location, []) + when :argument_no_forwarding_star_star + Diagnostic.new(:error, :no_anonymous_kwrestarg, {}, diagnostic_location, []) + when :begin_lonely_else + location = location.copy(length: 4) + diagnostic_location = build_range(location, offset_cache) + Diagnostic.new(:error, :useless_else, {}, diagnostic_location, []) + when :class_name, :module_name + Diagnostic.new(:error, :module_name_const, {}, diagnostic_location, []) + when :class_in_method + Diagnostic.new(:error, :class_in_def, {}, diagnostic_location, []) + when :def_endless_setter + Diagnostic.new(:error, :endless_setter, {}, diagnostic_location, []) + when :embdoc_term + Diagnostic.new(:error, :embedded_document, {}, diagnostic_location, []) + when :incomplete_variable_class, :incomplete_variable_class_3_3 + location = location.copy(length: location.length + 1) + diagnostic_location = build_range(location, offset_cache) + + Diagnostic.new(:error, :cvar_name, { name: location.slice }, diagnostic_location, []) + when :incomplete_variable_instance, :incomplete_variable_instance_3_3 + location = location.copy(length: location.length + 1) + diagnostic_location = build_range(location, offset_cache) + + Diagnostic.new(:error, :ivar_name, { name: location.slice }, diagnostic_location, []) + when :invalid_variable_global, :invalid_variable_global_3_3 + Diagnostic.new(:error, :gvar_name, { name: location.slice }, diagnostic_location, []) + when :module_in_method + Diagnostic.new(:error, :module_in_def, {}, diagnostic_location, []) + when :numbered_parameter_ordinary + Diagnostic.new(:error, :ordinary_param_defined, {}, diagnostic_location, []) + when :numbered_parameter_outer_scope + Diagnostic.new(:error, :numparam_used_in_outer_scope, {}, diagnostic_location, []) + when :parameter_circular + Diagnostic.new(:error, :circular_argument_reference, { var_name: location.slice }, diagnostic_location, []) + when :parameter_name_repeat + Diagnostic.new(:error, :duplicate_argument, {}, diagnostic_location, []) + when :parameter_numbered_reserved + Diagnostic.new(:error, :reserved_for_numparam, { name: location.slice }, diagnostic_location, []) + when :regexp_unknown_options + Diagnostic.new(:error, :regexp_options, { options: location.slice[1..] }, diagnostic_location, []) + when :singleton_for_literals + Diagnostic.new(:error, :singleton_literal, {}, diagnostic_location, []) + when :string_literal_eof + Diagnostic.new(:error, :string_eof, {}, diagnostic_location, []) + when :unexpected_token_ignore + Diagnostic.new(:error, :unexpected_token, { token: location.slice }, diagnostic_location, []) + when :write_target_in_method + Diagnostic.new(:error, :dynamic_const, {}, diagnostic_location, []) + else + PrismDiagnostic.new(error.message, :error, error.type, diagnostic_location) + end + end + + # Build a diagnostic from the given prism parse warning. + def warning_diagnostic(warning, offset_cache) + diagnostic_location = build_range(warning.location, offset_cache) + + case warning.type + when :ambiguous_first_argument_plus + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "+" }, diagnostic_location, []) + when :ambiguous_first_argument_minus + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "-" }, diagnostic_location, []) + when :ambiguous_prefix_ampersand + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "&" }, diagnostic_location, []) + when :ambiguous_prefix_star + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "*" }, diagnostic_location, []) + when :ambiguous_prefix_star_star + Diagnostic.new(:warning, :ambiguous_prefix, { prefix: "**" }, diagnostic_location, []) + when :ambiguous_slash + Diagnostic.new(:warning, :ambiguous_regexp, {}, diagnostic_location, []) + when :dot_dot_dot_eol + Diagnostic.new(:warning, :triple_dot_at_eol, {}, diagnostic_location, []) + when :duplicated_hash_key + # skip, parser does this on its own + else + PrismDiagnostic.new(warning.message, :warning, warning.type, diagnostic_location) + end + end + + # If there was a error generated during the parse, then raise an + # appropriate syntax error. Otherwise return the result. + def unwrap(result, offset_cache) + result.errors.each do |error| + next unless valid_error?(error) + diagnostics.process(error_diagnostic(error, offset_cache)) + end + + result.warnings.each do |warning| + next unless valid_warning?(warning) + diagnostic = warning_diagnostic(warning, offset_cache) + diagnostics.process(diagnostic) if diagnostic + end + + result + end + + # Prism deals with offsets in bytes, while the parser gem deals with + # offsets in characters. We need to handle this conversion in order to + # build the parser gem AST. + # + # If the bytesize of the source is the same as the length, then we can + # just use the offset directly. Otherwise, we build an array where the + # index is the byte offset and the value is the character offset. + def build_offset_cache(source) + if source.bytesize == source.length + -> (offset) { offset } + else + offset_cache = [] + offset = 0 + + source.each_char do |char| + char.bytesize.times { offset_cache << offset } + offset += 1 + end + + offset_cache << offset + end + end + + # Build the parser gem AST from the prism AST. + def build_ast(program, offset_cache) + program.accept(Compiler.new(self, offset_cache)) + end + + # Build the parser gem comments from the prism comments. + def build_comments(comments, offset_cache) + comments.map do |comment| + ::Parser::Source::Comment.new(build_range(comment.location, offset_cache)) + end + end + + # Build the parser gem tokens from the prism tokens. + def build_tokens(tokens, offset_cache) + Lexer.new(source_buffer, tokens, offset_cache).to_a + end + + # Build a range from a prism location. + def build_range(location, offset_cache) + ::Parser::Source::Range.new( + source_buffer, + offset_cache[location.start_offset], + offset_cache[location.end_offset] + ) + end + + # Converts the version format handled by Parser to the format handled by Prism. + def convert_for_prism(version) + case version + when 33 + "3.3.1" + when 34 + "3.4.0" + else + "latest" + end + end + + require_relative "parser/compiler" + require_relative "parser/lexer" + + private_constant :Compiler + private_constant :Lexer + end + end +end diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb new file mode 100644 index 0000000000..08e54a7bf3 --- /dev/null +++ b/lib/prism/translation/parser/compiler.rb @@ -0,0 +1,2111 @@ +# frozen_string_literal: true + +module Prism + module Translation + class Parser + # A visitor that knows how to convert a prism syntax tree into the + # whitequark/parser gem's syntax tree. + class Compiler < ::Prism::Compiler + # Raised when the tree is malformed or there is a bug in the compiler. + class CompilationError < StandardError + end + + # The Parser::Base instance that is being used to build the AST. + attr_reader :parser + + # The Parser::Builders::Default instance that is being used to build the + # AST. + attr_reader :builder + + # The Parser::Source::Buffer instance that is holding a reference to the + # source code. + attr_reader :source_buffer + + # The offset cache that is used to map between byte and character + # offsets in the file. + attr_reader :offset_cache + + # The types of values that can be forwarded in the current scope. + attr_reader :forwarding + + # Whether or not the current node is in a destructure. + attr_reader :in_destructure + + # Whether or not the current node is in a pattern. + attr_reader :in_pattern + + # Initialize a new compiler with the given parser, offset cache, and + # options. + def initialize(parser, offset_cache, forwarding: [], in_destructure: false, in_pattern: false) + @parser = parser + @builder = parser.builder + @source_buffer = parser.source_buffer + @offset_cache = offset_cache + + @forwarding = forwarding + @in_destructure = in_destructure + @in_pattern = in_pattern + end + + # alias foo bar + # ^^^^^^^^^^^^^ + def visit_alias_method_node(node) + builder.alias(token(node.keyword_loc), visit(node.new_name), visit(node.old_name)) + end + + # alias $foo $bar + # ^^^^^^^^^^^^^^^ + def visit_alias_global_variable_node(node) + builder.alias(token(node.keyword_loc), visit(node.new_name), visit(node.old_name)) + end + + # foo => bar | baz + # ^^^^^^^^^ + def visit_alternation_pattern_node(node) + builder.match_alt(visit(node.left), token(node.operator_loc), visit(node.right)) + end + + # a and b + # ^^^^^^^ + def visit_and_node(node) + builder.logical_op(:and, visit(node.left), token(node.operator_loc), visit(node.right)) + end + + # [] + # ^^ + def visit_array_node(node) + builder.array(token(node.opening_loc), visit_all(node.elements), token(node.closing_loc)) + end + + # foo => [bar] + # ^^^^^ + def visit_array_pattern_node(node) + elements = [*node.requireds] + elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode) + elements.concat(node.posts) + visited = visit_all(elements) + + if node.rest.is_a?(ImplicitRestNode) + visited[-1] = builder.match_with_trailing_comma(visited[-1], token(node.rest.location)) + end + + if node.constant + builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.array_pattern(nil, visited, nil), token(node.closing_loc)) + else + builder.array_pattern(token(node.opening_loc), visited, token(node.closing_loc)) + end + end + + # foo(bar) + # ^^^ + def visit_arguments_node(node) + visit_all(node.arguments) + end + + # { a: 1 } + # ^^^^ + def visit_assoc_node(node) + if in_pattern + if node.value.is_a?(ImplicitNode) + if node.key.is_a?(SymbolNode) + builder.match_hash_var([node.key.unescaped, srange(node.key.location)]) + else + builder.match_hash_var_from_str(token(node.key.opening_loc), visit_all(node.key.parts), token(node.key.closing_loc)) + end + else + builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value)) + end + elsif node.value.is_a?(ImplicitNode) + if (value = node.value.value).is_a?(LocalVariableReadNode) + builder.pair_keyword( + [node.key.unescaped, srange(node.key)], + builder.ident([value.name, srange(node.key.value_loc)]).updated(:lvar) + ) + else + builder.pair_label([node.key.unescaped, srange(node.key.location)]) + end + elsif node.operator_loc + builder.pair(visit(node.key), token(node.operator_loc), visit(node.value)) + elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil? + builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value)) + else + parts = + if node.key.is_a?(SymbolNode) + [builder.string_internal([node.key.unescaped, srange(node.key.value_loc)])] + else + visit_all(node.key.parts) + end + + builder.pair_quoted(token(node.key.opening_loc), parts, token(node.key.closing_loc), visit(node.value)) + end + end + + # def foo(**); bar(**); end + # ^^ + # + # { **foo } + # ^^^^^ + def visit_assoc_splat_node(node) + if node.value.nil? && forwarding.include?(:**) + builder.forwarded_kwrestarg(token(node.operator_loc)) + else + builder.kwsplat(token(node.operator_loc), visit(node.value)) + end + end + + # $+ + # ^^ + def visit_back_reference_read_node(node) + builder.back_ref(token(node.location)) + end + + # begin end + # ^^^^^^^^^ + def visit_begin_node(node) + rescue_bodies = [] + + if (rescue_clause = node.rescue_clause) + begin + find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset + find_end_offset = (rescue_clause.statements&.location&.start_offset || rescue_clause.consequent&.location&.start_offset || (find_start_offset + 1)) + + rescue_bodies << builder.rescue_body( + token(rescue_clause.keyword_loc), + rescue_clause.exceptions.any? ? builder.array(nil, visit_all(rescue_clause.exceptions), nil) : nil, + token(rescue_clause.operator_loc), + visit(rescue_clause.reference), + srange_find(find_start_offset, find_end_offset, [";"]), + visit(rescue_clause.statements) + ) + end until (rescue_clause = rescue_clause.consequent).nil? + end + + begin_body = + builder.begin_body( + visit(node.statements), + rescue_bodies, + token(node.else_clause&.else_keyword_loc), + visit(node.else_clause), + token(node.ensure_clause&.ensure_keyword_loc), + visit(node.ensure_clause&.statements) + ) + + if node.begin_keyword_loc + builder.begin_keyword(token(node.begin_keyword_loc), begin_body, token(node.end_keyword_loc)) + else + begin_body + end + end + + # foo(&bar) + # ^^^^ + def visit_block_argument_node(node) + builder.block_pass(token(node.operator_loc), visit(node.expression)) + end + + # foo { |; bar| } + # ^^^ + def visit_block_local_variable_node(node) + builder.shadowarg(token(node.location)) + end + + # A block on a keyword or method call. + def visit_block_node(node) + raise CompilationError, "Cannot directly compile block nodes" + end + + # def foo(&bar); end + # ^^^^ + def visit_block_parameter_node(node) + builder.blockarg(token(node.operator_loc), token(node.name_loc)) + end + + # A block's parameters. + def visit_block_parameters_node(node) + [*visit(node.parameters)].concat(visit_all(node.locals)) + end + + # break + # ^^^^^ + # + # break foo + # ^^^^^^^^^ + def visit_break_node(node) + builder.keyword_cmd(:break, token(node.keyword_loc), nil, visit(node.arguments) || [], nil) + end + + # foo + # ^^^ + # + # foo.bar + # ^^^^^^^ + # + # foo.bar() {} + # ^^^^^^^^^^^^ + def visit_call_node(node) + name = node.name + arguments = node.arguments&.arguments || [] + block = node.block + + if block.is_a?(BlockArgumentNode) + arguments = [*arguments, block] + block = nil + end + + if node.call_operator_loc.nil? + case name + when :-@ + case (receiver = node.receiver).type + when :integer_node, :float_node, :rational_node, :imaginary_node + return visit(numeric_negate(node.message_loc, receiver)) + end + when :! + return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block) + when :=~ + if (receiver = node.receiver).is_a?(RegularExpressionNode) + return builder.match_op(visit(receiver), token(node.message_loc), visit(node.arguments.arguments.first)) + end + when :[] + return visit_block(builder.index(visit(node.receiver), token(node.opening_loc), visit_all(arguments), token(node.closing_loc)), block) + when :[]= + if node.message != "[]=" && node.arguments && block.nil? && !node.safe_navigation? + arguments = node.arguments.arguments[...-1] + arguments << node.block if node.block + + return visit_block( + builder.assign( + builder.index_asgn( + visit(node.receiver), + token(node.opening_loc), + visit_all(arguments), + token(node.closing_loc), + ), + srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]), + visit(node.arguments.arguments.last) + ), + block + ) + end + end + end + + message_loc = node.message_loc + call_operator_loc = node.call_operator_loc + call_operator = [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)] if call_operator_loc + + visit_block( + if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil? + builder.assign( + builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)), + srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]), + visit(node.arguments.arguments.last) + ) + else + builder.call_method( + visit(node.receiver), + call_operator, + message_loc ? [node.name, srange(message_loc)] : nil, + token(node.opening_loc), + visit_all(arguments), + token(node.closing_loc) + ) + end, + block + ) + end + + # foo.bar += baz + # ^^^^^^^^^^^^^^^ + def visit_call_operator_write_node(node) + call_operator_loc = node.call_operator_loc + + builder.op_assign( + builder.call_method( + visit(node.receiver), + call_operator_loc.nil? ? nil : [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)], + node.message_loc ? [node.read_name, srange(node.message_loc)] : nil, + nil, + [], + nil + ), + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], + visit(node.value) + ) + end + + # foo.bar &&= baz + # ^^^^^^^^^^^^^^^ + def visit_call_and_write_node(node) + call_operator_loc = node.call_operator_loc + + builder.op_assign( + builder.call_method( + visit(node.receiver), + call_operator_loc.nil? ? nil : [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)], + node.message_loc ? [node.read_name, srange(node.message_loc)] : nil, + nil, + [], + nil + ), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # foo.bar ||= baz + # ^^^^^^^^^^^^^^^ + def visit_call_or_write_node(node) + call_operator_loc = node.call_operator_loc + + builder.op_assign( + builder.call_method( + visit(node.receiver), + call_operator_loc.nil? ? nil : [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)], + node.message_loc ? [node.read_name, srange(node.message_loc)] : nil, + nil, + [], + nil + ), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # foo.bar, = 1 + # ^^^^^^^ + def visit_call_target_node(node) + call_operator_loc = node.call_operator_loc + + builder.attr_asgn( + visit(node.receiver), + call_operator_loc.nil? ? nil : [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)], + token(node.message_loc) + ) + end + + # foo => bar => baz + # ^^^^^^^^^^ + def visit_capture_pattern_node(node) + builder.match_as(visit(node.value), token(node.operator_loc), visit(node.target)) + end + + # case foo; when bar; end + # ^^^^^^^^^^^^^^^^^^^^^^^ + def visit_case_node(node) + builder.case( + token(node.case_keyword_loc), + visit(node.predicate), + visit_all(node.conditions), + token(node.consequent&.else_keyword_loc), + visit(node.consequent), + token(node.end_keyword_loc) + ) + end + + # case foo; in bar; end + # ^^^^^^^^^^^^^^^^^^^^^ + def visit_case_match_node(node) + builder.case_match( + token(node.case_keyword_loc), + visit(node.predicate), + visit_all(node.conditions), + token(node.consequent&.else_keyword_loc), + visit(node.consequent), + token(node.end_keyword_loc) + ) + end + + # class Foo; end + # ^^^^^^^^^^^^^^ + def visit_class_node(node) + builder.def_class( + token(node.class_keyword_loc), + visit(node.constant_path), + token(node.inheritance_operator_loc), + visit(node.superclass), + node.body&.accept(copy_compiler(forwarding: [])), + token(node.end_keyword_loc) + ) + end + + # @@foo + # ^^^^^ + def visit_class_variable_read_node(node) + builder.cvar(token(node.location)) + end + + # @@foo = 1 + # ^^^^^^^^^ + def visit_class_variable_write_node(node) + builder.assign( + builder.assignable(builder.cvar(token(node.name_loc))), + token(node.operator_loc), + visit(node.value) + ) + end + + # @@foo += bar + # ^^^^^^^^^^^^ + def visit_class_variable_operator_write_node(node) + builder.op_assign( + builder.assignable(builder.cvar(token(node.name_loc))), + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], + visit(node.value) + ) + end + + # @@foo &&= bar + # ^^^^^^^^^^^^^ + def visit_class_variable_and_write_node(node) + builder.op_assign( + builder.assignable(builder.cvar(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # @@foo ||= bar + # ^^^^^^^^^^^^^ + def visit_class_variable_or_write_node(node) + builder.op_assign( + builder.assignable(builder.cvar(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # @@foo, = bar + # ^^^^^ + def visit_class_variable_target_node(node) + builder.assignable(builder.cvar(token(node.location))) + end + + # Foo + # ^^^ + def visit_constant_read_node(node) + builder.const([node.name, srange(node.location)]) + end + + # Foo = 1 + # ^^^^^^^ + # + # Foo, Bar = 1 + # ^^^ ^^^ + def visit_constant_write_node(node) + builder.assign(builder.assignable(builder.const([node.name, srange(node.name_loc)])), token(node.operator_loc), visit(node.value)) + end + + # Foo += bar + # ^^^^^^^^^^^ + def visit_constant_operator_write_node(node) + builder.op_assign( + builder.assignable(builder.const([node.name, srange(node.name_loc)])), + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], + visit(node.value) + ) + end + + # Foo &&= bar + # ^^^^^^^^^^^^ + def visit_constant_and_write_node(node) + builder.op_assign( + builder.assignable(builder.const([node.name, srange(node.name_loc)])), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # Foo ||= bar + # ^^^^^^^^^^^^ + def visit_constant_or_write_node(node) + builder.op_assign( + builder.assignable(builder.const([node.name, srange(node.name_loc)])), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # Foo, = bar + # ^^^ + def visit_constant_target_node(node) + builder.assignable(builder.const([node.name, srange(node.location)])) + end + + # Foo::Bar + # ^^^^^^^^ + def visit_constant_path_node(node) + if node.parent.nil? + builder.const_global( + token(node.delimiter_loc), + [node.name, srange(node.name_loc)] + ) + else + builder.const_fetch( + visit(node.parent), + token(node.delimiter_loc), + [node.name, srange(node.name_loc)] + ) + end + end + + # Foo::Bar = 1 + # ^^^^^^^^^^^^ + # + # Foo::Foo, Bar::Bar = 1 + # ^^^^^^^^ ^^^^^^^^ + def visit_constant_path_write_node(node) + builder.assign( + builder.assignable(visit(node.target)), + token(node.operator_loc), + visit(node.value) + ) + end + + # Foo::Bar += baz + # ^^^^^^^^^^^^^^^ + def visit_constant_path_operator_write_node(node) + builder.op_assign( + builder.assignable(visit(node.target)), + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], + visit(node.value) + ) + end + + # Foo::Bar &&= baz + # ^^^^^^^^^^^^^^^^ + def visit_constant_path_and_write_node(node) + builder.op_assign( + builder.assignable(visit(node.target)), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # Foo::Bar ||= baz + # ^^^^^^^^^^^^^^^^ + def visit_constant_path_or_write_node(node) + builder.op_assign( + builder.assignable(visit(node.target)), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # Foo::Bar, = baz + # ^^^^^^^^ + def visit_constant_path_target_node(node) + builder.assignable(visit_constant_path_node(node)) + end + + # def foo; end + # ^^^^^^^^^^^^ + # + # def self.foo; end + # ^^^^^^^^^^^^^^^^^ + def visit_def_node(node) + if node.equal_loc + if node.receiver + builder.def_endless_singleton( + token(node.def_keyword_loc), + visit(node.receiver.is_a?(ParenthesesNode) ? node.receiver.body : node.receiver), + token(node.operator_loc), + token(node.name_loc), + builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false), + token(node.equal_loc), + node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))) + ) + else + builder.def_endless_method( + token(node.def_keyword_loc), + token(node.name_loc), + builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false), + token(node.equal_loc), + node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))) + ) + end + elsif node.receiver + builder.def_singleton( + token(node.def_keyword_loc), + visit(node.receiver.is_a?(ParenthesesNode) ? node.receiver.body : node.receiver), + token(node.operator_loc), + token(node.name_loc), + builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false), + node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))), + token(node.end_keyword_loc) + ) + else + builder.def_method( + token(node.def_keyword_loc), + token(node.name_loc), + builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false), + node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))), + token(node.end_keyword_loc) + ) + end + end + + # defined? a + # ^^^^^^^^^^ + # + # defined?(a) + # ^^^^^^^^^^^ + def visit_defined_node(node) + builder.keyword_cmd( + :defined?, + token(node.keyword_loc), + token(node.lparen_loc), + [visit(node.value)], + token(node.rparen_loc) + ) + end + + # if foo then bar else baz end + # ^^^^^^^^^^^^ + def visit_else_node(node) + visit(node.statements) + end + + # "foo #{bar}" + # ^^^^^^ + def visit_embedded_statements_node(node) + builder.begin( + token(node.opening_loc), + visit(node.statements), + token(node.closing_loc) + ) + end + + # "foo #@bar" + # ^^^^^ + def visit_embedded_variable_node(node) + visit(node.variable) + end + + # begin; foo; ensure; bar; end + # ^^^^^^^^^^^^ + def visit_ensure_node(node) + raise CompilationError, "Cannot directly compile ensure nodes" + end + + # false + # ^^^^^ + def visit_false_node(node) + builder.false(token(node.location)) + end + + # foo => [*, bar, *] + # ^^^^^^^^^^^ + def visit_find_pattern_node(node) + elements = [node.left, *node.requireds, node.right] + + if node.constant + builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.find_pattern(nil, visit_all(elements), nil), token(node.closing_loc)) + else + builder.find_pattern(token(node.opening_loc), visit_all(elements), token(node.closing_loc)) + end + end + + # 1.0 + # ^^^ + def visit_float_node(node) + visit_numeric(node, builder.float([node.value, srange(node.location)])) + end + + # for foo in bar do end + # ^^^^^^^^^^^^^^^^^^^^^ + def visit_for_node(node) + builder.for( + token(node.for_keyword_loc), + visit(node.index), + token(node.in_keyword_loc), + visit(node.collection), + if node.do_keyword_loc + token(node.do_keyword_loc) + else + srange_find(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset, [";"]) + end, + visit(node.statements), + token(node.end_keyword_loc) + ) + end + + # def foo(...); bar(...); end + # ^^^ + def visit_forwarding_arguments_node(node) + builder.forwarded_args(token(node.location)) + end + + # def foo(...); end + # ^^^ + def visit_forwarding_parameter_node(node) + builder.forward_arg(token(node.location)) + end + + # super + # ^^^^^ + # + # super {} + # ^^^^^^^^ + def visit_forwarding_super_node(node) + visit_block( + builder.keyword_cmd( + :zsuper, + ["super", srange_offsets(node.location.start_offset, node.location.start_offset + 5)] + ), + node.block + ) + end + + # $foo + # ^^^^ + def visit_global_variable_read_node(node) + builder.gvar(token(node.location)) + end + + # $foo = 1 + # ^^^^^^^^ + def visit_global_variable_write_node(node) + builder.assign( + builder.assignable(builder.gvar(token(node.name_loc))), + token(node.operator_loc), + visit(node.value) + ) + end + + # $foo += bar + # ^^^^^^^^^^^ + def visit_global_variable_operator_write_node(node) + builder.op_assign( + builder.assignable(builder.gvar(token(node.name_loc))), + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], + visit(node.value) + ) + end + + # $foo &&= bar + # ^^^^^^^^^^^^ + def visit_global_variable_and_write_node(node) + builder.op_assign( + builder.assignable(builder.gvar(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # $foo ||= bar + # ^^^^^^^^^^^^ + def visit_global_variable_or_write_node(node) + builder.op_assign( + builder.assignable(builder.gvar(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # $foo, = bar + # ^^^^ + def visit_global_variable_target_node(node) + builder.assignable(builder.gvar([node.slice, srange(node.location)])) + end + + # {} + # ^^ + def visit_hash_node(node) + builder.associate( + token(node.opening_loc), + visit_all(node.elements), + token(node.closing_loc) + ) + end + + # foo => {} + # ^^ + def visit_hash_pattern_node(node) + elements = [*node.elements, *node.rest] + + if node.constant + builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.hash_pattern(nil, visit_all(elements), nil), token(node.closing_loc)) + else + builder.hash_pattern(token(node.opening_loc), visit_all(elements), token(node.closing_loc)) + end + end + + # if foo then bar end + # ^^^^^^^^^^^^^^^^^^^ + # + # bar if foo + # ^^^^^^^^^^ + # + # foo ? bar : baz + # ^^^^^^^^^^^^^^^ + def visit_if_node(node) + if !node.if_keyword_loc + builder.ternary( + visit(node.predicate), + token(node.then_keyword_loc), + visit(node.statements), + token(node.consequent.else_keyword_loc), + visit(node.consequent) + ) + elsif node.if_keyword_loc.start_offset == node.location.start_offset + builder.condition( + token(node.if_keyword_loc), + visit(node.predicate), + if node.then_keyword_loc + token(node.then_keyword_loc) + else + srange_find(node.predicate.location.end_offset, (node.statements&.location || node.consequent&.location || node.end_keyword_loc).start_offset, [";"]) + end, + visit(node.statements), + case node.consequent + when IfNode + token(node.consequent.if_keyword_loc) + when ElseNode + token(node.consequent.else_keyword_loc) + end, + visit(node.consequent), + if node.if_keyword != "elsif" + token(node.end_keyword_loc) + end + ) + else + builder.condition_mod( + visit(node.statements), + visit(node.consequent), + token(node.if_keyword_loc), + visit(node.predicate) + ) + end + end + + # 1i + # ^^ + def visit_imaginary_node(node) + visit_numeric(node, builder.complex([Complex(0, node.numeric.value), srange(node.location)])) + end + + # { foo: } + # ^^^^ + def visit_implicit_node(node) + raise CompilationError, "Cannot directly compile implicit nodes" + end + + # foo { |bar,| } + # ^ + def visit_implicit_rest_node(node) + raise CompilationError, "Cannot compile implicit rest nodes" + end + + # case foo; in bar; end + # ^^^^^^^^^^^^^^^^^^^^^ + def visit_in_node(node) + pattern = nil + guard = nil + + case node.pattern + when IfNode + pattern = within_pattern { |compiler| node.pattern.statements.accept(compiler) } + guard = builder.if_guard(token(node.pattern.if_keyword_loc), visit(node.pattern.predicate)) + when UnlessNode + pattern = within_pattern { |compiler| node.pattern.statements.accept(compiler) } + guard = builder.unless_guard(token(node.pattern.keyword_loc), visit(node.pattern.predicate)) + else + pattern = within_pattern { |compiler| node.pattern.accept(compiler) } + end + + builder.in_pattern( + token(node.in_loc), + pattern, + guard, + srange_find(node.pattern.location.end_offset, node.statements&.location&.start_offset, [";", "then"]), + visit(node.statements) + ) + end + + # foo[bar] += baz + # ^^^^^^^^^^^^^^^ + def visit_index_operator_write_node(node) + arguments = node.arguments&.arguments || [] + arguments << node.block if node.block + + builder.op_assign( + builder.index( + visit(node.receiver), + token(node.opening_loc), + visit_all(arguments), + token(node.closing_loc) + ), + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], + visit(node.value) + ) + end + + # foo[bar] &&= baz + # ^^^^^^^^^^^^^^^^ + def visit_index_and_write_node(node) + arguments = node.arguments&.arguments || [] + arguments << node.block if node.block + + builder.op_assign( + builder.index( + visit(node.receiver), + token(node.opening_loc), + visit_all(arguments), + token(node.closing_loc) + ), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # foo[bar] ||= baz + # ^^^^^^^^^^^^^^^^ + def visit_index_or_write_node(node) + arguments = node.arguments&.arguments || [] + arguments << node.block if node.block + + builder.op_assign( + builder.index( + visit(node.receiver), + token(node.opening_loc), + visit_all(arguments), + token(node.closing_loc) + ), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # foo[bar], = 1 + # ^^^^^^^^ + def visit_index_target_node(node) + builder.index_asgn( + visit(node.receiver), + token(node.opening_loc), + visit_all(node.arguments.arguments), + token(node.closing_loc), + ) + end + + # @foo + # ^^^^ + def visit_instance_variable_read_node(node) + builder.ivar(token(node.location)) + end + + # @foo = 1 + # ^^^^^^^^ + def visit_instance_variable_write_node(node) + builder.assign( + builder.assignable(builder.ivar(token(node.name_loc))), + token(node.operator_loc), + visit(node.value) + ) + end + + # @foo += bar + # ^^^^^^^^^^^ + def visit_instance_variable_operator_write_node(node) + builder.op_assign( + builder.assignable(builder.ivar(token(node.name_loc))), + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], + visit(node.value) + ) + end + + # @foo &&= bar + # ^^^^^^^^^^^^ + def visit_instance_variable_and_write_node(node) + builder.op_assign( + builder.assignable(builder.ivar(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # @foo ||= bar + # ^^^^^^^^^^^^ + def visit_instance_variable_or_write_node(node) + builder.op_assign( + builder.assignable(builder.ivar(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # @foo, = bar + # ^^^^ + def visit_instance_variable_target_node(node) + builder.assignable(builder.ivar(token(node.location))) + end + + # 1 + # ^ + def visit_integer_node(node) + visit_numeric(node, builder.integer([node.value, srange(node.location)])) + end + + # /foo #{bar}/ + # ^^^^^^^^^^^^ + def visit_interpolated_regular_expression_node(node) + builder.regexp_compose( + token(node.opening_loc), + visit_all(node.parts), + [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)], + builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)]) + ) + end + + # if /foo #{bar}/ then end + # ^^^^^^^^^^^^ + alias visit_interpolated_match_last_line_node visit_interpolated_regular_expression_node + + # "foo #{bar}" + # ^^^^^^^^^^^^ + def visit_interpolated_string_node(node) + if node.heredoc? + children, closing = visit_heredoc(node) + opening = token(node.opening_loc) + + start_offset = node.opening_loc.end_offset + 1 + end_offset = node.parts.first.location.start_offset + + # In the below case, the offsets should be the same: + # + # <<~HEREDOC + # a #{b} + # HEREDOC + # + # But in this case, the end_offset would be greater than the start_offset: + # + # <<~HEREDOC + # #{b} + # HEREDOC + # + # So we need to make sure the result node's heredoc range is correct, without updating the children + result = if start_offset < end_offset + # We need to add a padding string to ensure that the heredoc has correct range for its body + padding_string_node = builder.string_internal(["", srange_offsets(start_offset, end_offset)]) + node_with_correct_location = builder.string_compose(opening, [padding_string_node, *children], closing) + # But the padding string should not be included in the final AST, so we need to update the result's children + node_with_correct_location.updated(:dstr, children) + else + builder.string_compose(opening, children, closing) + end + + return result + end + + parts = if node.parts.one? { |part| part.type == :string_node } + node.parts.flat_map do |node| + if node.type == :string_node && node.unescaped.lines.count >= 2 + start_offset = node.content_loc.start_offset + + node.unescaped.lines.map do |line| + end_offset = start_offset + line.length + offsets = srange_offsets(start_offset, end_offset) + start_offset = end_offset + + builder.string_internal([line, offsets]) + end + else + visit(node) + end + end + else + visit_all(node.parts) + end + + builder.string_compose( + token(node.opening_loc), + parts, + token(node.closing_loc) + ) + end + + # :"foo #{bar}" + # ^^^^^^^^^^^^^ + def visit_interpolated_symbol_node(node) + builder.symbol_compose( + token(node.opening_loc), + visit_all(node.parts), + token(node.closing_loc) + ) + end + + # `foo #{bar}` + # ^^^^^^^^^^^^ + def visit_interpolated_x_string_node(node) + if node.heredoc? + children, closing = visit_heredoc(node) + builder.xstring_compose(token(node.opening_loc), children, closing) + else + builder.xstring_compose( + token(node.opening_loc), + visit_all(node.parts), + token(node.closing_loc) + ) + end + end + + # -> { it } + # ^^ + def visit_it_local_variable_read_node(node) + builder.ident([:it, srange(node.location)]).updated(:lvar) + end + + # -> { it } + # ^^^^^^^^^ + def visit_it_parameters_node(node) + builder.args(nil, [], nil, false) + end + + # foo(bar: baz) + # ^^^^^^^^ + def visit_keyword_hash_node(node) + builder.associate(nil, visit_all(node.elements), nil) + end + + # def foo(**bar); end + # ^^^^^ + # + # def foo(**); end + # ^^ + def visit_keyword_rest_parameter_node(node) + builder.kwrestarg( + token(node.operator_loc), + node.name ? [node.name, srange(node.name_loc)] : nil + ) + end + + # -> {} + # ^^^^^ + def visit_lambda_node(node) + parameters = node.parameters + implicit_parameters = parameters.is_a?(NumberedParametersNode) || parameters.is_a?(ItParametersNode) + + builder.block( + builder.call_lambda(token(node.operator_loc)), + [node.opening, srange(node.opening_loc)], + if parameters.nil? + builder.args(nil, [], nil, false) + elsif implicit_parameters + visit(node.parameters) + else + builder.args( + token(node.parameters.opening_loc), + visit(node.parameters), + token(node.parameters.closing_loc), + false + ) + end, + node.body&.accept(copy_compiler(forwarding: implicit_parameters ? [] : find_forwarding(parameters&.parameters))), + [node.closing, srange(node.closing_loc)] + ) + end + + # foo + # ^^^ + def visit_local_variable_read_node(node) + builder.ident([node.name, srange(node.location)]).updated(:lvar) + end + + # foo = 1 + # ^^^^^^^ + def visit_local_variable_write_node(node) + builder.assign( + builder.assignable(builder.ident(token(node.name_loc))), + token(node.operator_loc), + visit(node.value) + ) + end + + # foo += bar + # ^^^^^^^^^^ + def visit_local_variable_operator_write_node(node) + builder.op_assign( + builder.assignable(builder.ident(token(node.name_loc))), + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], + visit(node.value) + ) + end + + # foo &&= bar + # ^^^^^^^^^^^ + def visit_local_variable_and_write_node(node) + builder.op_assign( + builder.assignable(builder.ident(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # foo ||= bar + # ^^^^^^^^^^^ + def visit_local_variable_or_write_node(node) + builder.op_assign( + builder.assignable(builder.ident(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end + + # foo, = bar + # ^^^ + def visit_local_variable_target_node(node) + if in_pattern + builder.assignable(builder.match_var([node.name, srange(node.location)])) + else + builder.assignable(builder.ident(token(node.location))) + end + end + + # foo in bar + # ^^^^^^^^^^ + def visit_match_predicate_node(node) + builder.match_pattern_p( + visit(node.value), + token(node.operator_loc), + within_pattern { |compiler| node.pattern.accept(compiler) } + ) + end + + # foo => bar + # ^^^^^^^^^^ + def visit_match_required_node(node) + builder.match_pattern( + visit(node.value), + token(node.operator_loc), + within_pattern { |compiler| node.pattern.accept(compiler) } + ) + end + + # /(?<foo>foo)/ =~ bar + # ^^^^^^^^^^^^^^^^^^^^ + def visit_match_write_node(node) + builder.match_op( + visit(node.call.receiver), + token(node.call.message_loc), + visit(node.call.arguments.arguments.first) + ) + end + + # A node that is missing from the syntax tree. This is only used in the + # case of a syntax error. The parser gem doesn't have such a concept, so + # we invent our own here. + def visit_missing_node(node) + ::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location))) + end + + # module Foo; end + # ^^^^^^^^^^^^^^^ + def visit_module_node(node) + builder.def_module( + token(node.module_keyword_loc), + visit(node.constant_path), + node.body&.accept(copy_compiler(forwarding: [])), + token(node.end_keyword_loc) + ) + end + + # foo, bar = baz + # ^^^^^^^^ + def visit_multi_target_node(node) + elements = [*node.lefts] + elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode) + elements.concat(node.rights) + + builder.multi_lhs( + token(node.lparen_loc), + visit_all(elements), + token(node.rparen_loc) + ) + end + + # foo, bar = baz + # ^^^^^^^^^^^^^^ + def visit_multi_write_node(node) + elements = [*node.lefts] + elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode) + elements.concat(node.rights) + + builder.multi_assign( + builder.multi_lhs( + token(node.lparen_loc), + visit_all(elements), + token(node.rparen_loc) + ), + token(node.operator_loc), + visit(node.value) + ) + end + + # next + # ^^^^ + # + # next foo + # ^^^^^^^^ + def visit_next_node(node) + builder.keyword_cmd( + :next, + token(node.keyword_loc), + nil, + visit(node.arguments) || [], + nil + ) + end + + # nil + # ^^^ + def visit_nil_node(node) + builder.nil(token(node.location)) + end + + # def foo(**nil); end + # ^^^^^ + def visit_no_keywords_parameter_node(node) + if in_pattern + builder.match_nil_pattern(token(node.operator_loc), token(node.keyword_loc)) + else + builder.kwnilarg(token(node.operator_loc), token(node.keyword_loc)) + end + end + + # -> { _1 + _2 } + # ^^^^^^^^^^^^^^ + def visit_numbered_parameters_node(node) + builder.numargs(node.maximum) + end + + # $1 + # ^^ + def visit_numbered_reference_read_node(node) + builder.nth_ref([node.number, srange(node.location)]) + end + + # def foo(bar: baz); end + # ^^^^^^^^ + def visit_optional_keyword_parameter_node(node) + builder.kwoptarg([node.name, srange(node.name_loc)], visit(node.value)) + end + + # def foo(bar = 1); end + # ^^^^^^^ + def visit_optional_parameter_node(node) + builder.optarg(token(node.name_loc), token(node.operator_loc), visit(node.value)) + end + + # a or b + # ^^^^^^ + def visit_or_node(node) + builder.logical_op(:or, visit(node.left), token(node.operator_loc), visit(node.right)) + end + + # def foo(bar, *baz); end + # ^^^^^^^^^ + def visit_parameters_node(node) + params = [] + + if node.requireds.any? + node.requireds.each do |required| + if required.is_a?(RequiredParameterNode) + params << visit(required) + else + compiler = copy_compiler(in_destructure: true) + params << required.accept(compiler) + end + end + end + + params.concat(visit_all(node.optionals)) if node.optionals.any? + params << visit(node.rest) if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode) + + if node.posts.any? + node.posts.each do |post| + if post.is_a?(RequiredParameterNode) + params << visit(post) + else + compiler = copy_compiler(in_destructure: true) + params << post.accept(compiler) + end + end + end + + params.concat(visit_all(node.keywords)) if node.keywords.any? + params << visit(node.keyword_rest) if !node.keyword_rest.nil? + params << visit(node.block) if !node.block.nil? + params + end + + # () + # ^^ + # + # (1) + # ^^^ + def visit_parentheses_node(node) + builder.begin( + token(node.opening_loc), + visit(node.body), + token(node.closing_loc) + ) + end + + # foo => ^(bar) + # ^^^^^^ + def visit_pinned_expression_node(node) + expression = builder.begin(token(node.lparen_loc), visit(node.expression), token(node.rparen_loc)) + builder.pin(token(node.operator_loc), expression) + end + + # foo = 1 and bar => ^foo + # ^^^^ + def visit_pinned_variable_node(node) + builder.pin(token(node.operator_loc), visit(node.variable)) + end + + # END {} + def visit_post_execution_node(node) + builder.postexe( + token(node.keyword_loc), + token(node.opening_loc), + visit(node.statements), + token(node.closing_loc) + ) + end + + # BEGIN {} + def visit_pre_execution_node(node) + builder.preexe( + token(node.keyword_loc), + token(node.opening_loc), + visit(node.statements), + token(node.closing_loc) + ) + end + + # The top-level program node. + def visit_program_node(node) + visit(node.statements) + end + + # 0..5 + # ^^^^ + def visit_range_node(node) + if node.exclude_end? + builder.range_exclusive( + visit(node.left), + token(node.operator_loc), + visit(node.right) + ) + else + builder.range_inclusive( + visit(node.left), + token(node.operator_loc), + visit(node.right) + ) + end + end + + # if foo .. bar; end + # ^^^^^^^^^^ + alias visit_flip_flop_node visit_range_node + + # 1r + # ^^ + def visit_rational_node(node) + visit_numeric(node, builder.rational([node.value, srange(node.location)])) + end + + # redo + # ^^^^ + def visit_redo_node(node) + builder.keyword_cmd(:redo, token(node.location)) + end + + # /foo/ + # ^^^^^ + def visit_regular_expression_node(node) + builder.regexp_compose( + token(node.opening_loc), + [builder.string_internal(token(node.content_loc))], + [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)], + builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)]) + ) + end + + # if /foo/ then end + # ^^^^^ + alias visit_match_last_line_node visit_regular_expression_node + + # def foo(bar:); end + # ^^^^ + def visit_required_keyword_parameter_node(node) + builder.kwarg([node.name, srange(node.name_loc)]) + end + + # def foo(bar); end + # ^^^ + def visit_required_parameter_node(node) + builder.arg(token(node.location)) + end + + # foo rescue bar + # ^^^^^^^^^^^^^^ + def visit_rescue_modifier_node(node) + builder.begin_body( + visit(node.expression), + [ + builder.rescue_body( + token(node.keyword_loc), + nil, + nil, + nil, + nil, + visit(node.rescue_expression) + ) + ] + ) + end + + # begin; rescue; end + # ^^^^^^^ + def visit_rescue_node(node) + raise CompilationError, "Cannot directly compile rescue nodes" + end + + # def foo(*bar); end + # ^^^^ + # + # def foo(*); end + # ^ + def visit_rest_parameter_node(node) + builder.restarg(token(node.operator_loc), token(node.name_loc)) + end + + # retry + # ^^^^^ + def visit_retry_node(node) + builder.keyword_cmd(:retry, token(node.location)) + end + + # return + # ^^^^^^ + # + # return 1 + # ^^^^^^^^ + def visit_return_node(node) + builder.keyword_cmd( + :return, + token(node.keyword_loc), + nil, + visit(node.arguments) || [], + nil + ) + end + + # self + # ^^^^ + def visit_self_node(node) + builder.self(token(node.location)) + end + + # A shareable constant. + def visit_shareable_constant_node(node) + visit(node.write) + end + + # class << self; end + # ^^^^^^^^^^^^^^^^^^ + def visit_singleton_class_node(node) + builder.def_sclass( + token(node.class_keyword_loc), + token(node.operator_loc), + visit(node.expression), + node.body&.accept(copy_compiler(forwarding: [])), + token(node.end_keyword_loc) + ) + end + + # __ENCODING__ + # ^^^^^^^^^^^^ + def visit_source_encoding_node(node) + builder.accessible(builder.__ENCODING__(token(node.location))) + end + + # __FILE__ + # ^^^^^^^^ + def visit_source_file_node(node) + builder.accessible(builder.__FILE__(token(node.location))) + end + + # __LINE__ + # ^^^^^^^^ + def visit_source_line_node(node) + builder.accessible(builder.__LINE__(token(node.location))) + end + + # foo(*bar) + # ^^^^ + # + # def foo((bar, *baz)); end + # ^^^^ + # + # def foo(*); bar(*); end + # ^ + def visit_splat_node(node) + if node.expression.nil? && forwarding.include?(:*) + builder.forwarded_restarg(token(node.operator_loc)) + elsif in_destructure + builder.restarg(token(node.operator_loc), token(node.expression&.location)) + elsif in_pattern + builder.match_rest(token(node.operator_loc), token(node.expression&.location)) + else + builder.splat(token(node.operator_loc), visit(node.expression)) + end + end + + # A list of statements. + def visit_statements_node(node) + builder.compstmt(visit_all(node.body)) + end + + # "foo" + # ^^^^^ + def visit_string_node(node) + if node.heredoc? + children, closing = visit_heredoc(node.to_interpolated) + builder.string_compose(token(node.opening_loc), children, closing) + elsif node.opening == "?" + builder.character([node.unescaped, srange(node.location)]) + else + content_lines = node.content.lines + unescaped_lines = node.unescaped.lines + + parts = + if content_lines.length <= 1 || unescaped_lines.length <= 1 + [builder.string_internal([node.unescaped, srange(node.content_loc)])] + elsif content_lines.length != unescaped_lines.length + # This occurs when we have line continuations in the string. We + # need to come back and fix this, but for now this stops the + # code from breaking when we encounter it because of trying to + # transpose arrays of different lengths. + [builder.string_internal([node.unescaped, srange(node.content_loc)])] + else + start_offset = node.content_loc.start_offset + + [content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line| + end_offset = start_offset + content_line.length + offsets = srange_offsets(start_offset, end_offset) + start_offset = end_offset + + builder.string_internal([unescaped_line, offsets]) + end + end + + builder.string_compose( + token(node.opening_loc), + parts, + token(node.closing_loc) + ) + end + end + + # super(foo) + # ^^^^^^^^^^ + def visit_super_node(node) + arguments = node.arguments&.arguments || [] + block = node.block + + if block.is_a?(BlockArgumentNode) + arguments = [*arguments, block] + block = nil + end + + visit_block( + builder.keyword_cmd( + :super, + token(node.keyword_loc), + token(node.lparen_loc), + visit_all(arguments), + token(node.rparen_loc) + ), + block + ) + end + + # :foo + # ^^^^ + def visit_symbol_node(node) + if node.closing_loc.nil? + if node.opening_loc.nil? + builder.symbol_internal([node.unescaped, srange(node.location)]) + else + builder.symbol([node.unescaped, srange(node.location)]) + end + else + parts = if node.value.lines.one? + [builder.string_internal([node.unescaped, srange(node.value_loc)])] + else + start_offset = node.value_loc.start_offset + + node.value.lines.map do |line| + end_offset = start_offset + line.length + offsets = srange_offsets(start_offset, end_offset) + start_offset = end_offset + + builder.string_internal([line, offsets]) + end + end + + builder.symbol_compose( + token(node.opening_loc), + parts, + token(node.closing_loc) + ) + end + end + + # true + # ^^^^ + def visit_true_node(node) + builder.true(token(node.location)) + end + + # undef foo + # ^^^^^^^^^ + def visit_undef_node(node) + builder.undef_method(token(node.keyword_loc), visit_all(node.names)) + end + + # unless foo; bar end + # ^^^^^^^^^^^^^^^^^^^ + # + # bar unless foo + # ^^^^^^^^^^^^^^ + def visit_unless_node(node) + if node.keyword_loc.start_offset == node.location.start_offset + builder.condition( + token(node.keyword_loc), + visit(node.predicate), + if node.then_keyword_loc + token(node.then_keyword_loc) + else + srange_find(node.predicate.location.end_offset, (node.statements&.location || node.consequent&.location || node.end_keyword_loc).start_offset, [";"]) + end, + visit(node.consequent), + token(node.consequent&.else_keyword_loc), + visit(node.statements), + token(node.end_keyword_loc) + ) + else + builder.condition_mod( + visit(node.consequent), + visit(node.statements), + token(node.keyword_loc), + visit(node.predicate) + ) + end + end + + # until foo; bar end + # ^^^^^^^^^^^^^^^^^^ + # + # bar until foo + # ^^^^^^^^^^^^^ + def visit_until_node(node) + if node.location.start_offset == node.keyword_loc.start_offset + builder.loop( + :until, + token(node.keyword_loc), + visit(node.predicate), + srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, [";", "do"]), + visit(node.statements), + token(node.closing_loc) + ) + else + builder.loop_mod( + :until, + visit(node.statements), + token(node.keyword_loc), + visit(node.predicate) + ) + end + end + + # case foo; when bar; end + # ^^^^^^^^^^^^^ + def visit_when_node(node) + builder.when( + token(node.keyword_loc), + visit_all(node.conditions), + if node.then_keyword_loc + token(node.then_keyword_loc) + else + srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset, [";"]) + end, + visit(node.statements) + ) + end + + # while foo; bar end + # ^^^^^^^^^^^^^^^^^^ + # + # bar while foo + # ^^^^^^^^^^^^^ + def visit_while_node(node) + if node.location.start_offset == node.keyword_loc.start_offset + builder.loop( + :while, + token(node.keyword_loc), + visit(node.predicate), + srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, [";", "do"]), + visit(node.statements), + token(node.closing_loc) + ) + else + builder.loop_mod( + :while, + visit(node.statements), + token(node.keyword_loc), + visit(node.predicate) + ) + end + end + + # `foo` + # ^^^^^ + def visit_x_string_node(node) + if node.heredoc? + children, closing = visit_heredoc(node.to_interpolated) + builder.xstring_compose(token(node.opening_loc), children, closing) + else + parts = if node.unescaped.lines.one? + [builder.string_internal([node.unescaped, srange(node.content_loc)])] + else + start_offset = node.content_loc.start_offset + + node.unescaped.lines.map do |line| + end_offset = start_offset + line.length + offsets = srange_offsets(start_offset, end_offset) + start_offset = end_offset + + builder.string_internal([line, offsets]) + end + end + + builder.xstring_compose( + token(node.opening_loc), + parts, + token(node.closing_loc) + ) + end + end + + # yield + # ^^^^^ + # + # yield 1 + # ^^^^^^^ + def visit_yield_node(node) + builder.keyword_cmd( + :yield, + token(node.keyword_loc), + token(node.lparen_loc), + visit(node.arguments) || [], + token(node.rparen_loc) + ) + end + + private + + # Initialize a new compiler with the given option overrides, used to + # visit a subtree with the given options. + def copy_compiler(forwarding: self.forwarding, in_destructure: self.in_destructure, in_pattern: self.in_pattern) + Compiler.new(parser, offset_cache, forwarding: forwarding, in_destructure: in_destructure, in_pattern: in_pattern) + end + + # When *, **, &, or ... are used as an argument in a method call, we + # check if they were allowed by the current context. To determine that + # we build this lookup table. + def find_forwarding(node) + return [] if node.nil? + + forwarding = [] + forwarding << :* if node.rest.is_a?(RestParameterNode) && node.rest.name.nil? + forwarding << :** if node.keyword_rest.is_a?(KeywordRestParameterNode) && node.keyword_rest.name.nil? + forwarding << :& if !node.block.nil? && node.block.name.nil? + forwarding |= [:&, :"..."] if node.keyword_rest.is_a?(ForwardingParameterNode) + + forwarding + end + + # Negate the value of a numeric node. This is a special case where you + # have a negative sign on one line and then a number on the next line. + # In normal Ruby, this will always be a method call. The parser gem, + # however, marks this as a numeric literal. We have to massage the tree + # here to get it into the correct form. + def numeric_negate(message_loc, receiver) + case receiver.type + when :integer_node, :float_node + receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location)) + when :rational_node + receiver.copy(numerator: -receiver.numerator, location: message_loc.join(receiver.location)) + when :imaginary_node + receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location)) + end + end + + # Blocks can have a special set of parameters that automatically expand + # when given arrays if they have a single required parameter and no + # other parameters. + def procarg0?(parameters) + parameters && + parameters.requireds.length == 1 && + parameters.optionals.empty? && + parameters.rest.nil? && + parameters.posts.empty? && + parameters.keywords.empty? && + parameters.keyword_rest.nil? && + parameters.block.nil? + end + + # Locations in the parser gem AST are generated using this class. We + # store a reference to its constant to make it slightly faster to look + # up. + Range = ::Parser::Source::Range + + # Constructs a new source range from the given start and end offsets. + def srange(location) + Range.new(source_buffer, offset_cache[location.start_offset], offset_cache[location.end_offset]) if location + end + + # Constructs a new source range from the given start and end offsets. + def srange_offsets(start_offset, end_offset) + Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset]) + end + + # Constructs a new source range by finding the given tokens between the + # given start offset and end offset. If the needle is not found, it + # returns nil. Importantly it does not search past newlines or comments. + # + # Note that end_offset is allowed to be nil, in which case this will + # search until the end of the string. + def srange_find(start_offset, end_offset, tokens) + if (match = source_buffer.source.byteslice(start_offset...end_offset).match(/(\s*)(#{tokens.join("|")})/)) + _, whitespace, token = *match + token_offset = start_offset + whitespace.bytesize + + [token, Range.new(source_buffer, offset_cache[token_offset], offset_cache[token_offset + token.bytesize])] + end + end + + # Transform a location into a token that the parser gem expects. + def token(location) + [location.slice, Range.new(source_buffer, offset_cache[location.start_offset], offset_cache[location.end_offset])] if location + end + + # Visit a block node on a call. + def visit_block(call, block) + if block + parameters = block.parameters + implicit_parameters = parameters.is_a?(NumberedParametersNode) || parameters.is_a?(ItParametersNode) + + builder.block( + call, + token(block.opening_loc), + if parameters.nil? + builder.args(nil, [], nil, false) + elsif implicit_parameters + visit(parameters) + else + builder.args( + token(parameters.opening_loc), + if procarg0?(parameters.parameters) + parameter = parameters.parameters.requireds.first + [builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals)) + else + visit(parameters) + end, + token(parameters.closing_loc), + false + ) + end, + block.body&.accept(copy_compiler(forwarding: implicit_parameters ? [] : find_forwarding(parameters&.parameters))), + token(block.closing_loc) + ) + else + call + end + end + + # Visit a heredoc that can be either a string or an xstring. + def visit_heredoc(node) + children = Array.new + node.parts.each do |part| + pushing = + if part.is_a?(StringNode) && part.unescaped.include?("\n") + unescaped = part.unescaped.lines(chomp: true) + escaped = part.content.lines(chomp: true) + + escaped_lengths = + if node.opening.end_with?("'") + escaped.map { |line| line.bytesize + 1 } + else + escaped.chunk_while { |before, after| before.match?(/(?<!\\)\\$/) }.map { |line| line.join.bytesize + line.length } + end + + start_offset = part.location.start_offset + end_offset = nil + + unescaped.zip(escaped_lengths).map do |unescaped_line, escaped_length| + end_offset = start_offset + (escaped_length || 0) + inner_part = builder.string_internal(["#{unescaped_line}\n", srange_offsets(start_offset, end_offset)]) + start_offset = end_offset + inner_part + end + else + [visit(part)] + end + + pushing.each do |child| + if child.type == :str && child.children.last == "" + # nothing + elsif child.type == :str && children.last && children.last.type == :str && !children.last.children.first.end_with?("\n") + children.last.children.first << child.children.first + else + children << child + end + end + end + + closing = node.closing + closing_t = [closing.chomp, srange_offsets(node.closing_loc.start_offset, node.closing_loc.end_offset - (closing[/\s+$/]&.length || 0))] + + [children, closing_t] + end + + # Visit a numeric node and account for the optional sign. + def visit_numeric(node, value) + if (slice = node.slice).match?(/^[+-]/) + builder.unary_num( + [slice[0].to_sym, srange_offsets(node.location.start_offset, node.location.start_offset + 1)], + value + ) + else + value + end + end + + # Within the given block, track that we're within a pattern. + def within_pattern + begin + parser.pattern_variables.push + yield copy_compiler(in_pattern: true) + ensure + parser.pattern_variables.pop + end + end + end + end + end +end diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb new file mode 100644 index 0000000000..9d7caae0ba --- /dev/null +++ b/lib/prism/translation/parser/lexer.rb @@ -0,0 +1,416 @@ +# frozen_string_literal: true + +module Prism + module Translation + class Parser + # Accepts a list of prism tokens and converts them into the expected + # format for the parser gem. + class Lexer + # The direct translating of types between the two lexers. + TYPES = { + # These tokens should never appear in the output of the lexer. + EOF: nil, + MISSING: nil, + NOT_PROVIDED: nil, + IGNORED_NEWLINE: nil, + EMBDOC_END: nil, + EMBDOC_LINE: nil, + __END__: nil, + + # These tokens have more or less direct mappings. + AMPERSAND: :tAMPER2, + AMPERSAND_AMPERSAND: :tANDOP, + AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN, + AMPERSAND_DOT: :tANDDOT, + AMPERSAND_EQUAL: :tOP_ASGN, + BACK_REFERENCE: :tBACK_REF, + BACKTICK: :tXSTRING_BEG, + BANG: :tBANG, + BANG_EQUAL: :tNEQ, + BANG_TILDE: :tNMATCH, + BRACE_LEFT: :tLCURLY, + BRACE_RIGHT: :tRCURLY, + BRACKET_LEFT: :tLBRACK2, + BRACKET_LEFT_ARRAY: :tLBRACK, + BRACKET_LEFT_RIGHT: :tAREF, + BRACKET_LEFT_RIGHT_EQUAL: :tASET, + BRACKET_RIGHT: :tRBRACK, + CARET: :tCARET, + CARET_EQUAL: :tOP_ASGN, + CHARACTER_LITERAL: :tCHARACTER, + CLASS_VARIABLE: :tCVAR, + COLON: :tCOLON, + COLON_COLON: :tCOLON2, + COMMA: :tCOMMA, + COMMENT: :tCOMMENT, + CONSTANT: :tCONSTANT, + DOT: :tDOT, + DOT_DOT: :tDOT2, + DOT_DOT_DOT: :tDOT3, + EMBDOC_BEGIN: :tCOMMENT, + EMBEXPR_BEGIN: :tSTRING_DBEG, + EMBEXPR_END: :tSTRING_DEND, + EMBVAR: :tSTRING_DVAR, + EQUAL: :tEQL, + EQUAL_EQUAL: :tEQ, + EQUAL_EQUAL_EQUAL: :tEQQ, + EQUAL_GREATER: :tASSOC, + EQUAL_TILDE: :tMATCH, + FLOAT: :tFLOAT, + FLOAT_IMAGINARY: :tIMAGINARY, + FLOAT_RATIONAL: :tRATIONAL, + FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY, + GLOBAL_VARIABLE: :tGVAR, + GREATER: :tGT, + GREATER_EQUAL: :tGEQ, + GREATER_GREATER: :tRSHFT, + GREATER_GREATER_EQUAL: :tOP_ASGN, + HEREDOC_START: :tSTRING_BEG, + HEREDOC_END: :tSTRING_END, + IDENTIFIER: :tIDENTIFIER, + INSTANCE_VARIABLE: :tIVAR, + INTEGER: :tINTEGER, + INTEGER_IMAGINARY: :tIMAGINARY, + INTEGER_RATIONAL: :tRATIONAL, + INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY, + KEYWORD_ALIAS: :kALIAS, + KEYWORD_AND: :kAND, + KEYWORD_BEGIN: :kBEGIN, + KEYWORD_BEGIN_UPCASE: :klBEGIN, + KEYWORD_BREAK: :kBREAK, + KEYWORD_CASE: :kCASE, + KEYWORD_CLASS: :kCLASS, + KEYWORD_DEF: :kDEF, + KEYWORD_DEFINED: :kDEFINED, + KEYWORD_DO: :kDO, + KEYWORD_DO_LOOP: :kDO_COND, + KEYWORD_END: :kEND, + KEYWORD_END_UPCASE: :klEND, + KEYWORD_ENSURE: :kENSURE, + KEYWORD_ELSE: :kELSE, + KEYWORD_ELSIF: :kELSIF, + KEYWORD_FALSE: :kFALSE, + KEYWORD_FOR: :kFOR, + KEYWORD_IF: :kIF, + KEYWORD_IF_MODIFIER: :kIF_MOD, + KEYWORD_IN: :kIN, + KEYWORD_MODULE: :kMODULE, + KEYWORD_NEXT: :kNEXT, + KEYWORD_NIL: :kNIL, + KEYWORD_NOT: :kNOT, + KEYWORD_OR: :kOR, + KEYWORD_REDO: :kREDO, + KEYWORD_RESCUE: :kRESCUE, + KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD, + KEYWORD_RETRY: :kRETRY, + KEYWORD_RETURN: :kRETURN, + KEYWORD_SELF: :kSELF, + KEYWORD_SUPER: :kSUPER, + KEYWORD_THEN: :kTHEN, + KEYWORD_TRUE: :kTRUE, + KEYWORD_UNDEF: :kUNDEF, + KEYWORD_UNLESS: :kUNLESS, + KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD, + KEYWORD_UNTIL: :kUNTIL, + KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD, + KEYWORD_WHEN: :kWHEN, + KEYWORD_WHILE: :kWHILE, + KEYWORD_WHILE_MODIFIER: :kWHILE_MOD, + KEYWORD_YIELD: :kYIELD, + KEYWORD___ENCODING__: :k__ENCODING__, + KEYWORD___FILE__: :k__FILE__, + KEYWORD___LINE__: :k__LINE__, + LABEL: :tLABEL, + LABEL_END: :tLABEL_END, + LAMBDA_BEGIN: :tLAMBEG, + LESS: :tLT, + LESS_EQUAL: :tLEQ, + LESS_EQUAL_GREATER: :tCMP, + LESS_LESS: :tLSHFT, + LESS_LESS_EQUAL: :tOP_ASGN, + METHOD_NAME: :tFID, + MINUS: :tMINUS, + MINUS_EQUAL: :tOP_ASGN, + MINUS_GREATER: :tLAMBDA, + NEWLINE: :tNL, + NUMBERED_REFERENCE: :tNTH_REF, + PARENTHESIS_LEFT: :tLPAREN, + PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG, + PARENTHESIS_RIGHT: :tRPAREN, + PERCENT: :tPERCENT, + PERCENT_EQUAL: :tOP_ASGN, + PERCENT_LOWER_I: :tQSYMBOLS_BEG, + PERCENT_LOWER_W: :tQWORDS_BEG, + PERCENT_UPPER_I: :tSYMBOLS_BEG, + PERCENT_UPPER_W: :tWORDS_BEG, + PERCENT_LOWER_X: :tXSTRING_BEG, + PLUS: :tPLUS, + PLUS_EQUAL: :tOP_ASGN, + PIPE_EQUAL: :tOP_ASGN, + PIPE: :tPIPE, + PIPE_PIPE: :tOROP, + PIPE_PIPE_EQUAL: :tOP_ASGN, + QUESTION_MARK: :tEH, + REGEXP_BEGIN: :tREGEXP_BEG, + REGEXP_END: :tSTRING_END, + SEMICOLON: :tSEMI, + SLASH: :tDIVIDE, + SLASH_EQUAL: :tOP_ASGN, + STAR: :tSTAR2, + STAR_EQUAL: :tOP_ASGN, + STAR_STAR: :tPOW, + STAR_STAR_EQUAL: :tOP_ASGN, + STRING_BEGIN: :tSTRING_BEG, + STRING_CONTENT: :tSTRING_CONTENT, + STRING_END: :tSTRING_END, + SYMBOL_BEGIN: :tSYMBEG, + TILDE: :tTILDE, + UAMPERSAND: :tAMPER, + UCOLON_COLON: :tCOLON3, + UDOT_DOT: :tBDOT2, + UDOT_DOT_DOT: :tBDOT3, + UMINUS: :tUMINUS, + UMINUS_NUM: :tUNARY_NUM, + UPLUS: :tUPLUS, + USTAR: :tSTAR, + USTAR_STAR: :tPOW, + WORDS_SEP: :tSPACE + } + + # These constants represent flags in our lex state. We really, really + # don't want to be using them and we really, really don't want to be + # exposing them as part of our public API. Unfortunately, we don't have + # another way of matching the exact tokens that the parser gem expects + # without them. We should find another way to do this, but in the + # meantime we'll hide them from the documentation and mark them as + # private constants. + EXPR_BEG = 0x1 # :nodoc: + EXPR_LABEL = 0x400 # :nodoc: + + private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL + + # The Parser::Source::Buffer that the tokens were lexed from. + attr_reader :source_buffer + + # An array of tuples that contain prism tokens and their associated lex + # state when they were lexed. + attr_reader :lexed + + # A hash that maps offsets in bytes to offsets in characters. + attr_reader :offset_cache + + # Initialize the lexer with the given source buffer, prism tokens, and + # offset cache. + def initialize(source_buffer, lexed, offset_cache) + @source_buffer = source_buffer + @lexed = lexed + @offset_cache = offset_cache + end + + Range = ::Parser::Source::Range # :nodoc: + private_constant :Range + + # Convert the prism tokens into the expected format for the parser gem. + def to_a + tokens = [] + + index = 0 + length = lexed.length + + heredoc_identifier_stack = [] + + while index < length + token, state = lexed[index] + index += 1 + next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type) + + type = TYPES.fetch(token.type) + value = token.value + location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset]) + + case type + when :tCHARACTER + value.delete_prefix!("?") + when :tCOMMENT + if token.type == :EMBDOC_BEGIN + start_index = index + + while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1) + value += next_token.value + index += 1 + end + + if start_index != index + value += next_token.value + location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset]) + index += 1 + end + else + value.chomp! + location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1]) + end + when :tNL + value = nil + when :tFLOAT + value = parse_float(value) + when :tIMAGINARY + value = parse_complex(value) + when :tINTEGER + if value.start_with?("+") + tokens << [:tUNARY_NUM, ["+", Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])]] + location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset]) + end + + value = parse_integer(value) + when :tLABEL + value.chomp!(":") + when :tLABEL_END + value.chomp!(":") + when :tLCURLY + type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL + when :tNTH_REF + value = parse_integer(value.delete_prefix("$")) + when :tOP_ASGN + value.chomp!("=") + when :tRATIONAL + value = parse_rational(value) + when :tSPACE + value = nil + when :tSTRING_BEG + if token.type == :HEREDOC_START + heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier]) + end + if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END + next_location = token.location.join(next_token.location) + type = :tSTRING + value = "" + location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset]) + index += 1 + elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END + next_location = token.location.join(next_next_token.location) + type = :tSTRING + value = next_token.value.gsub("\\\\", "\\") + location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset]) + index += 2 + elsif value.start_with?("<<") + quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2] + if quote == "`" + type = :tXSTRING_BEG + value = "<<`" + else + value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}" + end + end + when :tSTRING_CONTENT + unless (lines = token.value.lines).one? + start_offset = offset_cache[token.location.start_offset] + lines.map do |line| + newline = line.end_with?("\r\n") ? "\r\n" : "\n" + chomped_line = line.chomp + if match = chomped_line.match(/(?<backslashes>\\+)\z/) + adjustment = match[:backslashes].size / 2 + adjusted_line = chomped_line.delete_suffix("\\" * adjustment) + if match[:backslashes].size.odd? + adjusted_line.delete_suffix!("\\") + adjustment += 2 + else + adjusted_line << newline + end + else + adjusted_line = line + adjustment = 0 + end + + end_offset = start_offset + adjusted_line.length + adjustment + tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]] + start_offset = end_offset + end + next + end + when :tSTRING_DVAR + value = nil + when :tSTRING_END + if token.type == :HEREDOC_END && value.end_with?("\n") + newline_length = value.end_with?("\r\n") ? 2 : 1 + value = heredoc_identifier_stack.pop + location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length]) + elsif token.type == :REGEXP_END + value = value[0] + location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1]) + end + when :tSYMBEG + if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR + next_location = token.location.join(next_token.location) + type = :tSYMBOL + value = next_token.value + value = { "~@" => "~", "!@" => "!" }.fetch(value, value) + location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset]) + index += 1 + end + when :tFID + if !tokens.empty? && tokens.dig(-1, 0) == :kDEF + type = :tIDENTIFIER + end + when :tXSTRING_BEG + if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END + type = :tBACK_REF2 + end + end + + tokens << [type, [value, location]] + + if token.type == :REGEXP_END + tokens << [:tREGEXP_OPT, [token.value[1..], Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])]] + end + end + + tokens + end + + private + + # Parse an integer from the string representation. + def parse_integer(value) + Integer(value) + rescue ArgumentError + 0 + end + + # Parse a float from the string representation. + def parse_float(value) + Float(value) + rescue ArgumentError + 0.0 + end + + # Parse a complex from the string representation. + def parse_complex(value) + value.chomp!("i") + + if value.end_with?("r") + Complex(0, parse_rational(value)) + elsif value.start_with?(/0[BbOoDdXx]/) + Complex(0, parse_integer(value)) + else + Complex(0, value) + end + rescue ArgumentError + 0i + end + + # Parse a rational from the string representation. + def parse_rational(value) + value.chomp!("r") + + if value.start_with?(/0[BbOoDdXx]/) + Rational(parse_integer(value)) + else + Rational(value) + end + rescue ArgumentError + 0r + end + end + end + end +end diff --git a/lib/prism/translation/parser/rubocop.rb b/lib/prism/translation/parser/rubocop.rb new file mode 100644 index 0000000000..6c9687a5cc --- /dev/null +++ b/lib/prism/translation/parser/rubocop.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true +# typed: ignore + +warn "WARN: Prism is directly supported since RuboCop 1.62. The `prism/translation/parser/rubocop` file is deprecated." + +require "parser" +require "rubocop" + +require_relative "../../prism" +require_relative "../parser" + +module Prism + module Translation + class Parser + # This is the special version numbers that should be used in RuboCop + # configuration files to trigger using prism. + + # For Ruby 3.3 + VERSION_3_3 = 80_82_73_83_77.33 + + # For Ruby 3.4 + VERSION_3_4 = 80_82_73_83_77.34 + + # This module gets prepended into RuboCop::AST::ProcessedSource. + module ProcessedSource + # This condition is compatible with rubocop-ast versions up to 1.30.0. + if RuboCop::AST::ProcessedSource.instance_method(:parser_class).arity == 1 + # Redefine parser_class so that we can inject the prism parser into the + # list of known parsers. + def parser_class(ruby_version) + if ruby_version == Prism::Translation::Parser::VERSION_3_3 + warn "WARN: Setting `TargetRubyVersion: 80_82_73_83_77.33` is deprecated. " \ + "Set to `ParserEngine: parser_prism` and `TargetRubyVersion: 3.3` instead." + require_relative "../parser33" + Prism::Translation::Parser33 + elsif ruby_version == Prism::Translation::Parser::VERSION_3_4 + warn "WARN: Setting `TargetRubyVersion: 80_82_73_83_77.34` is deprecated. " \ + "Set to `ParserEngine: parser_prism` and `TargetRubyVersion: 3.4` instead." + require_relative "../parser34" + Prism::Translation::Parser34 + else + super + end + end + else + # Redefine parser_class so that we can inject the prism parser into the + # list of known parsers. + def parser_class(ruby_version, _parser_engine) + if ruby_version == Prism::Translation::Parser::VERSION_3_3 + warn "WARN: Setting `TargetRubyVersion: 80_82_73_83_77.33` is deprecated. " \ + "Set to `ParserEngine: parser_prism` and `TargetRubyVersion: 3.3` instead." + require_relative "../parser33" + Prism::Translation::Parser33 + elsif ruby_version == Prism::Translation::Parser::VERSION_3_4 + warn "WARN: Setting `TargetRubyVersion: 80_82_73_83_77.34` is deprecated. " \ + "Set to `ParserEngine: parser_prism` and `TargetRubyVersion: 3.4` instead." + require_relative "../parser34" + Prism::Translation::Parser34 + else + super + end + end + end + end + end + end +end + +# :stopdoc: +RuboCop::AST::ProcessedSource.prepend(Prism::Translation::Parser::ProcessedSource) +known_rubies = RuboCop::TargetRuby.const_get(:KNOWN_RUBIES) +RuboCop::TargetRuby.send(:remove_const, :KNOWN_RUBIES) +RuboCop::TargetRuby::KNOWN_RUBIES = [*known_rubies, Prism::Translation::Parser::VERSION_3_3].freeze diff --git a/lib/prism/translation/parser33.rb b/lib/prism/translation/parser33.rb new file mode 100644 index 0000000000..b09266e06a --- /dev/null +++ b/lib/prism/translation/parser33.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +module Prism + module Translation + # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`. + class Parser33 < Parser + def version # :nodoc: + 33 + end + end + end +end diff --git a/lib/prism/translation/parser34.rb b/lib/prism/translation/parser34.rb new file mode 100644 index 0000000000..0ead70ad3c --- /dev/null +++ b/lib/prism/translation/parser34.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +module Prism + module Translation + # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`. + class Parser34 < Parser + def version # :nodoc: + 34 + end + end + end +end diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb new file mode 100644 index 0000000000..79ba0e7ab3 --- /dev/null +++ b/lib/prism/translation/ripper.rb @@ -0,0 +1,3452 @@ +# frozen_string_literal: true + +require "ripper" + +module Prism + module Translation + # This class provides a compatibility layer between prism and Ripper. It + # functions by parsing the entire tree first and then walking it and + # executing each of the Ripper callbacks as it goes. To use this class, you + # treat `Prism::Translation::Ripper` effectively as you would treat the + # `Ripper` class. + # + # Note that this class will serve the most common use cases, but Ripper's + # API is extensive and undocumented. It relies on reporting the state of the + # parser at any given time. We do our best to replicate that here, but + # because it is a different architecture it is not possible to perfectly + # replicate the behavior of Ripper. + # + # The main known difference is that we may omit dispatching some events in + # some cases. This impacts the following events: + # + # - on_assign_error + # - on_comma + # - on_ignored_nl + # - on_ignored_sp + # - on_kw + # - on_label_end + # - on_lbrace + # - on_lbracket + # - on_lparen + # - on_nl + # - on_op + # - on_operator_ambiguous + # - on_rbrace + # - on_rbracket + # - on_rparen + # - on_semicolon + # - on_sp + # - on_symbeg + # - on_tstring_beg + # - on_tstring_end + # + class Ripper < Compiler + # Parses the given Ruby program read from +src+. + # +src+ must be a String or an IO or a object with a #gets method. + def self.parse(src, filename = "(ripper)", lineno = 1) + new(src, filename, lineno).parse + end + + # Tokenizes the Ruby program and returns an array of an array, + # which is formatted like + # <code>[[lineno, column], type, token, state]</code>. + # The +filename+ argument is mostly ignored. + # By default, this method does not handle syntax errors in +src+, + # use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+. + # + # require "ripper" + # require "pp" + # + # pp Ripper.lex("def m(a) nil end") + # #=> [[[1, 0], :on_kw, "def", FNAME ], + # [[1, 3], :on_sp, " ", FNAME ], + # [[1, 4], :on_ident, "m", ENDFN ], + # [[1, 5], :on_lparen, "(", BEG|LABEL], + # [[1, 6], :on_ident, "a", ARG ], + # [[1, 7], :on_rparen, ")", ENDFN ], + # [[1, 8], :on_sp, " ", BEG ], + # [[1, 9], :on_kw, "nil", END ], + # [[1, 12], :on_sp, " ", END ], + # [[1, 13], :on_kw, "end", END ]] + # + def self.lex(src, filename = "-", lineno = 1, raise_errors: false) + result = Prism.lex_compat(src, filepath: filename, line: lineno) + + if result.failure? && raise_errors + raise SyntaxError, result.errors.first.message + else + result.value + end + end + + # This contains a table of all of the parser events and their + # corresponding arity. + PARSER_EVENT_TABLE = { + BEGIN: 1, + END: 1, + alias: 2, + alias_error: 2, + aref: 2, + aref_field: 2, + arg_ambiguous: 1, + arg_paren: 1, + args_add: 2, + args_add_block: 2, + args_add_star: 2, + args_forward: 0, + args_new: 0, + array: 1, + aryptn: 4, + assign: 2, + assign_error: 2, + assoc_new: 2, + assoc_splat: 1, + assoclist_from_args: 1, + bare_assoc_hash: 1, + begin: 1, + binary: 3, + block_var: 2, + blockarg: 1, + bodystmt: 4, + brace_block: 2, + break: 1, + call: 3, + case: 2, + class: 3, + class_name_error: 2, + command: 2, + command_call: 4, + const_path_field: 2, + const_path_ref: 2, + const_ref: 1, + def: 3, + defined: 1, + defs: 5, + do_block: 2, + dot2: 2, + dot3: 2, + dyna_symbol: 1, + else: 1, + elsif: 3, + ensure: 1, + excessed_comma: 0, + fcall: 1, + field: 3, + fndptn: 4, + for: 3, + hash: 1, + heredoc_dedent: 2, + hshptn: 3, + if: 3, + if_mod: 2, + ifop: 3, + in: 3, + kwrest_param: 1, + lambda: 2, + magic_comment: 2, + massign: 2, + method_add_arg: 2, + method_add_block: 2, + mlhs_add: 2, + mlhs_add_post: 2, + mlhs_add_star: 2, + mlhs_new: 0, + mlhs_paren: 1, + module: 2, + mrhs_add: 2, + mrhs_add_star: 2, + mrhs_new: 0, + mrhs_new_from_args: 1, + next: 1, + nokw_param: 1, + opassign: 3, + operator_ambiguous: 2, + param_error: 2, + params: 7, + paren: 1, + parse_error: 1, + program: 1, + qsymbols_add: 2, + qsymbols_new: 0, + qwords_add: 2, + qwords_new: 0, + redo: 0, + regexp_add: 2, + regexp_literal: 2, + regexp_new: 0, + rescue: 4, + rescue_mod: 2, + rest_param: 1, + retry: 0, + return: 1, + return0: 0, + sclass: 2, + stmts_add: 2, + stmts_new: 0, + string_add: 2, + string_concat: 2, + string_content: 0, + string_dvar: 1, + string_embexpr: 1, + string_literal: 1, + super: 1, + symbol: 1, + symbol_literal: 1, + symbols_add: 2, + symbols_new: 0, + top_const_field: 1, + top_const_ref: 1, + unary: 2, + undef: 1, + unless: 3, + unless_mod: 2, + until: 2, + until_mod: 2, + var_alias: 2, + var_field: 1, + var_ref: 1, + vcall: 1, + void_stmt: 0, + when: 3, + while: 2, + while_mod: 2, + word_add: 2, + word_new: 0, + words_add: 2, + words_new: 0, + xstring_add: 2, + xstring_literal: 1, + xstring_new: 0, + yield: 1, + yield0: 0, + zsuper: 0 + } + + # This contains a table of all of the scanner events and their + # corresponding arity. + SCANNER_EVENT_TABLE = { + CHAR: 1, + __end__: 1, + backref: 1, + backtick: 1, + comma: 1, + comment: 1, + const: 1, + cvar: 1, + embdoc: 1, + embdoc_beg: 1, + embdoc_end: 1, + embexpr_beg: 1, + embexpr_end: 1, + embvar: 1, + float: 1, + gvar: 1, + heredoc_beg: 1, + heredoc_end: 1, + ident: 1, + ignored_nl: 1, + imaginary: 1, + int: 1, + ivar: 1, + kw: 1, + label: 1, + label_end: 1, + lbrace: 1, + lbracket: 1, + lparen: 1, + nl: 1, + op: 1, + period: 1, + qsymbols_beg: 1, + qwords_beg: 1, + rational: 1, + rbrace: 1, + rbracket: 1, + regexp_beg: 1, + regexp_end: 1, + rparen: 1, + semicolon: 1, + sp: 1, + symbeg: 1, + symbols_beg: 1, + tlambda: 1, + tlambeg: 1, + tstring_beg: 1, + tstring_content: 1, + tstring_end: 1, + words_beg: 1, + words_sep: 1, + ignored_sp: 1 + } + + # This array contains name of parser events. + PARSER_EVENTS = PARSER_EVENT_TABLE.keys + + # This array contains name of scanner events. + SCANNER_EVENTS = SCANNER_EVENT_TABLE.keys + + # This array contains name of all ripper events. + EVENTS = PARSER_EVENTS + SCANNER_EVENTS + + # A list of all of the Ruby keywords. + KEYWORDS = [ + "alias", + "and", + "begin", + "BEGIN", + "break", + "case", + "class", + "def", + "defined?", + "do", + "else", + "elsif", + "end", + "END", + "ensure", + "false", + "for", + "if", + "in", + "module", + "next", + "nil", + "not", + "or", + "redo", + "rescue", + "retry", + "return", + "self", + "super", + "then", + "true", + "undef", + "unless", + "until", + "when", + "while", + "yield", + "__ENCODING__", + "__FILE__", + "__LINE__" + ] + + # A list of all of the Ruby binary operators. + BINARY_OPERATORS = [ + :!=, + :!~, + :=~, + :==, + :===, + :<=>, + :>, + :>=, + :<, + :<=, + :&, + :|, + :^, + :>>, + :<<, + :-, + :+, + :%, + :/, + :*, + :** + ] + + private_constant :KEYWORDS, :BINARY_OPERATORS + + # Parses +src+ and create S-exp tree. + # Returns more readable tree rather than Ripper.sexp_raw. + # This method is mainly for developer use. + # The +filename+ argument is mostly ignored. + # By default, this method does not handle syntax errors in +src+, + # returning +nil+ in such cases. Use the +raise_errors+ keyword + # to raise a SyntaxError for an error in +src+. + # + # require "ripper" + # require "pp" + # + # pp Ripper.sexp("def m(a) nil end") + # #=> [:program, + # [[:def, + # [:@ident, "m", [1, 4]], + # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil, nil, nil, nil]], + # [:bodystmt, [[:var_ref, [:@kw, "nil", [1, 9]]]], nil, nil, nil]]]] + # + def self.sexp(src, filename = "-", lineno = 1, raise_errors: false) + builder = SexpBuilderPP.new(src, filename, lineno) + sexp = builder.parse + if builder.error? + if raise_errors + raise SyntaxError, builder.error + end + else + sexp + end + end + + # Parses +src+ and create S-exp tree. + # This method is mainly for developer use. + # The +filename+ argument is mostly ignored. + # By default, this method does not handle syntax errors in +src+, + # returning +nil+ in such cases. Use the +raise_errors+ keyword + # to raise a SyntaxError for an error in +src+. + # + # require "ripper" + # require "pp" + # + # pp Ripper.sexp_raw("def m(a) nil end") + # #=> [:program, + # [:stmts_add, + # [:stmts_new], + # [:def, + # [:@ident, "m", [1, 4]], + # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil]], + # [:bodystmt, + # [:stmts_add, [:stmts_new], [:var_ref, [:@kw, "nil", [1, 9]]]], + # nil, + # nil, + # nil]]]] + # + def self.sexp_raw(src, filename = "-", lineno = 1, raise_errors: false) + builder = SexpBuilder.new(src, filename, lineno) + sexp = builder.parse + if builder.error? + if raise_errors + raise SyntaxError, builder.error + end + else + sexp + end + end + + autoload :SexpBuilder, "prism/translation/ripper/sexp" + autoload :SexpBuilderPP, "prism/translation/ripper/sexp" + + # The source that is being parsed. + attr_reader :source + + # The filename of the source being parsed. + attr_reader :filename + + # The current line number of the parser. + attr_reader :lineno + + # The current column number of the parser. + attr_reader :column + + # Create a new Translation::Ripper object with the given source. + def initialize(source, filename = "(ripper)", lineno = 1) + @source = source + @filename = filename + @lineno = lineno + @column = 0 + @result = nil + end + + ########################################################################## + # Public interface + ########################################################################## + + # True if the parser encountered an error during parsing. + def error? + result.failure? + end + + # Parse the source and return the result. + def parse + result.comments.each do |comment| + location = comment.location + bounds(location) + + if comment.is_a?(InlineComment) + on_comment(comment.slice) + else + offset = location.start_offset + lines = comment.slice.lines + + lines.each_with_index do |line, index| + bounds(location.copy(start_offset: offset)) + + if index == 0 + on_embdoc_beg(line) + elsif index == lines.size - 1 + on_embdoc_end(line) + else + on_embdoc(line) + end + + offset += line.bytesize + end + end + end + + result.magic_comments.each do |magic_comment| + on_magic_comment(magic_comment.key, magic_comment.value) + end + + unless result.data_loc.nil? + on___end__(result.data_loc.slice.each_line.first) + end + + result.warnings.each do |warning| + bounds(warning.location) + + if warning.level == :default + warning(warning.message) + else + case warning.type + when :ambiguous_first_argument_plus + on_arg_ambiguous("+") + when :ambiguous_first_argument_minus + on_arg_ambiguous("-") + when :ambiguous_slash + on_arg_ambiguous("/") + else + warn(warning.message) + end + end + end + + if error? + result.errors.each do |error| + location = error.location + bounds(location) + + case error.type + when :alias_argument + on_alias_error("can't make alias for the number variables", location.slice) + when :argument_formal_class + on_param_error("formal argument cannot be a class variable", location.slice) + when :argument_format_constant + on_param_error("formal argument cannot be a constant", location.slice) + when :argument_formal_global + on_param_error("formal argument cannot be a global variable", location.slice) + when :argument_formal_ivar + on_param_error("formal argument cannot be an instance variable", location.slice) + when :class_name, :module_name + on_class_name_error("class/module name must be CONSTANT", location.slice) + else + on_parse_error(error.message) + end + end + + nil + else + result.value.accept(self) + end + end + + ########################################################################## + # Visitor methods + ########################################################################## + + # alias foo bar + # ^^^^^^^^^^^^^ + def visit_alias_method_node(node) + new_name = visit(node.new_name) + old_name = visit(node.old_name) + + bounds(node.location) + on_alias(new_name, old_name) + end + + # alias $foo $bar + # ^^^^^^^^^^^^^^^ + def visit_alias_global_variable_node(node) + new_name = visit_alias_global_variable_node_value(node.new_name) + old_name = visit_alias_global_variable_node_value(node.old_name) + + bounds(node.location) + on_var_alias(new_name, old_name) + end + + # Visit one side of an alias global variable node. + private def visit_alias_global_variable_node_value(node) + bounds(node.location) + + case node + when BackReferenceReadNode + on_backref(node.slice) + when GlobalVariableReadNode + on_gvar(node.name.to_s) + else + raise + end + end + + # foo => bar | baz + # ^^^^^^^^^ + def visit_alternation_pattern_node(node) + left = visit_pattern_node(node.left) + right = visit_pattern_node(node.right) + + bounds(node.location) + on_binary(left, :|, right) + end + + # Visit a pattern within a pattern match. This is used to bypass the + # parenthesis node that can be used to wrap patterns. + private def visit_pattern_node(node) + if node.is_a?(ParenthesesNode) + visit(node.body) + else + visit(node) + end + end + + # a and b + # ^^^^^^^ + def visit_and_node(node) + left = visit(node.left) + right = visit(node.right) + + bounds(node.location) + on_binary(left, node.operator.to_sym, right) + end + + # [] + # ^^ + def visit_array_node(node) + case (opening = node.opening) + when /^%w/ + opening_loc = node.opening_loc + bounds(opening_loc) + on_qwords_beg(opening) + + elements = on_qwords_new + previous = nil + + node.elements.each do |element| + visit_words_sep(opening_loc, previous, element) + + bounds(element.location) + elements = on_qwords_add(elements, on_tstring_content(element.content)) + + previous = element + end + + bounds(node.closing_loc) + on_tstring_end(node.closing) + when /^%i/ + opening_loc = node.opening_loc + bounds(opening_loc) + on_qsymbols_beg(opening) + + elements = on_qsymbols_new + previous = nil + + node.elements.each do |element| + visit_words_sep(opening_loc, previous, element) + + bounds(element.location) + elements = on_qsymbols_add(elements, on_tstring_content(element.value)) + + previous = element + end + + bounds(node.closing_loc) + on_tstring_end(node.closing) + when /^%W/ + opening_loc = node.opening_loc + bounds(opening_loc) + on_words_beg(opening) + + elements = on_words_new + previous = nil + + node.elements.each do |element| + visit_words_sep(opening_loc, previous, element) + + bounds(element.location) + elements = + on_words_add( + elements, + if element.is_a?(StringNode) + on_word_add(on_word_new, on_tstring_content(element.content)) + else + element.parts.inject(on_word_new) do |word, part| + word_part = + if part.is_a?(StringNode) + bounds(part.location) + on_tstring_content(part.content) + else + visit(part) + end + + on_word_add(word, word_part) + end + end + ) + + previous = element + end + + bounds(node.closing_loc) + on_tstring_end(node.closing) + when /^%I/ + opening_loc = node.opening_loc + bounds(opening_loc) + on_symbols_beg(opening) + + elements = on_symbols_new + previous = nil + + node.elements.each do |element| + visit_words_sep(opening_loc, previous, element) + + bounds(element.location) + elements = + on_symbols_add( + elements, + if element.is_a?(SymbolNode) + on_word_add(on_word_new, on_tstring_content(element.value)) + else + element.parts.inject(on_word_new) do |word, part| + word_part = + if part.is_a?(StringNode) + bounds(part.location) + on_tstring_content(part.content) + else + visit(part) + end + + on_word_add(word, word_part) + end + end + ) + + previous = element + end + + bounds(node.closing_loc) + on_tstring_end(node.closing) + else + bounds(node.opening_loc) + on_lbracket(opening) + + elements = visit_arguments(node.elements) unless node.elements.empty? + + bounds(node.closing_loc) + on_rbracket(node.closing) + end + + bounds(node.location) + on_array(elements) + end + + # Dispatch a words_sep event that contains the space between the elements + # of list literals. + private def visit_words_sep(opening_loc, previous, current) + end_offset = (previous.nil? ? opening_loc : previous.location).end_offset + start_offset = current.location.start_offset + + if end_offset != start_offset + bounds(current.location.copy(start_offset: end_offset)) + on_words_sep(source.byteslice(end_offset...start_offset)) + end + end + + # Visit a list of elements, like the elements of an array or arguments. + private def visit_arguments(elements) + bounds(elements.first.location) + elements.inject(on_args_new) do |args, element| + arg = visit(element) + bounds(element.location) + + case element + when BlockArgumentNode + on_args_add_block(args, arg) + when SplatNode + on_args_add_star(args, arg) + else + on_args_add(args, arg) + end + end + end + + # foo => [bar] + # ^^^^^ + def visit_array_pattern_node(node) + constant = visit(node.constant) + requireds = visit_all(node.requireds) if node.requireds.any? + rest = + if (rest_node = node.rest).is_a?(SplatNode) + if rest_node.expression.nil? + bounds(rest_node.location) + on_var_field(nil) + else + visit(rest_node.expression) + end + end + + posts = visit_all(node.posts) if node.posts.any? + + bounds(node.location) + on_aryptn(constant, requireds, rest, posts) + end + + # foo(bar) + # ^^^ + def visit_arguments_node(node) + arguments, _ = visit_call_node_arguments(node, nil, false) + arguments + end + + # { a: 1 } + # ^^^^ + def visit_assoc_node(node) + key = visit(node.key) + value = visit(node.value) + + bounds(node.location) + on_assoc_new(key, value) + end + + # def foo(**); bar(**); end + # ^^ + # + # { **foo } + # ^^^^^ + def visit_assoc_splat_node(node) + value = visit(node.value) + + bounds(node.location) + on_assoc_splat(value) + end + + # $+ + # ^^ + def visit_back_reference_read_node(node) + bounds(node.location) + on_backref(node.slice) + end + + # begin end + # ^^^^^^^^^ + def visit_begin_node(node) + clauses = visit_begin_node_clauses(node.begin_keyword_loc, node, false) + + bounds(node.location) + on_begin(clauses) + end + + # Visit the clauses of a begin node to form an on_bodystmt call. + private def visit_begin_node_clauses(location, node, allow_newline) + statements = + if node.statements.nil? + on_stmts_add(on_stmts_new, on_void_stmt) + else + body = node.statements.body + body.unshift(nil) if void_stmt?(location, node.statements.body[0].location, allow_newline) + + bounds(node.statements.location) + visit_statements_node_body(body) + end + + rescue_clause = visit(node.rescue_clause) + else_clause = + unless (else_clause_node = node.else_clause).nil? + else_statements = + if else_clause_node.statements.nil? + [nil] + else + body = else_clause_node.statements.body + body.unshift(nil) if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline) + body + end + + bounds(else_clause_node.location) + visit_statements_node_body(else_statements) + end + ensure_clause = visit(node.ensure_clause) + + bounds(node.location) + on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) + end + + # Visit the body of a structure that can have either a set of statements + # or statements wrapped in rescue/else/ensure. + private def visit_body_node(location, node, allow_newline = false) + case node + when nil + bounds(location) + on_bodystmt(visit_statements_node_body([nil]), nil, nil, nil) + when StatementsNode + body = [*node.body] + body.unshift(nil) if void_stmt?(location, body[0].location, allow_newline) + stmts = visit_statements_node_body(body) + + bounds(node.body.first.location) + on_bodystmt(stmts, nil, nil, nil) + when BeginNode + visit_begin_node_clauses(location, node, allow_newline) + else + raise + end + end + + # foo(&bar) + # ^^^^ + def visit_block_argument_node(node) + visit(node.expression) + end + + # foo { |; bar| } + # ^^^ + def visit_block_local_variable_node(node) + bounds(node.location) + on_ident(node.name.to_s) + end + + # Visit a BlockNode. + def visit_block_node(node) + braces = node.opening == "{" + parameters = visit(node.parameters) + + body = + case node.body + when nil + bounds(node.location) + stmts = on_stmts_add(on_stmts_new, on_void_stmt) + + bounds(node.location) + braces ? stmts : on_bodystmt(stmts, nil, nil, nil) + when StatementsNode + stmts = node.body.body + stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) + stmts = visit_statements_node_body(stmts) + + bounds(node.body.location) + braces ? stmts : on_bodystmt(stmts, nil, nil, nil) + when BeginNode + visit_body_node(node.parameters&.location || node.opening_loc, node.body) + else + raise + end + + if braces + bounds(node.location) + on_brace_block(parameters, body) + else + bounds(node.location) + on_do_block(parameters, body) + end + end + + # def foo(&bar); end + # ^^^^ + def visit_block_parameter_node(node) + if node.name_loc.nil? + bounds(node.location) + on_blockarg(nil) + else + bounds(node.name_loc) + name = visit_token(node.name.to_s) + + bounds(node.location) + on_blockarg(name) + end + end + + # A block's parameters. + def visit_block_parameters_node(node) + parameters = + if node.parameters.nil? + on_params(nil, nil, nil, nil, nil, nil, nil) + else + visit(node.parameters) + end + + locals = + if node.locals.any? + visit_all(node.locals) + else + false + end + + bounds(node.location) + on_block_var(parameters, locals) + end + + # break + # ^^^^^ + # + # break foo + # ^^^^^^^^^ + def visit_break_node(node) + if node.arguments.nil? + bounds(node.location) + on_break(on_args_new) + else + arguments = visit(node.arguments) + + bounds(node.location) + on_break(arguments) + end + end + + # foo + # ^^^ + # + # foo.bar + # ^^^^^^^ + # + # foo.bar() {} + # ^^^^^^^^^^^^ + def visit_call_node(node) + if node.call_operator_loc.nil? + case node.name + when :[] + receiver = visit(node.receiver) + arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + + bounds(node.location) + call = on_aref(receiver, arguments) + + if block.nil? + call + else + bounds(node.location) + on_method_add_block(call, block) + end + when :[]= + receiver = visit(node.receiver) + + *arguments, last_argument = node.arguments.arguments + arguments << node.block if !node.block.nil? + + arguments = + if arguments.any? + args = visit_arguments(arguments) + + if !node.block.nil? + args + else + bounds(arguments.first.location) + on_args_add_block(args, false) + end + end + + bounds(node.location) + call = on_aref_field(receiver, arguments) + value = visit_write_value(last_argument) + + bounds(last_argument.location) + on_assign(call, value) + when :-@, :+@, :~ + receiver = visit(node.receiver) + + bounds(node.location) + on_unary(node.name, receiver) + when :! + receiver = visit(node.receiver) + + bounds(node.location) + on_unary(node.message == "not" ? :not : :!, receiver) + when *BINARY_OPERATORS + receiver = visit(node.receiver) + value = visit(node.arguments.arguments.first) + + bounds(node.location) + on_binary(receiver, node.name, value) + else + bounds(node.message_loc) + message = visit_token(node.message, false) + + if node.variable_call? + on_vcall(message) + else + arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + call = + if node.opening_loc.nil? && arguments&.any? + bounds(node.location) + on_command(message, arguments) + elsif !node.opening_loc.nil? + bounds(node.location) + on_method_add_arg(on_fcall(message), on_arg_paren(arguments)) + else + bounds(node.location) + on_method_add_arg(on_fcall(message), on_args_new) + end + + if block.nil? + call + else + bounds(node.block.location) + on_method_add_block(call, block) + end + end + end + else + receiver = visit(node.receiver) + + bounds(node.call_operator_loc) + call_operator = visit_token(node.call_operator) + + message = + if node.message_loc.nil? + :call + else + bounds(node.message_loc) + visit_token(node.message, false) + end + + if node.name.end_with?("=") && !node.message.end_with?("=") && !node.arguments.nil? && node.block.nil? + value = visit_write_value(node.arguments.arguments.first) + + bounds(node.location) + on_assign(on_field(receiver, call_operator, message), value) + else + arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + call = + if node.opening_loc.nil? + bounds(node.location) + + if node.arguments.nil? && !node.block.is_a?(BlockArgumentNode) + on_call(receiver, call_operator, message) + else + on_command_call(receiver, call_operator, message, arguments) + end + else + bounds(node.opening_loc) + arguments = on_arg_paren(arguments) + + bounds(node.location) + on_method_add_arg(on_call(receiver, call_operator, message), arguments) + end + + if block.nil? + call + else + bounds(node.block.location) + on_method_add_block(call, block) + end + end + end + end + + # Visit the arguments and block of a call node and return the arguments + # and block as they should be used. + private def visit_call_node_arguments(arguments_node, block_node, trailing_comma) + arguments = arguments_node&.arguments || [] + block = block_node + + if block.is_a?(BlockArgumentNode) + arguments << block + block = nil + end + + [ + if arguments.length == 1 && arguments.first.is_a?(ForwardingArgumentsNode) + visit(arguments.first) + elsif arguments.any? + args = visit_arguments(arguments) + + if block_node.is_a?(BlockArgumentNode) || arguments.last.is_a?(ForwardingArgumentsNode) || command?(arguments.last) || trailing_comma + args + else + bounds(arguments.first.location) + on_args_add_block(args, false) + end + end, + visit(block) + ] + end + + # Returns true if the given node is a command node. + private def command?(node) + node.is_a?(CallNode) && + node.opening_loc.nil? && + (!node.arguments.nil? || node.block.is_a?(BlockArgumentNode)) && + !BINARY_OPERATORS.include?(node.name) + end + + # foo.bar += baz + # ^^^^^^^^^^^^^^^ + def visit_call_operator_write_node(node) + receiver = visit(node.receiver) + + bounds(node.call_operator_loc) + call_operator = visit_token(node.call_operator) + + bounds(node.message_loc) + message = visit_token(node.message) + + bounds(node.location) + target = on_field(receiver, call_operator, message) + + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # foo.bar &&= baz + # ^^^^^^^^^^^^^^^ + def visit_call_and_write_node(node) + receiver = visit(node.receiver) + + bounds(node.call_operator_loc) + call_operator = visit_token(node.call_operator) + + bounds(node.message_loc) + message = visit_token(node.message) + + bounds(node.location) + target = on_field(receiver, call_operator, message) + + bounds(node.operator_loc) + operator = on_op("&&=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # foo.bar ||= baz + # ^^^^^^^^^^^^^^^ + def visit_call_or_write_node(node) + receiver = visit(node.receiver) + + bounds(node.call_operator_loc) + call_operator = visit_token(node.call_operator) + + bounds(node.message_loc) + message = visit_token(node.message) + + bounds(node.location) + target = on_field(receiver, call_operator, message) + + bounds(node.operator_loc) + operator = on_op("||=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # foo.bar, = 1 + # ^^^^^^^ + def visit_call_target_node(node) + if node.call_operator == "::" + receiver = visit(node.receiver) + + bounds(node.message_loc) + message = visit_token(node.message) + + bounds(node.location) + on_const_path_field(receiver, message) + else + receiver = visit(node.receiver) + + bounds(node.call_operator_loc) + call_operator = visit_token(node.call_operator) + + bounds(node.message_loc) + message = visit_token(node.message) + + bounds(node.location) + on_field(receiver, call_operator, message) + end + end + + # foo => bar => baz + # ^^^^^^^^^^ + def visit_capture_pattern_node(node) + value = visit(node.value) + target = visit(node.target) + + bounds(node.location) + on_binary(value, :"=>", target) + end + + # case foo; when bar; end + # ^^^^^^^^^^^^^^^^^^^^^^^ + def visit_case_node(node) + predicate = visit(node.predicate) + clauses = + node.conditions.reverse_each.inject(visit(node.consequent)) do |consequent, condition| + on_when(*visit(condition), consequent) + end + + bounds(node.location) + on_case(predicate, clauses) + end + + # case foo; in bar; end + # ^^^^^^^^^^^^^^^^^^^^^ + def visit_case_match_node(node) + predicate = visit(node.predicate) + clauses = + node.conditions.reverse_each.inject(visit(node.consequent)) do |consequent, condition| + on_in(*visit(condition), consequent) + end + + bounds(node.location) + on_case(predicate, clauses) + end + + # class Foo; end + # ^^^^^^^^^^^^^^ + def visit_class_node(node) + constant_path = + if node.constant_path.is_a?(ConstantReadNode) + bounds(node.constant_path.location) + on_const_ref(on_const(node.constant_path.name.to_s)) + else + visit(node.constant_path) + end + + superclass = visit(node.superclass) + bodystmt = visit_body_node(node.superclass&.location || node.constant_path.location, node.body, node.superclass.nil?) + + bounds(node.location) + on_class(constant_path, superclass, bodystmt) + end + + # @@foo + # ^^^^^ + def visit_class_variable_read_node(node) + bounds(node.location) + on_var_ref(on_cvar(node.slice)) + end + + # @@foo = 1 + # ^^^^^^^^^ + # + # @@foo, @@bar = 1 + # ^^^^^ ^^^^^ + def visit_class_variable_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_cvar(node.name.to_s)) + value = visit_write_value(node.value) + + bounds(node.location) + on_assign(target, value) + end + + # @@foo += bar + # ^^^^^^^^^^^^ + def visit_class_variable_operator_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_cvar(node.name.to_s)) + + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # @@foo &&= bar + # ^^^^^^^^^^^^^ + def visit_class_variable_and_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_cvar(node.name.to_s)) + + bounds(node.operator_loc) + operator = on_op("&&=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # @@foo ||= bar + # ^^^^^^^^^^^^^ + def visit_class_variable_or_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_cvar(node.name.to_s)) + + bounds(node.operator_loc) + operator = on_op("||=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # @@foo, = bar + # ^^^^^ + def visit_class_variable_target_node(node) + bounds(node.location) + on_var_field(on_cvar(node.name.to_s)) + end + + # Foo + # ^^^ + def visit_constant_read_node(node) + bounds(node.location) + on_var_ref(on_const(node.name.to_s)) + end + + # Foo = 1 + # ^^^^^^^ + # + # Foo, Bar = 1 + # ^^^ ^^^ + def visit_constant_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_const(node.name.to_s)) + value = visit_write_value(node.value) + + bounds(node.location) + on_assign(target, value) + end + + # Foo += bar + # ^^^^^^^^^^^ + def visit_constant_operator_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_const(node.name.to_s)) + + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # Foo &&= bar + # ^^^^^^^^^^^^ + def visit_constant_and_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_const(node.name.to_s)) + + bounds(node.operator_loc) + operator = on_op("&&=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # Foo ||= bar + # ^^^^^^^^^^^^ + def visit_constant_or_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_const(node.name.to_s)) + + bounds(node.operator_loc) + operator = on_op("||=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # Foo, = bar + # ^^^ + def visit_constant_target_node(node) + bounds(node.location) + on_var_field(on_const(node.name.to_s)) + end + + # Foo::Bar + # ^^^^^^^^ + def visit_constant_path_node(node) + if node.parent.nil? + bounds(node.name_loc) + child = on_const(node.name.to_s) + + bounds(node.location) + on_top_const_ref(child) + else + parent = visit(node.parent) + + bounds(node.name_loc) + child = on_const(node.name.to_s) + + bounds(node.location) + on_const_path_ref(parent, child) + end + end + + # Foo::Bar = 1 + # ^^^^^^^^^^^^ + # + # Foo::Foo, Bar::Bar = 1 + # ^^^^^^^^ ^^^^^^^^ + def visit_constant_path_write_node(node) + target = visit_constant_path_write_node_target(node.target) + value = visit_write_value(node.value) + + bounds(node.location) + on_assign(target, value) + end + + # Visit a constant path that is part of a write node. + private def visit_constant_path_write_node_target(node) + if node.parent.nil? + bounds(node.name_loc) + child = on_const(node.name.to_s) + + bounds(node.location) + on_top_const_field(child) + else + parent = visit(node.parent) + + bounds(node.name_loc) + child = on_const(node.name.to_s) + + bounds(node.location) + on_const_path_field(parent, child) + end + end + + # Foo::Bar += baz + # ^^^^^^^^^^^^^^^ + def visit_constant_path_operator_write_node(node) + target = visit_constant_path_write_node_target(node.target) + value = visit(node.value) + + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # Foo::Bar &&= baz + # ^^^^^^^^^^^^^^^^ + def visit_constant_path_and_write_node(node) + target = visit_constant_path_write_node_target(node.target) + value = visit(node.value) + + bounds(node.operator_loc) + operator = on_op("&&=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # Foo::Bar ||= baz + # ^^^^^^^^^^^^^^^^ + def visit_constant_path_or_write_node(node) + target = visit_constant_path_write_node_target(node.target) + value = visit(node.value) + + bounds(node.operator_loc) + operator = on_op("||=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # Foo::Bar, = baz + # ^^^^^^^^ + def visit_constant_path_target_node(node) + visit_constant_path_write_node_target(node) + end + + # def foo; end + # ^^^^^^^^^^^^ + # + # def self.foo; end + # ^^^^^^^^^^^^^^^^^ + def visit_def_node(node) + receiver = visit(node.receiver) + operator = + if !node.operator_loc.nil? + bounds(node.operator_loc) + visit_token(node.operator) + end + + bounds(node.name_loc) + name = visit_token(node.name_loc.slice) + + parameters = + if node.parameters.nil? + bounds(node.location) + on_params(nil, nil, nil, nil, nil, nil, nil) + else + visit(node.parameters) + end + + if !node.lparen_loc.nil? + bounds(node.lparen_loc) + parameters = on_paren(parameters) + end + + bodystmt = + if node.equal_loc.nil? + visit_body_node(node.rparen_loc || node.end_keyword_loc, node.body) + else + body = visit(node.body.body.first) + + bounds(node.body.location) + on_bodystmt(body, nil, nil, nil) + end + + bounds(node.location) + if receiver.nil? + on_def(name, parameters, bodystmt) + else + on_defs(receiver, operator, name, parameters, bodystmt) + end + end + + # defined? a + # ^^^^^^^^^^ + # + # defined?(a) + # ^^^^^^^^^^^ + def visit_defined_node(node) + bounds(node.location) + on_defined(visit(node.value)) + end + + # if foo then bar else baz end + # ^^^^^^^^^^^^ + def visit_else_node(node) + statements = + if node.statements.nil? + [nil] + else + body = node.statements.body + body.unshift(nil) if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false) + body + end + + bounds(node.location) + on_else(visit_statements_node_body(statements)) + end + + # "foo #{bar}" + # ^^^^^^ + def visit_embedded_statements_node(node) + bounds(node.opening_loc) + on_embexpr_beg(node.opening) + + statements = + if node.statements.nil? + bounds(node.location) + on_stmts_add(on_stmts_new, on_void_stmt) + else + visit(node.statements) + end + + bounds(node.closing_loc) + on_embexpr_end(node.closing) + + bounds(node.location) + on_string_embexpr(statements) + end + + # "foo #@bar" + # ^^^^^ + def visit_embedded_variable_node(node) + bounds(node.operator_loc) + on_embvar(node.operator) + + variable = visit(node.variable) + + bounds(node.location) + on_string_dvar(variable) + end + + # Visit an EnsureNode node. + def visit_ensure_node(node) + statements = + if node.statements.nil? + [nil] + else + body = node.statements.body + body.unshift(nil) if void_stmt?(node.ensure_keyword_loc, body[0].location, false) + body + end + + statements = visit_statements_node_body(statements) + + bounds(node.location) + on_ensure(statements) + end + + # false + # ^^^^^ + def visit_false_node(node) + bounds(node.location) + on_var_ref(on_kw("false")) + end + + # foo => [*, bar, *] + # ^^^^^^^^^^^ + def visit_find_pattern_node(node) + constant = visit(node.constant) + left = + if node.left.expression.nil? + bounds(node.left.location) + on_var_field(nil) + else + visit(node.left.expression) + end + + requireds = visit_all(node.requireds) if node.requireds.any? + right = + if node.right.expression.nil? + bounds(node.right.location) + on_var_field(nil) + else + visit(node.right.expression) + end + + bounds(node.location) + on_fndptn(constant, left, requireds, right) + end + + # if foo .. bar; end + # ^^^^^^^^^^ + def visit_flip_flop_node(node) + left = visit(node.left) + right = visit(node.right) + + bounds(node.location) + if node.exclude_end? + on_dot3(left, right) + else + on_dot2(left, right) + end + end + + # 1.0 + # ^^^ + def visit_float_node(node) + visit_number_node(node) { |text| on_float(text) } + end + + # for foo in bar do end + # ^^^^^^^^^^^^^^^^^^^^^ + def visit_for_node(node) + index = visit(node.index) + collection = visit(node.collection) + statements = + if node.statements.nil? + bounds(node.location) + on_stmts_add(on_stmts_new, on_void_stmt) + else + visit(node.statements) + end + + bounds(node.location) + on_for(index, collection, statements) + end + + # def foo(...); bar(...); end + # ^^^ + def visit_forwarding_arguments_node(node) + bounds(node.location) + on_args_forward + end + + # def foo(...); end + # ^^^ + def visit_forwarding_parameter_node(node) + bounds(node.location) + on_args_forward + end + + # super + # ^^^^^ + # + # super {} + # ^^^^^^^^ + def visit_forwarding_super_node(node) + if node.block.nil? + bounds(node.location) + on_zsuper + else + block = visit(node.block) + + bounds(node.location) + on_method_add_block(on_zsuper, block) + end + end + + # $foo + # ^^^^ + def visit_global_variable_read_node(node) + bounds(node.location) + on_var_ref(on_gvar(node.name.to_s)) + end + + # $foo = 1 + # ^^^^^^^^ + # + # $foo, $bar = 1 + # ^^^^ ^^^^ + def visit_global_variable_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_gvar(node.name.to_s)) + value = visit_write_value(node.value) + + bounds(node.location) + on_assign(target, value) + end + + # $foo += bar + # ^^^^^^^^^^^ + def visit_global_variable_operator_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_gvar(node.name.to_s)) + + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # $foo &&= bar + # ^^^^^^^^^^^^ + def visit_global_variable_and_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_gvar(node.name.to_s)) + + bounds(node.operator_loc) + operator = on_op("&&=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # $foo ||= bar + # ^^^^^^^^^^^^ + def visit_global_variable_or_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_gvar(node.name.to_s)) + + bounds(node.operator_loc) + operator = on_op("||=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # $foo, = bar + # ^^^^ + def visit_global_variable_target_node(node) + bounds(node.location) + on_var_field(on_gvar(node.name.to_s)) + end + + # {} + # ^^ + def visit_hash_node(node) + elements = + if node.elements.any? + args = visit_all(node.elements) + + bounds(node.elements.first.location) + on_assoclist_from_args(args) + end + + bounds(node.location) + on_hash(elements) + end + + # foo => {} + # ^^ + def visit_hash_pattern_node(node) + constant = visit(node.constant) + elements = + if node.elements.any? || !node.rest.nil? + node.elements.map do |element| + [ + if (key = element.key).opening_loc.nil? + visit(key) + else + bounds(key.value_loc) + if (value = key.value).empty? + on_string_content + else + on_string_add(on_string_content, on_tstring_content(value)) + end + end, + visit(element.value) + ] + end + end + + rest = + case node.rest + when AssocSplatNode + visit(node.rest.value) + when NoKeywordsParameterNode + bounds(node.rest.location) + on_var_field(visit(node.rest)) + end + + bounds(node.location) + on_hshptn(constant, elements, rest) + end + + # if foo then bar end + # ^^^^^^^^^^^^^^^^^^^ + # + # bar if foo + # ^^^^^^^^^^ + # + # foo ? bar : baz + # ^^^^^^^^^^^^^^^ + def visit_if_node(node) + if node.then_keyword == "?" + predicate = visit(node.predicate) + truthy = visit(node.statements.body.first) + falsy = visit(node.consequent.statements.body.first) + + bounds(node.location) + on_ifop(predicate, truthy, falsy) + elsif node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + predicate = visit(node.predicate) + statements = + if node.statements.nil? + bounds(node.location) + on_stmts_add(on_stmts_new, on_void_stmt) + else + visit(node.statements) + end + consequent = visit(node.consequent) + + bounds(node.location) + if node.if_keyword == "if" + on_if(predicate, statements, consequent) + else + on_elsif(predicate, statements, consequent) + end + else + statements = visit(node.statements.body.first) + predicate = visit(node.predicate) + + bounds(node.location) + on_if_mod(predicate, statements) + end + end + + # 1i + # ^^ + def visit_imaginary_node(node) + visit_number_node(node) { |text| on_imaginary(text) } + end + + # { foo: } + # ^^^^ + def visit_implicit_node(node) + end + + # foo { |bar,| } + # ^ + def visit_implicit_rest_node(node) + bounds(node.location) + on_excessed_comma + end + + # case foo; in bar; end + # ^^^^^^^^^^^^^^^^^^^^^ + def visit_in_node(node) + # This is a special case where we're not going to call on_in directly + # because we don't have access to the consequent. Instead, we'll return + # the component parts and let the parent node handle it. + pattern = visit_pattern_node(node.pattern) + statements = + if node.statements.nil? + bounds(node.location) + on_stmts_add(on_stmts_new, on_void_stmt) + else + visit(node.statements) + end + + [pattern, statements] + end + + # foo[bar] += baz + # ^^^^^^^^^^^^^^^ + def visit_index_operator_write_node(node) + receiver = visit(node.receiver) + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + + bounds(node.location) + target = on_aref_field(receiver, arguments) + + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # foo[bar] &&= baz + # ^^^^^^^^^^^^^^^^ + def visit_index_and_write_node(node) + receiver = visit(node.receiver) + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + + bounds(node.location) + target = on_aref_field(receiver, arguments) + + bounds(node.operator_loc) + operator = on_op("&&=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # foo[bar] ||= baz + # ^^^^^^^^^^^^^^^^ + def visit_index_or_write_node(node) + receiver = visit(node.receiver) + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + + bounds(node.location) + target = on_aref_field(receiver, arguments) + + bounds(node.operator_loc) + operator = on_op("||=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # foo[bar], = 1 + # ^^^^^^^^ + def visit_index_target_node(node) + receiver = visit(node.receiver) + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + + bounds(node.location) + on_aref_field(receiver, arguments) + end + + # @foo + # ^^^^ + def visit_instance_variable_read_node(node) + bounds(node.location) + on_var_ref(on_ivar(node.name.to_s)) + end + + # @foo = 1 + # ^^^^^^^^ + def visit_instance_variable_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_ivar(node.name.to_s)) + value = visit_write_value(node.value) + + bounds(node.location) + on_assign(target, value) + end + + # @foo += bar + # ^^^^^^^^^^^ + def visit_instance_variable_operator_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_ivar(node.name.to_s)) + + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # @foo &&= bar + # ^^^^^^^^^^^^ + def visit_instance_variable_and_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_ivar(node.name.to_s)) + + bounds(node.operator_loc) + operator = on_op("&&=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # @foo ||= bar + # ^^^^^^^^^^^^ + def visit_instance_variable_or_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_ivar(node.name.to_s)) + + bounds(node.operator_loc) + operator = on_op("||=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # @foo, = bar + # ^^^^ + def visit_instance_variable_target_node(node) + bounds(node.location) + on_var_field(on_ivar(node.name.to_s)) + end + + # 1 + # ^ + def visit_integer_node(node) + visit_number_node(node) { |text| on_int(text) } + end + + # if /foo #{bar}/ then end + # ^^^^^^^^^^^^ + def visit_interpolated_match_last_line_node(node) + bounds(node.opening_loc) + on_regexp_beg(node.opening) + + bounds(node.parts.first.location) + parts = + node.parts.inject(on_regexp_new) do |content, part| + on_regexp_add(content, visit_string_content(part)) + end + + bounds(node.closing_loc) + closing = on_regexp_end(node.closing) + + bounds(node.location) + on_regexp_literal(parts, closing) + end + + # /foo #{bar}/ + # ^^^^^^^^^^^^ + def visit_interpolated_regular_expression_node(node) + bounds(node.opening_loc) + on_regexp_beg(node.opening) + + bounds(node.parts.first.location) + parts = + node.parts.inject(on_regexp_new) do |content, part| + on_regexp_add(content, visit_string_content(part)) + end + + bounds(node.closing_loc) + closing = on_regexp_end(node.closing) + + bounds(node.location) + on_regexp_literal(parts, closing) + end + + # "foo #{bar}" + # ^^^^^^^^^^^^ + def visit_interpolated_string_node(node) + if node.opening&.start_with?("<<~") + heredoc = visit_heredoc_string_node(node) + + bounds(node.location) + on_string_literal(heredoc) + elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? } + first, *rest = node.parts + rest.inject(visit(first)) do |content, part| + concat = visit(part) + + bounds(part.location) + on_string_concat(content, concat) + end + else + bounds(node.parts.first.location) + parts = + node.parts.inject(on_string_content) do |content, part| + on_string_add(content, visit_string_content(part)) + end + + bounds(node.location) + on_string_literal(parts) + end + end + + # :"foo #{bar}" + # ^^^^^^^^^^^^^ + def visit_interpolated_symbol_node(node) + bounds(node.parts.first.location) + parts = + node.parts.inject(on_string_content) do |content, part| + on_string_add(content, visit_string_content(part)) + end + + bounds(node.location) + on_dyna_symbol(parts) + end + + # `foo #{bar}` + # ^^^^^^^^^^^^ + def visit_interpolated_x_string_node(node) + if node.opening.start_with?("<<~") + heredoc = visit_heredoc_x_string_node(node) + + bounds(node.location) + on_xstring_literal(heredoc) + else + bounds(node.parts.first.location) + parts = + node.parts.inject(on_xstring_new) do |content, part| + on_xstring_add(content, visit_string_content(part)) + end + + bounds(node.location) + on_xstring_literal(parts) + end + end + + # Visit an individual part of a string-like node. + private def visit_string_content(part) + if part.is_a?(StringNode) + bounds(part.content_loc) + on_tstring_content(part.content) + else + visit(part) + end + end + + # -> { it } + # ^^ + def visit_it_local_variable_read_node(node) + bounds(node.location) + on_vcall(on_ident(node.slice)) + end + + # -> { it } + # ^^^^^^^^^ + def visit_it_parameters_node(node) + end + + # foo(bar: baz) + # ^^^^^^^^ + def visit_keyword_hash_node(node) + elements = visit_all(node.elements) + + bounds(node.location) + on_bare_assoc_hash(elements) + end + + # def foo(**bar); end + # ^^^^^ + # + # def foo(**); end + # ^^ + def visit_keyword_rest_parameter_node(node) + if node.name_loc.nil? + bounds(node.location) + on_kwrest_param(nil) + else + bounds(node.name_loc) + name = on_ident(node.name.to_s) + + bounds(node.location) + on_kwrest_param(name) + end + end + + # -> {} + def visit_lambda_node(node) + bounds(node.operator_loc) + on_tlambda(node.operator) + + parameters = + if node.parameters.is_a?(BlockParametersNode) + # Ripper does not track block-locals within lambdas, so we skip + # directly to the parameters here. + params = + if node.parameters.parameters.nil? + bounds(node.location) + on_params(nil, nil, nil, nil, nil, nil, nil) + else + visit(node.parameters.parameters) + end + + if node.parameters.opening_loc.nil? + params + else + bounds(node.parameters.opening_loc) + on_paren(params) + end + else + bounds(node.location) + on_params(nil, nil, nil, nil, nil, nil, nil) + end + + braces = node.opening == "{" + if braces + bounds(node.opening_loc) + on_tlambeg(node.opening) + end + + body = + case node.body + when nil + bounds(node.location) + stmts = on_stmts_add(on_stmts_new, on_void_stmt) + + bounds(node.location) + braces ? stmts : on_bodystmt(stmts, nil, nil, nil) + when StatementsNode + stmts = node.body.body + stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) + stmts = visit_statements_node_body(stmts) + + bounds(node.body.location) + braces ? stmts : on_bodystmt(stmts, nil, nil, nil) + when BeginNode + visit_body_node(node.opening_loc, node.body) + else + raise + end + + bounds(node.location) + on_lambda(parameters, body) + end + + # foo + # ^^^ + def visit_local_variable_read_node(node) + bounds(node.location) + on_var_ref(on_ident(node.slice)) + end + + # foo = 1 + # ^^^^^^^ + def visit_local_variable_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_ident(node.name_loc.slice)) + value = visit_write_value(node.value) + + bounds(node.location) + on_assign(target, value) + end + + # foo += bar + # ^^^^^^^^^^ + def visit_local_variable_operator_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_ident(node.name_loc.slice)) + + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # foo &&= bar + # ^^^^^^^^^^^ + def visit_local_variable_and_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_ident(node.name_loc.slice)) + + bounds(node.operator_loc) + operator = on_op("&&=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # foo ||= bar + # ^^^^^^^^^^^ + def visit_local_variable_or_write_node(node) + bounds(node.name_loc) + target = on_var_field(on_ident(node.name_loc.slice)) + + bounds(node.operator_loc) + operator = on_op("||=") + value = visit_write_value(node.value) + + bounds(node.location) + on_opassign(target, operator, value) + end + + # foo, = bar + # ^^^ + def visit_local_variable_target_node(node) + bounds(node.location) + on_var_field(on_ident(node.name.to_s)) + end + + # if /foo/ then end + # ^^^^^ + def visit_match_last_line_node(node) + bounds(node.opening_loc) + on_regexp_beg(node.opening) + + bounds(node.content_loc) + tstring_content = on_tstring_content(node.content) + + bounds(node.closing_loc) + closing = on_regexp_end(node.closing) + + on_regexp_literal(on_regexp_add(on_regexp_new, tstring_content), closing) + end + + # foo in bar + # ^^^^^^^^^^ + def visit_match_predicate_node(node) + value = visit(node.value) + pattern = on_in(visit_pattern_node(node.pattern), nil, nil) + + on_case(value, pattern) + end + + # foo => bar + # ^^^^^^^^^^ + def visit_match_required_node(node) + value = visit(node.value) + pattern = on_in(visit_pattern_node(node.pattern), nil, nil) + + on_case(value, pattern) + end + + # /(?<foo>foo)/ =~ bar + # ^^^^^^^^^^^^^^^^^^^^ + def visit_match_write_node(node) + visit(node.call) + end + + # A node that is missing from the syntax tree. This is only used in the + # case of a syntax error. + def visit_missing_node(node) + raise "Cannot visit missing nodes directly." + end + + # module Foo; end + # ^^^^^^^^^^^^^^^ + def visit_module_node(node) + constant_path = + if node.constant_path.is_a?(ConstantReadNode) + bounds(node.constant_path.location) + on_const_ref(on_const(node.constant_path.name.to_s)) + else + visit(node.constant_path) + end + + bodystmt = visit_body_node(node.constant_path.location, node.body, true) + + bounds(node.location) + on_module(constant_path, bodystmt) + end + + # (foo, bar), bar = qux + # ^^^^^^^^^^ + def visit_multi_target_node(node) + bounds(node.location) + targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true) + + if node.lparen_loc.nil? + targets + else + bounds(node.lparen_loc) + on_mlhs_paren(targets) + end + end + + # Visit the targets of a multi-target node. + private def visit_multi_target_node_targets(lefts, rest, rights, skippable) + if skippable && lefts.length == 1 && lefts.first.is_a?(MultiTargetNode) && rest.nil? && rights.empty? + return visit(lefts.first) + end + + mlhs = on_mlhs_new + + lefts.each do |left| + bounds(left.location) + mlhs = on_mlhs_add(mlhs, visit(left)) + end + + case rest + when nil + # do nothing + when ImplicitRestNode + # these do not get put into the generated tree + bounds(rest.location) + on_excessed_comma + else + bounds(rest.location) + mlhs = on_mlhs_add_star(mlhs, visit(rest)) + end + + if rights.any? + bounds(rights.first.location) + post = on_mlhs_new + + rights.each do |right| + bounds(right.location) + post = on_mlhs_add(post, visit(right)) + end + + mlhs = on_mlhs_add_post(mlhs, post) + end + + mlhs + end + + # foo, bar = baz + # ^^^^^^^^^^^^^^ + def visit_multi_write_node(node) + bounds(node.location) + targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true) + + unless node.lparen_loc.nil? + bounds(node.lparen_loc) + targets = on_mlhs_paren(targets) + end + + value = visit_write_value(node.value) + + bounds(node.location) + on_massign(targets, value) + end + + # next + # ^^^^ + # + # next foo + # ^^^^^^^^ + def visit_next_node(node) + if node.arguments.nil? + bounds(node.location) + on_next(on_args_new) + else + arguments = visit(node.arguments) + + bounds(node.location) + on_next(arguments) + end + end + + # nil + # ^^^ + def visit_nil_node(node) + bounds(node.location) + on_var_ref(on_kw("nil")) + end + + # def foo(**nil); end + # ^^^^^ + def visit_no_keywords_parameter_node(node) + bounds(node.location) + on_nokw_param(nil) + + :nil + end + + # -> { _1 + _2 } + # ^^^^^^^^^^^^^^ + def visit_numbered_parameters_node(node) + end + + # $1 + # ^^ + def visit_numbered_reference_read_node(node) + bounds(node.location) + on_backref(node.slice) + end + + # def foo(bar: baz); end + # ^^^^^^^^ + def visit_optional_keyword_parameter_node(node) + bounds(node.name_loc) + name = on_label("#{node.name}:") + value = visit(node.value) + + [name, value] + end + + # def foo(bar = 1); end + # ^^^^^^^ + def visit_optional_parameter_node(node) + bounds(node.name_loc) + name = visit_token(node.name.to_s) + value = visit(node.value) + + [name, value] + end + + # a or b + # ^^^^^^ + def visit_or_node(node) + left = visit(node.left) + right = visit(node.right) + + bounds(node.location) + on_binary(left, node.operator.to_sym, right) + end + + # def foo(bar, *baz); end + # ^^^^^^^^^ + def visit_parameters_node(node) + requireds = node.requireds.map { |required| required.is_a?(MultiTargetNode) ? visit_destructured_parameter_node(required) : visit(required) } if node.requireds.any? + optionals = visit_all(node.optionals) if node.optionals.any? + rest = visit(node.rest) + posts = node.posts.map { |post| post.is_a?(MultiTargetNode) ? visit_destructured_parameter_node(post) : visit(post) } if node.posts.any? + keywords = visit_all(node.keywords) if node.keywords.any? + keyword_rest = visit(node.keyword_rest) + block = visit(node.block) + + bounds(node.location) + on_params(requireds, optionals, rest, posts, keywords, keyword_rest, block) + end + + # Visit a destructured positional parameter node. + private def visit_destructured_parameter_node(node) + bounds(node.location) + targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, false) + + bounds(node.lparen_loc) + on_mlhs_paren(targets) + end + + # () + # ^^ + # + # (1) + # ^^^ + def visit_parentheses_node(node) + body = + if node.body.nil? + on_stmts_add(on_stmts_new, on_void_stmt) + else + visit(node.body) + end + + bounds(node.location) + on_paren(body) + end + + # foo => ^(bar) + # ^^^^^^ + def visit_pinned_expression_node(node) + expression = visit(node.expression) + + bounds(node.location) + on_begin(expression) + end + + # foo = 1 and bar => ^foo + # ^^^^ + def visit_pinned_variable_node(node) + visit(node.variable) + end + + # END {} + # ^^^^^^ + def visit_post_execution_node(node) + statements = + if node.statements.nil? + bounds(node.location) + on_stmts_add(on_stmts_new, on_void_stmt) + else + visit(node.statements) + end + + bounds(node.location) + on_END(statements) + end + + # BEGIN {} + # ^^^^^^^^ + def visit_pre_execution_node(node) + statements = + if node.statements.nil? + bounds(node.location) + on_stmts_add(on_stmts_new, on_void_stmt) + else + visit(node.statements) + end + + bounds(node.location) + on_BEGIN(statements) + end + + # The top-level program node. + def visit_program_node(node) + body = node.statements.body + body << nil if body.empty? + statements = visit_statements_node_body(body) + + bounds(node.location) + on_program(statements) + end + + # 0..5 + # ^^^^ + def visit_range_node(node) + left = visit(node.left) + right = visit(node.right) + + bounds(node.location) + if node.exclude_end? + on_dot3(left, right) + else + on_dot2(left, right) + end + end + + # 1r + # ^^ + def visit_rational_node(node) + visit_number_node(node) { |text| on_rational(text) } + end + + # redo + # ^^^^ + def visit_redo_node(node) + bounds(node.location) + on_redo + end + + # /foo/ + # ^^^^^ + def visit_regular_expression_node(node) + bounds(node.opening_loc) + on_regexp_beg(node.opening) + + if node.content.empty? + bounds(node.closing_loc) + closing = on_regexp_end(node.closing) + + on_regexp_literal(on_regexp_new, closing) + else + bounds(node.content_loc) + tstring_content = on_tstring_content(node.content) + + bounds(node.closing_loc) + closing = on_regexp_end(node.closing) + + on_regexp_literal(on_regexp_add(on_regexp_new, tstring_content), closing) + end + end + + # def foo(bar:); end + # ^^^^ + def visit_required_keyword_parameter_node(node) + bounds(node.name_loc) + [on_label("#{node.name}:"), false] + end + + # def foo(bar); end + # ^^^ + def visit_required_parameter_node(node) + bounds(node.location) + on_ident(node.name.to_s) + end + + # foo rescue bar + # ^^^^^^^^^^^^^^ + def visit_rescue_modifier_node(node) + expression = visit_write_value(node.expression) + rescue_expression = visit(node.rescue_expression) + + bounds(node.location) + on_rescue_mod(expression, rescue_expression) + end + + # begin; rescue; end + # ^^^^^^^ + def visit_rescue_node(node) + exceptions = + case node.exceptions.length + when 0 + nil + when 1 + if (exception = node.exceptions.first).is_a?(SplatNode) + bounds(exception.location) + on_mrhs_add_star(on_mrhs_new, visit(exception)) + else + [visit(node.exceptions.first)] + end + else + bounds(node.location) + length = node.exceptions.length + + node.exceptions.each_with_index.inject(on_args_new) do |mrhs, (exception, index)| + arg = visit(exception) + + bounds(exception.location) + mrhs = on_mrhs_new_from_args(mrhs) if index == length - 1 + + if exception.is_a?(SplatNode) + if index == length - 1 + on_mrhs_add_star(mrhs, arg) + else + on_args_add_star(mrhs, arg) + end + else + if index == length - 1 + on_mrhs_add(mrhs, arg) + else + on_args_add(mrhs, arg) + end + end + end + end + + reference = visit(node.reference) + statements = + if node.statements.nil? + bounds(node.location) + on_stmts_add(on_stmts_new, on_void_stmt) + else + visit(node.statements) + end + + consequent = visit(node.consequent) + + bounds(node.location) + on_rescue(exceptions, reference, statements, consequent) + end + + # def foo(*bar); end + # ^^^^ + # + # def foo(*); end + # ^ + def visit_rest_parameter_node(node) + if node.name_loc.nil? + bounds(node.location) + on_rest_param(nil) + else + bounds(node.name_loc) + on_rest_param(visit_token(node.name.to_s)) + end + end + + # retry + # ^^^^^ + def visit_retry_node(node) + bounds(node.location) + on_retry + end + + # return + # ^^^^^^ + # + # return 1 + # ^^^^^^^^ + def visit_return_node(node) + if node.arguments.nil? + bounds(node.location) + on_return0 + else + arguments = visit(node.arguments) + + bounds(node.location) + on_return(arguments) + end + end + + # self + # ^^^^ + def visit_self_node(node) + bounds(node.location) + on_var_ref(on_kw("self")) + end + + # A shareable constant. + def visit_shareable_constant_node(node) + visit(node.write) + end + + # class << self; end + # ^^^^^^^^^^^^^^^^^^ + def visit_singleton_class_node(node) + expression = visit(node.expression) + bodystmt = visit_body_node(node.body&.location || node.end_keyword_loc, node.body) + + bounds(node.location) + on_sclass(expression, bodystmt) + end + + # __ENCODING__ + # ^^^^^^^^^^^^ + def visit_source_encoding_node(node) + bounds(node.location) + on_var_ref(on_kw("__ENCODING__")) + end + + # __FILE__ + # ^^^^^^^^ + def visit_source_file_node(node) + bounds(node.location) + on_var_ref(on_kw("__FILE__")) + end + + # __LINE__ + # ^^^^^^^^ + def visit_source_line_node(node) + bounds(node.location) + on_var_ref(on_kw("__LINE__")) + end + + # foo(*bar) + # ^^^^ + # + # def foo((bar, *baz)); end + # ^^^^ + # + # def foo(*); bar(*); end + # ^ + def visit_splat_node(node) + visit(node.expression) + end + + # A list of statements. + def visit_statements_node(node) + bounds(node.location) + visit_statements_node_body(node.body) + end + + # Visit the list of statements of a statements node. We support nil + # statements in the list. This would normally not be allowed by the + # structure of the prism parse tree, but we manually add them here so that + # we can mirror Ripper's void stmt. + private def visit_statements_node_body(body) + body.inject(on_stmts_new) do |stmts, stmt| + on_stmts_add(stmts, stmt.nil? ? on_void_stmt : visit(stmt)) + end + end + + # "foo" + # ^^^^^ + def visit_string_node(node) + if (content = node.content).empty? + bounds(node.location) + on_string_literal(on_string_content) + elsif (opening = node.opening) == "?" + bounds(node.location) + on_CHAR("?#{node.content}") + elsif opening.start_with?("<<~") + heredoc = visit_heredoc_string_node(node.to_interpolated) + + bounds(node.location) + on_string_literal(heredoc) + else + bounds(node.content_loc) + tstring_content = on_tstring_content(content) + + bounds(node.location) + on_string_literal(on_string_add(on_string_content, tstring_content)) + end + end + + # Ripper gives back the escaped string content but strips out the common + # leading whitespace. Prism gives back the unescaped string content and + # a location for the escaped string content. Unfortunately these don't + # work well together, so here we need to re-derive the common leading + # whitespace. + private def visit_heredoc_node_whitespace(parts) + common_whitespace = nil + dedent_next = true + + parts.each do |part| + if part.is_a?(StringNode) + if dedent_next && !(content = part.content).chomp.empty? + common_whitespace = [ + common_whitespace || Float::INFINITY, + content[/\A\s*/].each_char.inject(0) do |part_whitespace, char| + char == "\t" ? ((part_whitespace / 8 + 1) * 8) : (part_whitespace + 1) + end + ].min + end + + dedent_next = true + else + dedent_next = false + end + end + + common_whitespace || 0 + end + + # Visit a string that is expressed using a <<~ heredoc. + private def visit_heredoc_node(parts, base) + common_whitespace = visit_heredoc_node_whitespace(parts) + + if common_whitespace == 0 + bounds(parts.first.location) + + string = [] + result = base + + parts.each do |part| + if part.is_a?(StringNode) + if string.empty? + string = [part] + else + string << part + end + else + unless string.empty? + bounds(string[0].location) + result = yield result, on_tstring_content(string.map(&:content).join) + string = [] + end + + result = yield result, visit(part) + end + end + + unless string.empty? + bounds(string[0].location) + result = yield result, on_tstring_content(string.map(&:content).join) + end + + result + else + bounds(parts.first.location) + result = + parts.inject(base) do |string_content, part| + yield string_content, visit_string_content(part) + end + + bounds(parts.first.location) + on_heredoc_dedent(result, common_whitespace) + end + end + + # Visit a heredoc node that is representing a string. + private def visit_heredoc_string_node(node) + bounds(node.opening_loc) + on_heredoc_beg(node.opening) + + bounds(node.location) + result = + visit_heredoc_node(node.parts, on_string_content) do |parts, part| + on_string_add(parts, part) + end + + bounds(node.closing_loc) + on_heredoc_end(node.closing) + + result + end + + # Visit a heredoc node that is representing an xstring. + private def visit_heredoc_x_string_node(node) + bounds(node.opening_loc) + on_heredoc_beg(node.opening) + + bounds(node.location) + result = + visit_heredoc_node(node.parts, on_xstring_new) do |parts, part| + on_xstring_add(parts, part) + end + + bounds(node.closing_loc) + on_heredoc_end(node.closing) + + result + end + + # super(foo) + # ^^^^^^^^^^ + def visit_super_node(node) + arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location)) + + if !node.lparen_loc.nil? + bounds(node.lparen_loc) + arguments = on_arg_paren(arguments) + end + + bounds(node.location) + call = on_super(arguments) + + if block.nil? + call + else + bounds(node.block.location) + on_method_add_block(call, block) + end + end + + # :foo + # ^^^^ + def visit_symbol_node(node) + if (opening = node.opening)&.match?(/^%s|['"]:?$/) + bounds(node.value_loc) + content = on_string_content + + if !(value = node.value).empty? + content = on_string_add(content, on_tstring_content(value)) + end + + on_dyna_symbol(content) + elsif (closing = node.closing) == ":" + bounds(node.location) + on_label("#{node.value}:") + elsif opening.nil? && node.closing_loc.nil? + bounds(node.value_loc) + on_symbol_literal(visit_token(node.value)) + else + bounds(node.value_loc) + on_symbol_literal(on_symbol(visit_token(node.value))) + end + end + + # true + # ^^^^ + def visit_true_node(node) + bounds(node.location) + on_var_ref(on_kw("true")) + end + + # undef foo + # ^^^^^^^^^ + def visit_undef_node(node) + names = visit_all(node.names) + + bounds(node.location) + on_undef(names) + end + + # unless foo; bar end + # ^^^^^^^^^^^^^^^^^^^ + # + # bar unless foo + # ^^^^^^^^^^^^^^ + def visit_unless_node(node) + if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + predicate = visit(node.predicate) + statements = + if node.statements.nil? + bounds(node.location) + on_stmts_add(on_stmts_new, on_void_stmt) + else + visit(node.statements) + end + consequent = visit(node.consequent) + + bounds(node.location) + on_unless(predicate, statements, consequent) + else + statements = visit(node.statements.body.first) + predicate = visit(node.predicate) + + bounds(node.location) + on_unless_mod(predicate, statements) + end + end + + # until foo; bar end + # ^^^^^^^^^^^^^^^^^ + # + # bar until foo + # ^^^^^^^^^^^^^ + def visit_until_node(node) + if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + predicate = visit(node.predicate) + statements = + if node.statements.nil? + bounds(node.location) + on_stmts_add(on_stmts_new, on_void_stmt) + else + visit(node.statements) + end + + bounds(node.location) + on_until(predicate, statements) + else + statements = visit(node.statements.body.first) + predicate = visit(node.predicate) + + bounds(node.location) + on_until_mod(predicate, statements) + end + end + + # case foo; when bar; end + # ^^^^^^^^^^^^^ + def visit_when_node(node) + # This is a special case where we're not going to call on_when directly + # because we don't have access to the consequent. Instead, we'll return + # the component parts and let the parent node handle it. + conditions = visit_arguments(node.conditions) + statements = + if node.statements.nil? + bounds(node.location) + on_stmts_add(on_stmts_new, on_void_stmt) + else + visit(node.statements) + end + + [conditions, statements] + end + + # while foo; bar end + # ^^^^^^^^^^^^^^^^^^ + # + # bar while foo + # ^^^^^^^^^^^^^ + def visit_while_node(node) + if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + predicate = visit(node.predicate) + statements = + if node.statements.nil? + bounds(node.location) + on_stmts_add(on_stmts_new, on_void_stmt) + else + visit(node.statements) + end + + bounds(node.location) + on_while(predicate, statements) + else + statements = visit(node.statements.body.first) + predicate = visit(node.predicate) + + bounds(node.location) + on_while_mod(predicate, statements) + end + end + + # `foo` + # ^^^^^ + def visit_x_string_node(node) + if node.unescaped.empty? + bounds(node.location) + on_xstring_literal(on_xstring_new) + elsif node.opening.start_with?("<<~") + heredoc = visit_heredoc_x_string_node(node.to_interpolated) + + bounds(node.location) + on_xstring_literal(heredoc) + else + bounds(node.content_loc) + content = on_tstring_content(node.content) + + bounds(node.location) + on_xstring_literal(on_xstring_add(on_xstring_new, content)) + end + end + + # yield + # ^^^^^ + # + # yield 1 + # ^^^^^^^ + def visit_yield_node(node) + if node.arguments.nil? && node.lparen_loc.nil? + bounds(node.location) + on_yield0 + else + arguments = + if node.arguments.nil? + bounds(node.location) + on_args_new + else + visit(node.arguments) + end + + unless node.lparen_loc.nil? + bounds(node.lparen_loc) + arguments = on_paren(arguments) + end + + bounds(node.location) + on_yield(arguments) + end + end + + private + + # Lazily initialize the parse result. + def result + @result ||= + begin + scopes = RUBY_VERSION >= "3.3.0" ? [] : [[]] + Prism.parse(source, scopes: scopes) + end + end + + ########################################################################## + # Helpers + ########################################################################## + + # Returns true if there is a comma between the two locations. + def trailing_comma?(left, right) + source.byteslice(left.end_offset...right.start_offset).include?(",") + end + + # Returns true if there is a semicolon between the two locations. + def void_stmt?(left, right, allow_newline) + pattern = allow_newline ? /[;\n]/ : /;/ + source.byteslice(left.end_offset...right.start_offset).match?(pattern) + end + + # Visit the string content of a particular node. This method is used to + # split into the various token types. + def visit_token(token, allow_keywords = true) + case token + when "." + on_period(token) + when "`" + on_backtick(token) + when *(allow_keywords ? KEYWORDS : []) + on_kw(token) + when /^_/ + on_ident(token) + when /^[[:upper:]]\w*$/ + on_const(token) + when /^@@/ + on_cvar(token) + when /^@/ + on_ivar(token) + when /^\$/ + on_gvar(token) + when /^[[:punct:]]/ + on_op(token) + else + on_ident(token) + end + end + + # Visit a node that represents a number. We need to explicitly handle the + # unary - operator. + def visit_number_node(node) + slice = node.slice + location = node.location + + if slice[0] == "-" + bounds(location.copy(start_offset: location.start_offset + 1)) + value = yield slice[1..-1] + + bounds(node.location) + on_unary(:-@, value) + else + bounds(location) + yield slice + end + end + + # Visit a node that represents a write value. This is used to handle the + # special case of an implicit array that is generated without brackets. + def visit_write_value(node) + if node.is_a?(ArrayNode) && node.opening_loc.nil? + elements = node.elements + length = elements.length + + bounds(elements.first.location) + elements.each_with_index.inject((elements.first.is_a?(SplatNode) && length == 1) ? on_mrhs_new : on_args_new) do |args, (element, index)| + arg = visit(element) + bounds(element.location) + + if index == length - 1 + if element.is_a?(SplatNode) + mrhs = index == 0 ? args : on_mrhs_new_from_args(args) + on_mrhs_add_star(mrhs, arg) + else + on_mrhs_add(on_mrhs_new_from_args(args), arg) + end + else + case element + when BlockArgumentNode + on_args_add_block(args, arg) + when SplatNode + on_args_add_star(args, arg) + else + on_args_add(args, arg) + end + end + end + else + visit(node) + end + end + + # This method is responsible for updating lineno and column information + # to reflect the current node. + # + # This method could be drastically improved with some caching on the start + # of every line, but for now it's good enough. + def bounds(location) + @lineno = location.start_line + @column = location.start_column + end + + ########################################################################## + # Ripper interface + ########################################################################## + + # :stopdoc: + def _dispatch_0; end + def _dispatch_1(_); end + def _dispatch_2(_, _); end + def _dispatch_3(_, _, _); end + def _dispatch_4(_, _, _, _); end + def _dispatch_5(_, _, _, _, _); end + def _dispatch_7(_, _, _, _, _, _, _); end + # :startdoc: + + # + # Parser Events + # + + PARSER_EVENT_TABLE.each do |id, arity| + alias_method "on_#{id}", "_dispatch_#{arity}" + end + + # This method is called when weak warning is produced by the parser. + # +fmt+ and +args+ is printf style. + def warn(fmt, *args) + end + + # This method is called when strong warning is produced by the parser. + # +fmt+ and +args+ is printf style. + def warning(fmt, *args) + end + + # This method is called when the parser found syntax error. + def compile_error(msg) + end + + # + # Scanner Events + # + + SCANNER_EVENTS.each do |id| + alias_method "on_#{id}", :_dispatch_1 + end + + # This method is provided by the Ripper C extension. It is called when a + # string needs to be dedented because of a tilde heredoc. It is expected + # that it will modify the string in place and return the number of bytes + # that were removed. + def dedent_string(string, width) + whitespace = 0 + cursor = 0 + + while cursor < string.length && string[cursor].match?(/\s/) && whitespace < width + if string[cursor] == "\t" + whitespace = ((whitespace / 8 + 1) * 8) + break if whitespace > width + else + whitespace += 1 + end + + cursor += 1 + end + + string.replace(string[cursor..]) + cursor + end + end + end +end diff --git a/lib/prism/translation/ripper/sexp.rb b/lib/prism/translation/ripper/sexp.rb new file mode 100644 index 0000000000..dc26a639a3 --- /dev/null +++ b/lib/prism/translation/ripper/sexp.rb @@ -0,0 +1,125 @@ +# frozen_string_literal: true + +require_relative "../ripper" + +module Prism + module Translation + class Ripper + # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that + # returns the arrays of [type, *children]. + class SexpBuilder < Ripper + # :stopdoc: + + attr_reader :error + + private + + def dedent_element(e, width) + if (n = dedent_string(e[1], width)) > 0 + e[2][1] += n + end + e + end + + def on_heredoc_dedent(val, width) + sub = proc do |cont| + cont.map! do |e| + if Array === e + case e[0] + when :@tstring_content + e = dedent_element(e, width) + when /_add\z/ + e[1] = sub[e[1]] + end + elsif String === e + dedent_string(e, width) + end + e + end + end + sub[val] + val + end + + events = private_instance_methods(false).grep(/\Aon_/) {$'.to_sym} + (PARSER_EVENTS - events).each do |event| + module_eval(<<-End, __FILE__, __LINE__ + 1) + def on_#{event}(*args) + args.unshift :#{event} + end + End + end + + SCANNER_EVENTS.each do |event| + module_eval(<<-End, __FILE__, __LINE__ + 1) + def on_#{event}(tok) + [:@#{event}, tok, [lineno(), column()]] + end + End + end + + def on_error(mesg) + @error = mesg + end + remove_method :on_parse_error + alias on_parse_error on_error + alias compile_error on_error + + # :startdoc: + end + + # This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that + # returns the same values as ::Ripper::SexpBuilder except with a couple of + # niceties that flatten linked lists into arrays. + class SexpBuilderPP < SexpBuilder + # :stopdoc: + + private + + def on_heredoc_dedent(val, width) + val.map! do |e| + next e if Symbol === e and /_content\z/ =~ e + if Array === e and e[0] == :@tstring_content + e = dedent_element(e, width) + elsif String === e + dedent_string(e, width) + end + e + end + val + end + + def _dispatch_event_new + [] + end + + def _dispatch_event_push(list, item) + list.push item + list + end + + def on_mlhs_paren(list) + [:mlhs, *list] + end + + def on_mlhs_add_star(list, star) + list.push([:rest_param, star]) + end + + def on_mlhs_add_post(list, post) + list.concat(post) + end + + PARSER_EVENT_TABLE.each do |event, arity| + if /_new\z/ =~ event and arity == 0 + alias_method "on_#{event}", :_dispatch_event_new + elsif /_add\z/ =~ event + alias_method "on_#{event}", :_dispatch_event_push + end + end + + # :startdoc: + end + end + end +end diff --git a/lib/prism/translation/ripper/shim.rb b/lib/prism/translation/ripper/shim.rb new file mode 100644 index 0000000000..10e21cd16a --- /dev/null +++ b/lib/prism/translation/ripper/shim.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +# This writes the prism ripper translation into the Ripper constant so that +# users can transparently use Ripper without any changes. +Ripper = Prism::Translation::Ripper diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb new file mode 100644 index 0000000000..ec458a3b63 --- /dev/null +++ b/lib/prism/translation/ruby_parser.rb @@ -0,0 +1,1594 @@ +# frozen_string_literal: true + +begin + require "ruby_parser" +rescue LoadError + warn(%q{Error: Unable to load ruby_parser. Add `gem "ruby_parser"` to your Gemfile.}) + exit(1) +end + +module Prism + module Translation + # This module is the entry-point for converting a prism syntax tree into the + # seattlerb/ruby_parser gem's syntax tree. + class RubyParser + # A prism visitor that builds Sexp objects. + class Compiler < ::Prism::Compiler + # This is the name of the file that we are compiling. We set it on every + # Sexp object that is generated, and also use it to compile __FILE__ + # nodes. + attr_reader :file + + # Class variables will change their type based on if they are inside of + # a method definition or not, so we need to track that state. + attr_reader :in_def + + # Some nodes will change their representation if they are inside of a + # pattern, so we need to track that state. + attr_reader :in_pattern + + # Initialize a new compiler with the given file name. + def initialize(file, in_def: false, in_pattern: false) + @file = file + @in_def = in_def + @in_pattern = in_pattern + end + + # alias foo bar + # ^^^^^^^^^^^^^ + def visit_alias_method_node(node) + s(node, :alias, visit(node.new_name), visit(node.old_name)) + end + + # alias $foo $bar + # ^^^^^^^^^^^^^^^ + def visit_alias_global_variable_node(node) + s(node, :valias, node.new_name.name, node.old_name.name) + end + + # foo => bar | baz + # ^^^^^^^^^ + def visit_alternation_pattern_node(node) + s(node, :or, visit(node.left), visit(node.right)) + end + + # a and b + # ^^^^^^^ + def visit_and_node(node) + s(node, :and, visit(node.left), visit(node.right)) + end + + # [] + # ^^ + def visit_array_node(node) + if in_pattern + s(node, :array_pat, nil).concat(visit_all(node.elements)) + else + s(node, :array).concat(visit_all(node.elements)) + end + end + + # foo => [bar] + # ^^^^^ + def visit_array_pattern_node(node) + if node.constant.nil? && node.requireds.empty? && node.rest.nil? && node.posts.empty? + s(node, :array_pat) + else + result = s(node, :array_pat, visit_pattern_constant(node.constant)).concat(visit_all(node.requireds)) + + case node.rest + when SplatNode + result << :"*#{node.rest.expression&.name}" + when ImplicitRestNode + result << :* + + # This doesn't make any sense at all, but since we're trying to + # replicate the behavior directly, we'll copy it. + result.line(666) + end + + result.concat(visit_all(node.posts)) + end + end + + # foo(bar) + # ^^^ + def visit_arguments_node(node) + raise "Cannot visit arguments directly" + end + + # { a: 1 } + # ^^^^ + def visit_assoc_node(node) + [visit(node.key), visit(node.value)] + end + + # def foo(**); bar(**); end + # ^^ + # + # { **foo } + # ^^^^^ + def visit_assoc_splat_node(node) + if node.value.nil? + [s(node, :kwsplat)] + else + [s(node, :kwsplat, visit(node.value))] + end + end + + # $+ + # ^^ + def visit_back_reference_read_node(node) + s(node, :back_ref, node.name.name.delete_prefix("$").to_sym) + end + + # begin end + # ^^^^^^^^^ + def visit_begin_node(node) + result = node.statements.nil? ? s(node, :nil) : visit(node.statements) + + if !node.rescue_clause.nil? + if !node.statements.nil? + result = s(node.statements, :rescue, result, visit(node.rescue_clause)) + else + result = s(node.rescue_clause, :rescue, visit(node.rescue_clause)) + end + + current = node.rescue_clause + until (current = current.consequent).nil? + result << visit(current) + end + end + + if !node.else_clause&.statements.nil? + result << visit(node.else_clause) + end + + if !node.ensure_clause.nil? + if !node.statements.nil? || !node.rescue_clause.nil? || !node.else_clause.nil? + result = s(node.statements || node.rescue_clause || node.else_clause || node.ensure_clause, :ensure, result, visit(node.ensure_clause)) + else + result = s(node.ensure_clause, :ensure, visit(node.ensure_clause)) + end + end + + result + end + + # foo(&bar) + # ^^^^ + def visit_block_argument_node(node) + s(node, :block_pass).tap do |result| + result << visit(node.expression) unless node.expression.nil? + end + end + + # foo { |; bar| } + # ^^^ + def visit_block_local_variable_node(node) + node.name + end + + # A block on a keyword or method call. + def visit_block_node(node) + s(node, :block_pass, visit(node.expression)) + end + + # def foo(&bar); end + # ^^^^ + def visit_block_parameter_node(node) + :"&#{node.name}" + end + + # A block's parameters. + def visit_block_parameters_node(node) + # If this block parameters has no parameters and is using pipes, then + # it inherits its location from its shadow locals, even if they're not + # on the same lines as the pipes. + shadow_loc = true + + result = + if node.parameters.nil? + s(node, :args) + else + shadow_loc = false + visit(node.parameters) + end + + if node.opening == "(" + result.line = node.opening_loc.start_line + result.line_max = node.closing_loc.end_line + shadow_loc = false + end + + if node.locals.any? + shadow = s(node, :shadow).concat(visit_all(node.locals)) + shadow.line = node.locals.first.location.start_line + shadow.line_max = node.locals.last.location.end_line + result << shadow + + if shadow_loc + result.line = shadow.line + result.line_max = shadow.line_max + end + end + + result + end + + # break + # ^^^^^ + # + # break foo + # ^^^^^^^^^ + def visit_break_node(node) + if node.arguments.nil? + s(node, :break) + elsif node.arguments.arguments.length == 1 + s(node, :break, visit(node.arguments.arguments.first)) + else + s(node, :break, s(node.arguments, :array).concat(visit_all(node.arguments.arguments))) + end + end + + # foo + # ^^^ + # + # foo.bar + # ^^^^^^^ + # + # foo.bar() {} + # ^^^^^^^^^^^^ + def visit_call_node(node) + case node.name + when :!~ + return s(node, :not, visit(node.copy(name: :"=~"))) + when :=~ + if node.arguments&.arguments&.length == 1 && node.block.nil? + case node.receiver + when StringNode + return s(node, :match3, visit(node.arguments.arguments.first), visit(node.receiver)) + when RegularExpressionNode, InterpolatedRegularExpressionNode + return s(node, :match2, visit(node.receiver), visit(node.arguments.arguments.first)) + end + end + end + + type = node.attribute_write? ? :attrasgn : :call + type = :"safe_#{type}" if node.safe_navigation? + + arguments = node.arguments&.arguments || [] + write_value = arguments.pop if type == :attrasgn + block = node.block + + if block.is_a?(BlockArgumentNode) + arguments << block + block = nil + end + + result = s(node, type, visit(node.receiver), node.name).concat(visit_all(arguments)) + result << visit_write_value(write_value) unless write_value.nil? + + visit_block(node, result, block) + end + + # foo.bar += baz + # ^^^^^^^^^^^^^^^ + def visit_call_operator_write_node(node) + if op_asgn?(node) + s(node, op_asgn_type(node, :op_asgn), visit(node.receiver), visit_write_value(node.value), node.read_name, node.binary_operator) + else + s(node, op_asgn_type(node, :op_asgn2), visit(node.receiver), node.write_name, node.binary_operator, visit_write_value(node.value)) + end + end + + # foo.bar &&= baz + # ^^^^^^^^^^^^^^^ + def visit_call_and_write_node(node) + if op_asgn?(node) + s(node, op_asgn_type(node, :op_asgn), visit(node.receiver), visit_write_value(node.value), node.read_name, :"&&") + else + s(node, op_asgn_type(node, :op_asgn2), visit(node.receiver), node.write_name, :"&&", visit_write_value(node.value)) + end + end + + # foo.bar ||= baz + # ^^^^^^^^^^^^^^^ + def visit_call_or_write_node(node) + if op_asgn?(node) + s(node, op_asgn_type(node, :op_asgn), visit(node.receiver), visit_write_value(node.value), node.read_name, :"||") + else + s(node, op_asgn_type(node, :op_asgn2), visit(node.receiver), node.write_name, :"||", visit_write_value(node.value)) + end + end + + # Call nodes with operators following them will either be op_asgn or + # op_asgn2 nodes. That is determined by their call operator and their + # right-hand side. + private def op_asgn?(node) + node.call_operator == "::" || (node.value.is_a?(CallNode) && node.value.opening_loc.nil? && !node.value.arguments.nil?) + end + + # Call nodes with operators following them can use &. as an operator, + # which changes their type by prefixing "safe_". + private def op_asgn_type(node, type) + node.safe_navigation? ? :"safe_#{type}" : type + end + + # foo.bar, = 1 + # ^^^^^^^ + def visit_call_target_node(node) + s(node, :attrasgn, visit(node.receiver), node.name) + end + + # foo => bar => baz + # ^^^^^^^^^^ + def visit_capture_pattern_node(node) + visit(node.target) << visit(node.value) + end + + # case foo; when bar; end + # ^^^^^^^^^^^^^^^^^^^^^^^ + def visit_case_node(node) + s(node, :case, visit(node.predicate)).concat(visit_all(node.conditions)) << visit(node.consequent) + end + + # case foo; in bar; end + # ^^^^^^^^^^^^^^^^^^^^^ + def visit_case_match_node(node) + s(node, :case, visit(node.predicate)).concat(visit_all(node.conditions)) << visit(node.consequent) + end + + # class Foo; end + # ^^^^^^^^^^^^^^ + def visit_class_node(node) + name = + if node.constant_path.is_a?(ConstantReadNode) + node.name + else + visit(node.constant_path) + end + + if node.body.nil? + s(node, :class, name, visit(node.superclass)) + elsif node.body.is_a?(StatementsNode) + compiler = copy_compiler(in_def: false) + s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) }) + else + s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false))) + end + end + + # @@foo + # ^^^^^ + def visit_class_variable_read_node(node) + s(node, :cvar, node.name) + end + + # @@foo = 1 + # ^^^^^^^^^ + # + # @@foo, @@bar = 1 + # ^^^^^ ^^^^^ + def visit_class_variable_write_node(node) + s(node, class_variable_write_type, node.name, visit_write_value(node.value)) + end + + # @@foo += bar + # ^^^^^^^^^^^^ + def visit_class_variable_operator_write_node(node) + s(node, class_variable_write_type, node.name, s(node, :call, s(node, :cvar, node.name), node.binary_operator, visit_write_value(node.value))) + end + + # @@foo &&= bar + # ^^^^^^^^^^^^^ + def visit_class_variable_and_write_node(node) + s(node, :op_asgn_and, s(node, :cvar, node.name), s(node, class_variable_write_type, node.name, visit_write_value(node.value))) + end + + # @@foo ||= bar + # ^^^^^^^^^^^^^ + def visit_class_variable_or_write_node(node) + s(node, :op_asgn_or, s(node, :cvar, node.name), s(node, class_variable_write_type, node.name, visit_write_value(node.value))) + end + + # @@foo, = bar + # ^^^^^ + def visit_class_variable_target_node(node) + s(node, class_variable_write_type, node.name) + end + + # If a class variable is written within a method definition, it has a + # different type than everywhere else. + private def class_variable_write_type + in_def ? :cvasgn : :cvdecl + end + + # Foo + # ^^^ + def visit_constant_read_node(node) + s(node, :const, node.name) + end + + # Foo = 1 + # ^^^^^^^ + # + # Foo, Bar = 1 + # ^^^ ^^^ + def visit_constant_write_node(node) + s(node, :cdecl, node.name, visit_write_value(node.value)) + end + + # Foo += bar + # ^^^^^^^^^^^ + def visit_constant_operator_write_node(node) + s(node, :cdecl, node.name, s(node, :call, s(node, :const, node.name), node.binary_operator, visit_write_value(node.value))) + end + + # Foo &&= bar + # ^^^^^^^^^^^^ + def visit_constant_and_write_node(node) + s(node, :op_asgn_and, s(node, :const, node.name), s(node, :cdecl, node.name, visit(node.value))) + end + + # Foo ||= bar + # ^^^^^^^^^^^^ + def visit_constant_or_write_node(node) + s(node, :op_asgn_or, s(node, :const, node.name), s(node, :cdecl, node.name, visit(node.value))) + end + + # Foo, = bar + # ^^^ + def visit_constant_target_node(node) + s(node, :cdecl, node.name) + end + + # Foo::Bar + # ^^^^^^^^ + def visit_constant_path_node(node) + if node.parent.nil? + s(node, :colon3, node.name) + else + s(node, :colon2, visit(node.parent), node.name) + end + end + + # Foo::Bar = 1 + # ^^^^^^^^^^^^ + # + # Foo::Foo, Bar::Bar = 1 + # ^^^^^^^^ ^^^^^^^^ + def visit_constant_path_write_node(node) + s(node, :cdecl, visit(node.target), visit_write_value(node.value)) + end + + # Foo::Bar += baz + # ^^^^^^^^^^^^^^^ + def visit_constant_path_operator_write_node(node) + s(node, :op_asgn, visit(node.target), node.binary_operator, visit_write_value(node.value)) + end + + # Foo::Bar &&= baz + # ^^^^^^^^^^^^^^^^ + def visit_constant_path_and_write_node(node) + s(node, :op_asgn_and, visit(node.target), visit_write_value(node.value)) + end + + # Foo::Bar ||= baz + # ^^^^^^^^^^^^^^^^ + def visit_constant_path_or_write_node(node) + s(node, :op_asgn_or, visit(node.target), visit_write_value(node.value)) + end + + # Foo::Bar, = baz + # ^^^^^^^^ + def visit_constant_path_target_node(node) + inner = + if node.parent.nil? + s(node, :colon3, node.child.name) + else + s(node, :colon2, visit(node.parent), node.child.name) + end + + s(node, :const, inner) + end + + # def foo; end + # ^^^^^^^^^^^^ + # + # def self.foo; end + # ^^^^^^^^^^^^^^^^^ + def visit_def_node(node) + name = node.name_loc.slice.to_sym + result = + if node.receiver.nil? + s(node, :defn, name) + else + s(node, :defs, visit(node.receiver), name) + end + + result.line(node.name_loc.start_line) + if node.parameters.nil? + result << s(node, :args).line(node.name_loc.start_line) + else + result << visit(node.parameters) + end + + if node.body.nil? + result << s(node, :nil) + elsif node.body.is_a?(StatementsNode) + compiler = copy_compiler(in_def: true) + result.concat(node.body.body.map { |child| child.accept(compiler) }) + else + result << node.body.accept(copy_compiler(in_def: true)) + end + end + + # defined? a + # ^^^^^^^^^^ + # + # defined?(a) + # ^^^^^^^^^^^ + def visit_defined_node(node) + s(node, :defined, visit(node.value)) + end + + # if foo then bar else baz end + # ^^^^^^^^^^^^ + def visit_else_node(node) + visit(node.statements) + end + + # "foo #{bar}" + # ^^^^^^ + def visit_embedded_statements_node(node) + result = s(node, :evstr) + result << visit(node.statements) unless node.statements.nil? + result + end + + # "foo #@bar" + # ^^^^^ + def visit_embedded_variable_node(node) + s(node, :evstr, visit(node.variable)) + end + + # begin; foo; ensure; bar; end + # ^^^^^^^^^^^^ + def visit_ensure_node(node) + node.statements.nil? ? s(node, :nil) : visit(node.statements) + end + + # false + # ^^^^^ + def visit_false_node(node) + s(node, :false) + end + + # foo => [*, bar, *] + # ^^^^^^^^^^^ + def visit_find_pattern_node(node) + s(node, :find_pat, visit_pattern_constant(node.constant), :"*#{node.left.expression&.name}", *visit_all(node.requireds), :"*#{node.right.expression&.name}") + end + + # if foo .. bar; end + # ^^^^^^^^^^ + def visit_flip_flop_node(node) + if node.left.is_a?(IntegerNode) && node.right.is_a?(IntegerNode) + s(node, :lit, Range.new(node.left.value, node.right.value, node.exclude_end?)) + else + s(node, node.exclude_end? ? :flip3 : :flip2, visit(node.left), visit(node.right)) + end + end + + # 1.0 + # ^^^ + def visit_float_node(node) + s(node, :lit, node.value) + end + + # for foo in bar do end + # ^^^^^^^^^^^^^^^^^^^^^ + def visit_for_node(node) + s(node, :for, visit(node.collection), visit(node.index), visit(node.statements)) + end + + # def foo(...); bar(...); end + # ^^^ + def visit_forwarding_arguments_node(node) + s(node, :forward_args) + end + + # def foo(...); end + # ^^^ + def visit_forwarding_parameter_node(node) + s(node, :forward_args) + end + + # super + # ^^^^^ + # + # super {} + # ^^^^^^^^ + def visit_forwarding_super_node(node) + visit_block(node, s(node, :zsuper), node.block) + end + + # $foo + # ^^^^ + def visit_global_variable_read_node(node) + s(node, :gvar, node.name) + end + + # $foo = 1 + # ^^^^^^^^ + # + # $foo, $bar = 1 + # ^^^^ ^^^^ + def visit_global_variable_write_node(node) + s(node, :gasgn, node.name, visit_write_value(node.value)) + end + + # $foo += bar + # ^^^^^^^^^^^ + def visit_global_variable_operator_write_node(node) + s(node, :gasgn, node.name, s(node, :call, s(node, :gvar, node.name), node.binary_operator, visit(node.value))) + end + + # $foo &&= bar + # ^^^^^^^^^^^^ + def visit_global_variable_and_write_node(node) + s(node, :op_asgn_and, s(node, :gvar, node.name), s(node, :gasgn, node.name, visit_write_value(node.value))) + end + + # $foo ||= bar + # ^^^^^^^^^^^^ + def visit_global_variable_or_write_node(node) + s(node, :op_asgn_or, s(node, :gvar, node.name), s(node, :gasgn, node.name, visit_write_value(node.value))) + end + + # $foo, = bar + # ^^^^ + def visit_global_variable_target_node(node) + s(node, :gasgn, node.name) + end + + # {} + # ^^ + def visit_hash_node(node) + s(node, :hash).concat(node.elements.flat_map { |element| visit(element) }) + end + + # foo => {} + # ^^ + def visit_hash_pattern_node(node) + result = s(node, :hash_pat, visit_pattern_constant(node.constant)).concat(node.elements.flat_map { |element| visit(element) }) + + case node.rest + when AssocSplatNode + result << s(node.rest, :kwrest, :"**#{node.rest.value&.name}") + when NoKeywordsParameterNode + result << visit(node.rest) + end + + result + end + + # if foo then bar end + # ^^^^^^^^^^^^^^^^^^^ + # + # bar if foo + # ^^^^^^^^^^ + # + # foo ? bar : baz + # ^^^^^^^^^^^^^^^ + def visit_if_node(node) + s(node, :if, visit(node.predicate), visit(node.statements), visit(node.consequent)) + end + + # 1i + def visit_imaginary_node(node) + s(node, :lit, node.value) + end + + # { foo: } + # ^^^^ + def visit_implicit_node(node) + end + + # foo { |bar,| } + # ^ + def visit_implicit_rest_node(node) + end + + # case foo; in bar; end + # ^^^^^^^^^^^^^^^^^^^^^ + def visit_in_node(node) + pattern = + if node.pattern.is_a?(ConstantPathNode) + s(node.pattern, :const, visit(node.pattern)) + else + node.pattern.accept(copy_compiler(in_pattern: true)) + end + + s(node, :in, pattern).concat(node.statements.nil? ? [nil] : visit_all(node.statements.body)) + end + + # foo[bar] += baz + # ^^^^^^^^^^^^^^^ + def visit_index_operator_write_node(node) + arglist = nil + + if !node.arguments.nil? || !node.block.nil? + arglist = s(node, :arglist).concat(visit_all(node.arguments&.arguments || [])) + arglist << visit(node.block) if !node.block.nil? + end + + s(node, :op_asgn1, visit(node.receiver), arglist, node.binary_operator, visit_write_value(node.value)) + end + + # foo[bar] &&= baz + # ^^^^^^^^^^^^^^^^ + def visit_index_and_write_node(node) + arglist = nil + + if !node.arguments.nil? || !node.block.nil? + arglist = s(node, :arglist).concat(visit_all(node.arguments&.arguments || [])) + arglist << visit(node.block) if !node.block.nil? + end + + s(node, :op_asgn1, visit(node.receiver), arglist, :"&&", visit_write_value(node.value)) + end + + # foo[bar] ||= baz + # ^^^^^^^^^^^^^^^^ + def visit_index_or_write_node(node) + arglist = nil + + if !node.arguments.nil? || !node.block.nil? + arglist = s(node, :arglist).concat(visit_all(node.arguments&.arguments || [])) + arglist << visit(node.block) if !node.block.nil? + end + + s(node, :op_asgn1, visit(node.receiver), arglist, :"||", visit_write_value(node.value)) + end + + # foo[bar], = 1 + # ^^^^^^^^ + def visit_index_target_node(node) + arguments = visit_all(node.arguments&.arguments || []) + arguments << visit(node.block) unless node.block.nil? + + s(node, :attrasgn, visit(node.receiver), :[]=).concat(arguments) + end + + # @foo + # ^^^^ + def visit_instance_variable_read_node(node) + s(node, :ivar, node.name) + end + + # @foo = 1 + # ^^^^^^^^ + # + # @foo, @bar = 1 + # ^^^^ ^^^^ + def visit_instance_variable_write_node(node) + s(node, :iasgn, node.name, visit_write_value(node.value)) + end + + # @foo += bar + # ^^^^^^^^^^^ + def visit_instance_variable_operator_write_node(node) + s(node, :iasgn, node.name, s(node, :call, s(node, :ivar, node.name), node.binary_operator, visit_write_value(node.value))) + end + + # @foo &&= bar + # ^^^^^^^^^^^^ + def visit_instance_variable_and_write_node(node) + s(node, :op_asgn_and, s(node, :ivar, node.name), s(node, :iasgn, node.name, visit(node.value))) + end + + # @foo ||= bar + # ^^^^^^^^^^^^ + def visit_instance_variable_or_write_node(node) + s(node, :op_asgn_or, s(node, :ivar, node.name), s(node, :iasgn, node.name, visit(node.value))) + end + + # @foo, = bar + # ^^^^ + def visit_instance_variable_target_node(node) + s(node, :iasgn, node.name) + end + + # 1 + # ^ + def visit_integer_node(node) + s(node, :lit, node.value) + end + + # if /foo #{bar}/ then end + # ^^^^^^^^^^^^ + def visit_interpolated_match_last_line_node(node) + parts = visit_interpolated_parts(node.parts) + regexp = + if parts.length == 1 + s(node, :lit, Regexp.new(parts.first, node.options)) + else + s(node, :dregx).concat(parts).tap do |result| + options = node.options + result << options if options != 0 + end + end + + s(node, :match, regexp) + end + + # /foo #{bar}/ + # ^^^^^^^^^^^^ + def visit_interpolated_regular_expression_node(node) + parts = visit_interpolated_parts(node.parts) + + if parts.length == 1 + s(node, :lit, Regexp.new(parts.first, node.options)) + else + s(node, :dregx).concat(parts).tap do |result| + options = node.options + result << options if options != 0 + end + end + end + + # "foo #{bar}" + # ^^^^^^^^^^^^ + def visit_interpolated_string_node(node) + parts = visit_interpolated_parts(node.parts) + parts.length == 1 ? s(node, :str, parts.first) : s(node, :dstr).concat(parts) + end + + # :"foo #{bar}" + # ^^^^^^^^^^^^^ + def visit_interpolated_symbol_node(node) + parts = visit_interpolated_parts(node.parts) + parts.length == 1 ? s(node, :lit, parts.first.to_sym) : s(node, :dsym).concat(parts) + end + + # `foo #{bar}` + # ^^^^^^^^^^^^ + def visit_interpolated_x_string_node(node) + source = node.heredoc? ? node.parts.first : node + parts = visit_interpolated_parts(node.parts) + parts.length == 1 ? s(source, :xstr, parts.first) : s(source, :dxstr).concat(parts) + end + + # Visit the interpolated content of the string-like node. + private def visit_interpolated_parts(parts) + visited = [] + parts.each do |part| + result = visit(part) + + if result[0] == :evstr && result[1] + if result[1][0] == :str + visited << result[1] + elsif result[1][0] == :dstr + visited.concat(result[1][1..-1]) + else + visited << result + end + elsif result[0] == :dstr + visited.concat(result[1..-1]) + else + visited << result + end + end + + state = :beginning #: :beginning | :string_content | :interpolated_content + + visited.each_with_object([]) do |result, results| + case state + when :beginning + if result.is_a?(String) + results << result + state = :string_content + elsif result.is_a?(Array) && result[0] == :str + results << result[1] + state = :string_content + else + results << "" + results << result + state = :interpolated_content + end + when :string_content + if result.is_a?(String) + results[0] << result + elsif result.is_a?(Array) && result[0] == :str + results[0] << result[1] + else + results << result + state = :interpolated_content + end + when :interpolated_content + if result.is_a?(Array) && result[0] == :str && results[-1][0] == :str && (results[-1].line_max == result.line) + results[-1][1] << result[1] + results[-1].line_max = result.line_max + else + results << result + end + end + end + end + + # -> { it } + # ^^ + def visit_it_local_variable_read_node(node) + s(node, :call, nil, :it) + end + + # foo(bar: baz) + # ^^^^^^^^ + def visit_keyword_hash_node(node) + s(node, :hash).concat(node.elements.flat_map { |element| visit(element) }) + end + + # def foo(**bar); end + # ^^^^^ + # + # def foo(**); end + # ^^ + def visit_keyword_rest_parameter_node(node) + :"**#{node.name}" + end + + # -> {} + def visit_lambda_node(node) + parameters = + case node.parameters + when nil, NumberedParametersNode + s(node, :args) + else + visit(node.parameters) + end + + if node.body.nil? + s(node, :iter, s(node, :lambda), parameters) + else + s(node, :iter, s(node, :lambda), parameters, visit(node.body)) + end + end + + # foo + # ^^^ + def visit_local_variable_read_node(node) + if node.name.match?(/^_\d$/) + s(node, :call, nil, node.name) + else + s(node, :lvar, node.name) + end + end + + # foo = 1 + # ^^^^^^^ + # + # foo, bar = 1 + # ^^^ ^^^ + def visit_local_variable_write_node(node) + s(node, :lasgn, node.name, visit_write_value(node.value)) + end + + # foo += bar + # ^^^^^^^^^^ + def visit_local_variable_operator_write_node(node) + s(node, :lasgn, node.name, s(node, :call, s(node, :lvar, node.name), node.binary_operator, visit_write_value(node.value))) + end + + # foo &&= bar + # ^^^^^^^^^^^ + def visit_local_variable_and_write_node(node) + s(node, :op_asgn_and, s(node, :lvar, node.name), s(node, :lasgn, node.name, visit_write_value(node.value))) + end + + # foo ||= bar + # ^^^^^^^^^^^ + def visit_local_variable_or_write_node(node) + s(node, :op_asgn_or, s(node, :lvar, node.name), s(node, :lasgn, node.name, visit_write_value(node.value))) + end + + # foo, = bar + # ^^^ + def visit_local_variable_target_node(node) + s(node, :lasgn, node.name) + end + + # if /foo/ then end + # ^^^^^ + def visit_match_last_line_node(node) + s(node, :match, s(node, :lit, Regexp.new(node.unescaped, node.options))) + end + + # foo in bar + # ^^^^^^^^^^ + def visit_match_predicate_node(node) + s(node, :case, visit(node.value), s(node, :in, node.pattern.accept(copy_compiler(in_pattern: true)), nil), nil) + end + + # foo => bar + # ^^^^^^^^^^ + def visit_match_required_node(node) + s(node, :case, visit(node.value), s(node, :in, node.pattern.accept(copy_compiler(in_pattern: true)), nil), nil) + end + + # /(?<foo>foo)/ =~ bar + # ^^^^^^^^^^^^^^^^^^^^ + def visit_match_write_node(node) + s(node, :match2, visit(node.call.receiver), visit(node.call.arguments.arguments.first)) + end + + # A node that is missing from the syntax tree. This is only used in the + # case of a syntax error. The parser gem doesn't have such a concept, so + # we invent our own here. + def visit_missing_node(node) + raise "Cannot visit missing node directly" + end + + # module Foo; end + # ^^^^^^^^^^^^^^^ + def visit_module_node(node) + name = + if node.constant_path.is_a?(ConstantReadNode) + node.name + else + visit(node.constant_path) + end + + if node.body.nil? + s(node, :module, name) + elsif node.body.is_a?(StatementsNode) + compiler = copy_compiler(in_def: false) + s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) }) + else + s(node, :module, name, node.body.accept(copy_compiler(in_def: false))) + end + end + + # foo, bar = baz + # ^^^^^^^^ + def visit_multi_target_node(node) + targets = [*node.lefts] + targets << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode) + targets.concat(node.rights) + + s(node, :masgn, s(node, :array).concat(visit_all(targets))) + end + + # foo, bar = baz + # ^^^^^^^^^^^^^^ + def visit_multi_write_node(node) + targets = [*node.lefts] + targets << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode) + targets.concat(node.rights) + + value = + if node.value.is_a?(ArrayNode) && node.value.opening_loc.nil? + if node.value.elements.length == 1 && node.value.elements.first.is_a?(SplatNode) + visit(node.value.elements.first) + else + visit(node.value) + end + else + s(node.value, :to_ary, visit(node.value)) + end + + s(node, :masgn, s(node, :array).concat(visit_all(targets)), value) + end + + # next + # ^^^^ + # + # next foo + # ^^^^^^^^ + def visit_next_node(node) + if node.arguments.nil? + s(node, :next) + elsif node.arguments.arguments.length == 1 + argument = node.arguments.arguments.first + s(node, :next, argument.is_a?(SplatNode) ? s(node, :svalue, visit(argument)) : visit(argument)) + else + s(node, :next, s(node, :array).concat(visit_all(node.arguments.arguments))) + end + end + + # nil + # ^^^ + def visit_nil_node(node) + s(node, :nil) + end + + # def foo(**nil); end + # ^^^^^ + def visit_no_keywords_parameter_node(node) + in_pattern ? s(node, :kwrest, :"**nil") : :"**nil" + end + + # -> { _1 + _2 } + # ^^^^^^^^^^^^^^ + def visit_numbered_parameters_node(node) + raise "Cannot visit numbered parameters directly" + end + + # $1 + # ^^ + def visit_numbered_reference_read_node(node) + s(node, :nth_ref, node.number) + end + + # def foo(bar: baz); end + # ^^^^^^^^ + def visit_optional_keyword_parameter_node(node) + s(node, :kwarg, node.name, visit(node.value)) + end + + # def foo(bar = 1); end + # ^^^^^^^ + def visit_optional_parameter_node(node) + s(node, :lasgn, node.name, visit(node.value)) + end + + # a or b + # ^^^^^^ + def visit_or_node(node) + s(node, :or, visit(node.left), visit(node.right)) + end + + # def foo(bar, *baz); end + # ^^^^^^^^^ + def visit_parameters_node(node) + children = + node.compact_child_nodes.map do |element| + if element.is_a?(MultiTargetNode) + visit_destructured_parameter(element) + else + visit(element) + end + end + + s(node, :args).concat(children) + end + + # def foo((bar, baz)); end + # ^^^^^^^^^^ + private def visit_destructured_parameter(node) + children = + [*node.lefts, *node.rest, *node.rights].map do |child| + case child + when RequiredParameterNode + visit(child) + when MultiTargetNode + visit_destructured_parameter(child) + when SplatNode + :"*#{child.expression&.name}" + else + raise + end + end + + s(node, :masgn).concat(children) + end + + # () + # ^^ + # + # (1) + # ^^^ + def visit_parentheses_node(node) + if node.body.nil? + s(node, :nil) + else + visit(node.body) + end + end + + # foo => ^(bar) + # ^^^^^^ + def visit_pinned_expression_node(node) + node.expression.accept(copy_compiler(in_pattern: false)) + end + + # foo = 1 and bar => ^foo + # ^^^^ + def visit_pinned_variable_node(node) + if node.variable.is_a?(LocalVariableReadNode) && node.variable.name.match?(/^_\d$/) + s(node, :lvar, node.variable.name) + else + visit(node.variable) + end + end + + # END {} + def visit_post_execution_node(node) + s(node, :iter, s(node, :postexe), 0, visit(node.statements)) + end + + # BEGIN {} + def visit_pre_execution_node(node) + s(node, :iter, s(node, :preexe), 0, visit(node.statements)) + end + + # The top-level program node. + def visit_program_node(node) + visit(node.statements) + end + + # 0..5 + # ^^^^ + def visit_range_node(node) + if !in_pattern && !node.left.nil? && !node.right.nil? && ([node.left.type, node.right.type] - %i[nil_node integer_node]).empty? + left = node.left.value if node.left.is_a?(IntegerNode) + right = node.right.value if node.right.is_a?(IntegerNode) + s(node, :lit, Range.new(left, right, node.exclude_end?)) + else + s(node, node.exclude_end? ? :dot3 : :dot2, visit_range_bounds_node(node.left), visit_range_bounds_node(node.right)) + end + end + + # If the bounds of a range node are empty parentheses, then they do not + # get replaced by their usual s(:nil), but instead are s(:begin). + private def visit_range_bounds_node(node) + if node.is_a?(ParenthesesNode) && node.body.nil? + s(node, :begin) + else + visit(node) + end + end + + # 1r + # ^^ + def visit_rational_node(node) + s(node, :lit, node.value) + end + + # redo + # ^^^^ + def visit_redo_node(node) + s(node, :redo) + end + + # /foo/ + # ^^^^^ + def visit_regular_expression_node(node) + s(node, :lit, Regexp.new(node.unescaped, node.options)) + end + + # def foo(bar:); end + # ^^^^ + def visit_required_keyword_parameter_node(node) + s(node, :kwarg, node.name) + end + + # def foo(bar); end + # ^^^ + def visit_required_parameter_node(node) + node.name + end + + # foo rescue bar + # ^^^^^^^^^^^^^^ + def visit_rescue_modifier_node(node) + s(node, :rescue, visit(node.expression), s(node.rescue_expression, :resbody, s(node.rescue_expression, :array), visit(node.rescue_expression))) + end + + # begin; rescue; end + # ^^^^^^^ + def visit_rescue_node(node) + exceptions = + if node.exceptions.length == 1 && node.exceptions.first.is_a?(SplatNode) + visit(node.exceptions.first) + else + s(node, :array).concat(visit_all(node.exceptions)) + end + + if !node.reference.nil? + exceptions << (visit(node.reference) << s(node.reference, :gvar, :"$!")) + end + + s(node, :resbody, exceptions).concat(node.statements.nil? ? [nil] : visit_all(node.statements.body)) + end + + # def foo(*bar); end + # ^^^^ + # + # def foo(*); end + # ^ + def visit_rest_parameter_node(node) + :"*#{node.name}" + end + + # retry + # ^^^^^ + def visit_retry_node(node) + s(node, :retry) + end + + # return + # ^^^^^^ + # + # return 1 + # ^^^^^^^^ + def visit_return_node(node) + if node.arguments.nil? + s(node, :return) + elsif node.arguments.arguments.length == 1 + argument = node.arguments.arguments.first + s(node, :return, argument.is_a?(SplatNode) ? s(node, :svalue, visit(argument)) : visit(argument)) + else + s(node, :return, s(node, :array).concat(visit_all(node.arguments.arguments))) + end + end + + # self + # ^^^^ + def visit_self_node(node) + s(node, :self) + end + + # A shareable constant. + def visit_shareable_constant_node(node) + visit(node.write) + end + + # class << self; end + # ^^^^^^^^^^^^^^^^^^ + def visit_singleton_class_node(node) + s(node, :sclass, visit(node.expression)).tap do |sexp| + sexp << node.body.accept(copy_compiler(in_def: false)) unless node.body.nil? + end + end + + # __ENCODING__ + # ^^^^^^^^^^^^ + def visit_source_encoding_node(node) + # TODO + s(node, :colon2, s(node, :const, :Encoding), :UTF_8) + end + + # __FILE__ + # ^^^^^^^^ + def visit_source_file_node(node) + s(node, :str, node.filepath) + end + + # __LINE__ + # ^^^^^^^^ + def visit_source_line_node(node) + s(node, :lit, node.location.start_line) + end + + # foo(*bar) + # ^^^^ + # + # def foo((bar, *baz)); end + # ^^^^ + # + # def foo(*); bar(*); end + # ^ + def visit_splat_node(node) + if node.expression.nil? + s(node, :splat) + else + s(node, :splat, visit(node.expression)) + end + end + + # A list of statements. + def visit_statements_node(node) + first, *rest = node.body + + if rest.empty? + visit(first) + else + s(node, :block).concat(visit_all(node.body)) + end + end + + # "foo" + # ^^^^^ + def visit_string_node(node) + s(node, :str, node.unescaped) + end + + # super(foo) + # ^^^^^^^^^^ + def visit_super_node(node) + arguments = node.arguments&.arguments || [] + block = node.block + + if block.is_a?(BlockArgumentNode) + arguments << block + block = nil + end + + visit_block(node, s(node, :super).concat(visit_all(arguments)), block) + end + + # :foo + # ^^^^ + def visit_symbol_node(node) + node.value == "!@" ? s(node, :lit, :"!@") : s(node, :lit, node.unescaped.to_sym) + end + + # true + # ^^^^ + def visit_true_node(node) + s(node, :true) + end + + # undef foo + # ^^^^^^^^^ + def visit_undef_node(node) + names = node.names.map { |name| s(node, :undef, visit(name)) } + names.length == 1 ? names.first : s(node, :block).concat(names) + end + + # unless foo; bar end + # ^^^^^^^^^^^^^^^^^^^ + # + # bar unless foo + # ^^^^^^^^^^^^^^ + def visit_unless_node(node) + s(node, :if, visit(node.predicate), visit(node.consequent), visit(node.statements)) + end + + # until foo; bar end + # ^^^^^^^^^^^^^^^^^ + # + # bar until foo + # ^^^^^^^^^^^^^ + def visit_until_node(node) + s(node, :until, visit(node.predicate), visit(node.statements), !node.begin_modifier?) + end + + # case foo; when bar; end + # ^^^^^^^^^^^^^ + def visit_when_node(node) + s(node, :when, s(node, :array).concat(visit_all(node.conditions))).concat(node.statements.nil? ? [nil] : visit_all(node.statements.body)) + end + + # while foo; bar end + # ^^^^^^^^^^^^^^^^^^ + # + # bar while foo + # ^^^^^^^^^^^^^ + def visit_while_node(node) + s(node, :while, visit(node.predicate), visit(node.statements), !node.begin_modifier?) + end + + # `foo` + # ^^^^^ + def visit_x_string_node(node) + result = s(node, :xstr, node.unescaped) + + if node.heredoc? + result.line = node.content_loc.start_line + result.line_max = node.content_loc.end_line + end + + result + end + + # yield + # ^^^^^ + # + # yield 1 + # ^^^^^^^ + def visit_yield_node(node) + s(node, :yield).concat(visit_all(node.arguments&.arguments || [])) + end + + private + + # Create a new compiler with the given options. + def copy_compiler(in_def: self.in_def, in_pattern: self.in_pattern) + Compiler.new(file, in_def: in_def, in_pattern: in_pattern) + end + + # Create a new Sexp object from the given prism node and arguments. + def s(node, *arguments) + result = Sexp.new(*arguments) + result.file = file + result.line = node.location.start_line + result.line_max = node.location.end_line + result + end + + # Visit a block node, which will modify the AST by wrapping the given + # visited node in an iter node. + def visit_block(node, sexp, block) + if block.nil? + sexp + else + parameters = + case block.parameters + when nil, NumberedParametersNode + 0 + else + visit(block.parameters) + end + + if block.body.nil? + s(node, :iter, sexp, parameters) + else + s(node, :iter, sexp, parameters, visit(block.body)) + end + end + end + + # Pattern constants get wrapped in another layer of :const. + def visit_pattern_constant(node) + case node + when nil + # nothing + when ConstantReadNode + visit(node) + else + s(node, :const, visit(node)) + end + end + + # Visit the value of a write, which will be on the right-hand side of + # a write operator. Because implicit arrays can have splats, those could + # potentially be wrapped in an svalue node. + def visit_write_value(node) + if node.is_a?(ArrayNode) && node.opening_loc.nil? + if node.elements.length == 1 && node.elements.first.is_a?(SplatNode) + s(node, :svalue, visit(node.elements.first)) + else + s(node, :svalue, visit(node)) + end + else + visit(node) + end + end + end + + private_constant :Compiler + + # Parse the given source and translate it into the seattlerb/ruby_parser + # gem's Sexp format. + def parse(source, filepath = "(string)") + translate(Prism.parse(source, filepath: filepath, scopes: [[]]), filepath) + end + + # Parse the given file and translate it into the seattlerb/ruby_parser + # gem's Sexp format. + def parse_file(filepath) + translate(Prism.parse_file(filepath, scopes: [[]]), filepath) + end + + class << self + # Parse the given source and translate it into the seattlerb/ruby_parser + # gem's Sexp format. + def parse(source, filepath = "(string)") + new.parse(source, filepath) + end + + # Parse the given file and translate it into the seattlerb/ruby_parser + # gem's Sexp format. + def parse_file(filepath) + new.parse_file(filepath) + end + end + + private + + # Translate the given parse result and filepath into the + # seattlerb/ruby_parser gem's Sexp format. + def translate(result, filepath) + if result.failure? + error = result.errors.first + raise ::RubyParser::SyntaxError, "#{filepath}:#{error.location.start_line} :: #{error.message}" + end + + result.value.accept(Compiler.new(filepath)) + end + end + end +end |