diff options
Diffstat (limited to 'lib/prism')
32 files changed, 3073 insertions, 1435 deletions
diff --git a/lib/prism/desugar_compiler.rb b/lib/prism/desugar_compiler.rb index e3b15fc3b0..c64d03f64a 100644 --- a/lib/prism/desugar_compiler.rb +++ b/lib/prism/desugar_compiler.rb @@ -1,11 +1,18 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism class DesugarAndWriteNode # :nodoc: include DSL - attr_reader :node, :default_source, :read_class, :write_class, :arguments + attr_reader :node #: ClassVariableAndWriteNode | ConstantAndWriteNode | GlobalVariableAndWriteNode | InstanceVariableAndWriteNode | LocalVariableAndWriteNode + attr_reader :default_source #: Source + attr_reader :read_class, :write_class #: Symbol + attr_reader :arguments #: Hash[Symbol, untyped] + #: ((ClassVariableAndWriteNode | ConstantAndWriteNode | GlobalVariableAndWriteNode | InstanceVariableAndWriteNode | LocalVariableAndWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void def initialize(node, default_source, read_class, write_class, **arguments) @node = node @default_source = default_source @@ -15,6 +22,8 @@ module Prism end # Desugar `x &&= y` to `x && x = y` + #-- + #: () -> node def compile and_node( location: node.location, @@ -35,8 +44,12 @@ module Prism class DesugarOrWriteDefinedNode # :nodoc: include DSL - attr_reader :node, :default_source, :read_class, :write_class, :arguments + attr_reader :node #: ClassVariableOrWriteNode | ConstantOrWriteNode | GlobalVariableOrWriteNode + attr_reader :default_source #: Source + attr_reader :read_class, :write_class #: Symbol + attr_reader :arguments #: Hash[Symbol, untyped] + #: ((ClassVariableOrWriteNode | ConstantOrWriteNode | GlobalVariableOrWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void def initialize(node, default_source, read_class, write_class, **arguments) @node = node @default_source = default_source @@ -46,6 +59,8 @@ module Prism end # Desugar `x ||= y` to `defined?(x) ? 
x : x = y` + #-- + #: () -> node def compile if_node( location: node.location, @@ -86,8 +101,12 @@ module Prism class DesugarOperatorWriteNode # :nodoc: include DSL - attr_reader :node, :default_source, :read_class, :write_class, :arguments + attr_reader :node #: ClassVariableOperatorWriteNode | ConstantOperatorWriteNode | GlobalVariableOperatorWriteNode | InstanceVariableOperatorWriteNode | LocalVariableOperatorWriteNode + attr_reader :default_source #: Source + attr_reader :read_class, :write_class #: Symbol + attr_reader :arguments #: Hash[Symbol, untyped] + #: ((ClassVariableOperatorWriteNode | ConstantOperatorWriteNode | GlobalVariableOperatorWriteNode | InstanceVariableOperatorWriteNode | LocalVariableOperatorWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void def initialize(node, default_source, read_class, write_class, **arguments) @node = node @default_source = default_source @@ -97,6 +116,8 @@ module Prism end # Desugar `x += y` to `x = x + y` + #-- + #: () -> node def compile binary_operator_loc = node.binary_operator_loc.chop @@ -130,8 +151,12 @@ module Prism class DesugarOrWriteNode # :nodoc: include DSL - attr_reader :node, :default_source, :read_class, :write_class, :arguments + attr_reader :node #: InstanceVariableOrWriteNode | LocalVariableOrWriteNode + attr_reader :default_source #: Source + attr_reader :read_class, :write_class #: Symbol + attr_reader :arguments #: Hash[Symbol, untyped] + #: ((InstanceVariableOrWriteNode | LocalVariableOrWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void def initialize(node, default_source, read_class, write_class, **arguments) @node = node @default_source = default_source @@ -141,6 +166,8 @@ module Prism end # Desugar `x ||= y` to `x || x = y` + #-- + #: () -> node def compile or_node( location: node.location, @@ -161,90 +188,105 @@ module Prism private_constant :DesugarAndWriteNode, 
:DesugarOrWriteNode, :DesugarOrWriteDefinedNode, :DesugarOperatorWriteNode class ClassVariableAndWriteNode + #: () -> node def desugar # :nodoc: DesugarAndWriteNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile end end class ClassVariableOrWriteNode + #: () -> node def desugar # :nodoc: DesugarOrWriteDefinedNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile end end class ClassVariableOperatorWriteNode + #: () -> node def desugar # :nodoc: DesugarOperatorWriteNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile end end class ConstantAndWriteNode + #: () -> node def desugar # :nodoc: DesugarAndWriteNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile end end class ConstantOrWriteNode + #: () -> node def desugar # :nodoc: DesugarOrWriteDefinedNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile end end class ConstantOperatorWriteNode + #: () -> node def desugar # :nodoc: DesugarOperatorWriteNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile end end class GlobalVariableAndWriteNode + #: () -> node def desugar # :nodoc: DesugarAndWriteNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile end end class GlobalVariableOrWriteNode + #: () -> node def desugar # :nodoc: DesugarOrWriteDefinedNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile end end class GlobalVariableOperatorWriteNode + #: () -> node def desugar # :nodoc: DesugarOperatorWriteNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile end end class InstanceVariableAndWriteNode + #: () -> node def desugar # :nodoc: DesugarAndWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile end end 
class InstanceVariableOrWriteNode + #: () -> node def desugar # :nodoc: DesugarOrWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile end end class InstanceVariableOperatorWriteNode + #: () -> node def desugar # :nodoc: DesugarOperatorWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile end end class LocalVariableAndWriteNode + #: () -> node def desugar # :nodoc: DesugarAndWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile end end class LocalVariableOrWriteNode + #: () -> node def desugar # :nodoc: DesugarOrWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile end end class LocalVariableOperatorWriteNode + #: () -> node def desugar # :nodoc: DesugarOperatorWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile end @@ -253,137 +295,167 @@ module Prism # DesugarCompiler is a compiler that desugars Ruby code into a more primitive # form. This is useful for consumers that want to deal with fewer node types. class DesugarCompiler < MutationCompiler - # @@foo &&= bar + # `@@foo &&= bar` # # becomes # - # @@foo && @@foo = bar + # `@@foo && @@foo = bar` + #-- + #: (ClassVariableAndWriteNode node) -> node def visit_class_variable_and_write_node(node) node.desugar end - # @@foo ||= bar + # `@@foo ||= bar` # # becomes # - # defined?(@@foo) ? @@foo : @@foo = bar + # `defined?(@@foo) ? 
@@foo : @@foo = bar` + #-- + #: (ClassVariableOrWriteNode node) -> node def visit_class_variable_or_write_node(node) node.desugar end - # @@foo += bar + # `@@foo += bar` # # becomes # - # @@foo = @@foo + bar + # `@@foo = @@foo + bar` + #-- + #: (ClassVariableOperatorWriteNode node) -> node def visit_class_variable_operator_write_node(node) node.desugar end - # Foo &&= bar + # `Foo &&= bar` # # becomes # - # Foo && Foo = bar + # `Foo && Foo = bar` + #-- + #: (ConstantAndWriteNode node) -> node def visit_constant_and_write_node(node) node.desugar end - # Foo ||= bar + # `Foo ||= bar` # # becomes # - # defined?(Foo) ? Foo : Foo = bar + # `defined?(Foo) ? Foo : Foo = bar` + #-- + #: (ConstantOrWriteNode node) -> node def visit_constant_or_write_node(node) node.desugar end - # Foo += bar + # `Foo += bar` # # becomes # - # Foo = Foo + bar + # `Foo = Foo + bar` + #-- + #: (ConstantOperatorWriteNode node) -> node def visit_constant_operator_write_node(node) node.desugar end - # $foo &&= bar + # `$foo &&= bar` # # becomes # - # $foo && $foo = bar + # `$foo && $foo = bar` + #-- + #: (GlobalVariableAndWriteNode node) -> node def visit_global_variable_and_write_node(node) node.desugar end - # $foo ||= bar + # `$foo ||= bar` # # becomes # - # defined?($foo) ? $foo : $foo = bar + # `defined?($foo) ? 
$foo : $foo = bar` + #-- + #: (GlobalVariableOrWriteNode node) -> node def visit_global_variable_or_write_node(node) node.desugar end - # $foo += bar + # `$foo += bar` # # becomes # - # $foo = $foo + bar + # `$foo = $foo + bar` + #-- + #: (GlobalVariableOperatorWriteNode node) -> node def visit_global_variable_operator_write_node(node) node.desugar end - # @foo &&= bar + # `@foo &&= bar` # # becomes # - # @foo && @foo = bar + # `@foo && @foo = bar` + #-- + #: (InstanceVariableAndWriteNode node) -> node def visit_instance_variable_and_write_node(node) node.desugar end - # @foo ||= bar + # `@foo ||= bar` # # becomes # - # @foo || @foo = bar + # `@foo || @foo = bar` + #-- + #: (InstanceVariableOrWriteNode node) -> node def visit_instance_variable_or_write_node(node) node.desugar end - # @foo += bar + # `@foo += bar` # # becomes # - # @foo = @foo + bar + # `@foo = @foo + bar` + #-- + #: (InstanceVariableOperatorWriteNode node) -> node def visit_instance_variable_operator_write_node(node) node.desugar end - # foo &&= bar + # `foo &&= bar` # # becomes # - # foo && foo = bar + # `foo && foo = bar` + #-- + #: (LocalVariableAndWriteNode node) -> node def visit_local_variable_and_write_node(node) node.desugar end - # foo ||= bar + # `foo ||= bar` # # becomes # - # foo || foo = bar + # `foo || foo = bar` + #-- + #: (LocalVariableOrWriteNode node) -> node def visit_local_variable_or_write_node(node) node.desugar end - # foo += bar + # `foo += bar` # # becomes # - # foo = foo + bar + # `foo = foo + bar` + #-- + #: (LocalVariableOperatorWriteNode node) -> node def visit_local_variable_operator_write_node(node) node.desugar end diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index a0da0b6195..6b9bde51ea 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true +# :markup: markdown # typed: ignore # This file is responsible for mirroring the API provided by the C extension by @@ -11,7 +12,7 @@ require "ffi" # autoloaded from within a 
non-main Ractor. require "prism/serialize" if defined?(Ractor) -module Prism +module Prism # :nodoc: module LibRubyParser # :nodoc: extend FFI::Library @@ -58,6 +59,9 @@ module Prism # We only want to load the functions that we are interested in. next unless functions.any? { |function| line.include?(function) } + # Strip trailing attributes (PRISM_NODISCARD, PRISM_NONNULL(...), etc.) + line = line.sub(/\)(\s+PRISM_\w+(?:\([^)]*\))?)+\s*;/, ");") + # Parse the function declaration. unless /^PRISM_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line raise "Could not parse #{line}" @@ -84,29 +88,44 @@ module Prism raise "Could not find functions #{functions.inspect}" unless functions.empty? end - callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer - enum :pm_string_init_result_t, %i[PM_STRING_INIT_SUCCESS PM_STRING_INIT_ERROR_GENERIC PM_STRING_INIT_ERROR_DIRECTORY] + callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer + callback :pm_source_stream_feof_t, [:pointer], :int + pm_source_init_result_values = %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR] + enum :pm_source_init_result_t, pm_source_init_result_values enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE] + # Ractor-safe lookup table for pm_source_init_result_t, since FFI's + # enum_type accesses module instance variables that are not shareable. 
+ SOURCE_INIT_RESULT = pm_source_init_result_values.freeze + load_exported_functions_from( - "prism.h", + "prism/version.h", "pm_version", + [] + ) + + load_exported_functions_from( + "prism/serialize.h", "pm_serialize_parse", "pm_serialize_parse_stream", "pm_serialize_parse_comments", "pm_serialize_lex", "pm_serialize_parse_lex", - "pm_parse_success_p", + "pm_serialize_parse_success_p", + [] + ) + + load_exported_functions_from( + "prism/string_query.h", "pm_string_query_local", "pm_string_query_constant", "pm_string_query_method_name", - [:pm_parse_stream_fgets_t] + [] ) load_exported_functions_from( - "prism/util/pm_buffer.h", - "pm_buffer_sizeof", - "pm_buffer_init", + "prism/buffer.h", + "pm_buffer_new", "pm_buffer_value", "pm_buffer_length", "pm_buffer_free", @@ -114,20 +133,19 @@ module Prism ) load_exported_functions_from( - "prism/util/pm_string.h", - "pm_string_mapped_init", - "pm_string_free", - "pm_string_source", - "pm_string_length", - "pm_string_sizeof", - [] + "prism/source.h", + "pm_source_file_new", + "pm_source_mapped_new", + "pm_source_stream_new", + "pm_source_free", + "pm_source_source", + "pm_source_length", + [:pm_source_stream_fgets_t, :pm_source_stream_feof_t] ) # This object represents a pm_buffer_t. We only use it as an opaque pointer, # so it doesn't need to know the fields of pm_buffer_t. class PrismBuffer # :nodoc: - SIZEOF = LibRubyParser.pm_buffer_sizeof - attr_reader :pointer def initialize(pointer) @@ -149,20 +167,20 @@ module Prism # Initialize a new buffer and yield it to the block. The buffer will be # automatically freed when the block returns. def self.with - FFI::MemoryPointer.new(SIZEOF) do |pointer| - raise unless LibRubyParser.pm_buffer_init(pointer) - return yield new(pointer) + buffer = LibRubyParser.pm_buffer_new + raise unless buffer + + begin + yield new(buffer) ensure - LibRubyParser.pm_buffer_free(pointer) + LibRubyParser.pm_buffer_free(buffer) end end end - # This object represents a pm_string_t. 
We only use it as an opaque pointer, - # so it doesn't have to be an FFI::Struct. - class PrismString # :nodoc: - SIZEOF = LibRubyParser.pm_string_sizeof - + # This object represents source code to be parsed. For strings it wraps a + # pointer directly; for files it uses a pm_source_t under the hood. + class PrismSource # :nodoc: PLATFORM_EXPECTS_UTF8 = RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i) @@ -179,7 +197,7 @@ module Prism @pointer.read_string(@length) end - # Yields a pm_string_t pointer to the given block. + # Yields a PrismSource backed by the given string to the block. def self.with_string(string) raise TypeError unless string.is_a?(String) @@ -193,32 +211,38 @@ module Prism end end - # Yields a pm_string_t pointer to the given block. + # Yields a PrismSource to the given block, backed by a pm_source_t. def self.with_file(filepath) raise TypeError unless filepath.is_a?(String) # On Windows and Mac, it's expected that filepaths will be encoded in # UTF-8. If they are not, we need to convert them to UTF-8 before - # passing them into pm_string_mapped_init. + # passing them into pm_source_mapped_new. 
if PLATFORM_EXPECTS_UTF8 && (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8 filepath = filepath.encode(Encoding::UTF_8) end - FFI::MemoryPointer.new(SIZEOF) do |pm_string| - case (result = LibRubyParser.pm_string_mapped_init(pm_string, filepath)) - when :PM_STRING_INIT_SUCCESS - pointer = LibRubyParser.pm_string_source(pm_string) - length = LibRubyParser.pm_string_length(pm_string) + FFI::MemoryPointer.new(:int) do |result_ptr| + pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr) + + case SOURCE_INIT_RESULT[result_ptr.read_int] + when :PM_SOURCE_INIT_SUCCESS + pointer = LibRubyParser.pm_source_source(pm_source) + length = LibRubyParser.pm_source_length(pm_source) return yield new(pointer, length, false) - when :PM_STRING_INIT_ERROR_GENERIC + when :PM_SOURCE_INIT_ERROR_GENERIC raise SystemCallError.new(filepath, FFI.errno) - when :PM_STRING_INIT_ERROR_DIRECTORY + when :PM_SOURCE_INIT_ERROR_DIRECTORY raise Errno::EISDIR.new(filepath) + when :PM_SOURCE_INIT_ERROR_NON_REGULAR + # Fall back to reading the file through Ruby IO for non-regular + # files (pipes, character devices, etc.) + return with_string(File.read(filepath)) { |string| yield string } else - raise "Unknown error initializing pm_string_t: #{result.inspect}" + raise "Unknown error initializing pm_source_t: #{result_ptr.read_int}" end ensure - LibRubyParser.pm_string_free(pm_string) + LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? end end end @@ -234,29 +258,29 @@ module Prism class << self # Mirror the Prism.dump API by using the serialization API. def dump(source, **options) - LibRubyParser::PrismString.with_string(source) { |string| dump_common(string, options) } + LibRubyParser::PrismSource.with_string(source) { |string| dump_common(string, options) } end # Mirror the Prism.dump_file API by using the serialization API. 
def dump_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| dump_common(string, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| dump_common(string, options) } end # Mirror the Prism.lex API by using the serialization API. def lex(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| lex_common(string, code, options) } + LibRubyParser::PrismSource.with_string(code) { |string| lex_common(string, code, options) } end # Mirror the Prism.lex_file API by using the serialization API. def lex_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| lex_common(string, string.read, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| lex_common(string, string.read, options) } end # Mirror the Prism.parse API by using the serialization API. def parse(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_common(string, code, options) } + LibRubyParser::PrismSource.with_string(code) { |string| parse_common(string, code, options) } end # Mirror the Prism.parse_file API by using the serialization API. This uses @@ -264,7 +288,7 @@ module Prism # when it is available. def parse_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_common(string, string.read, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| parse_common(string, string.read, options) } end # Mirror the Prism.parse_stream API by using the serialization API. @@ -280,19 +304,21 @@ module Prism end } - # In the pm_serialize_parse_stream function it accepts a pointer to the - # IO object as a void* and then passes it through to the callback as the - # third argument, but it never touches it itself. 
As such, since we have - # access to the IO object already through the closure of the lambda, we - # can pass a null pointer here and not worry. - LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options)) - Prism.load(source, buffer.read, options.fetch(:freeze, false)) + eof_callback = -> (_) { stream.eof? } + + pm_source = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback) + begin + LibRubyParser.pm_serialize_parse_stream(buffer.pointer, pm_source, dump_options(options)) + Prism.load(source, buffer.read, options.fetch(:freeze, false)) + ensure + LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? + end end end # Mirror the Prism.parse_comments API by using the serialization API. def parse_comments(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) } + LibRubyParser::PrismSource.with_string(code) { |string| parse_comments_common(string, code, options) } end # Mirror the Prism.parse_file_comments API by using the serialization @@ -300,23 +326,23 @@ module Prism # to use mmap when it is available. def parse_file_comments(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_comments_common(string, string.read, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| parse_comments_common(string, string.read, options) } end # Mirror the Prism.parse_lex API by using the serialization API. def parse_lex(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_lex_common(string, code, options) } + LibRubyParser::PrismSource.with_string(code) { |string| parse_lex_common(string, code, options) } end # Mirror the Prism.parse_lex_file API by using the serialization API. 
def parse_lex_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_lex_common(string, string.read, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| parse_lex_common(string, string.read, options) } end # Mirror the Prism.parse_success? API by using the serialization API. def parse_success?(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_file_success_common(string, options) } + LibRubyParser::PrismSource.with_string(code) { |string| parse_file_success_common(string, options) } end # Mirror the Prism.parse_failure? API by using the serialization API. @@ -327,7 +353,7 @@ module Prism # Mirror the Prism.parse_file_success? API by using the serialization API. def parse_file_success?(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_file_success_common(string, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| parse_file_success_common(string, options) } end # Mirror the Prism.parse_file_failure? API by using the serialization API. @@ -337,7 +363,7 @@ module Prism # Mirror the Prism.profile API by using the serialization API. def profile(source, **options) - LibRubyParser::PrismString.with_string(source) do |string| + LibRubyParser::PrismSource.with_string(source) do |string| LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) nil @@ -347,7 +373,7 @@ module Prism # Mirror the Prism.profile_file API by using the serialization API. 
def profile_file(filepath, **options) - LibRubyParser::PrismString.with_file(filepath) do |string| + LibRubyParser::PrismSource.with_file(filepath) do |string| LibRubyParser::PrismBuffer.with do |buffer| options[:filepath] = filepath LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) @@ -396,7 +422,7 @@ module Prism end def parse_file_success_common(string, options) # :nodoc: - LibRubyParser.pm_parse_success_p(string.pointer, string.length, dump_options(options)) + LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options)) end # Return the value that should be dumped for the command_line option. @@ -420,16 +446,35 @@ module Prism # Return the value that should be dumped for the version option. def dump_options_version(version) case version - when nil, "latest" - 0 + when "current" + version_string_to_number(RUBY_VERSION) || raise(CurrentVersionError, RUBY_VERSION) + when "latest", nil + 0 # Handled in pm_parser_init + when "nearest" + dump = version_string_to_number(RUBY_VERSION) + return dump if dump + if RUBY_VERSION < "3.3" + version_string_to_number("3.3") + else + 0 # Handled in pm_parser_init + end + else + version_string_to_number(version) || raise(ArgumentError, "invalid version: #{version}") + end + end + + # Converts a version string like "4.0.0" or "4.0" into a number. + # Returns nil if the version is unknown. + def version_string_to_number(version) + case version when /\A3\.3(\.\d+)?\z/ 1 when /\A3\.4(\.\d+)?\z/ 2 - when /\A3\.5(\.\d+)?\z/ - 0 - else - raise ArgumentError, "invalid version: #{version}" + when /\A3\.5(\.\d+)?\z/, /\A4\.0(\.\d+)?\z/ + 3 + when /\A4\.1(\.\d+)?\z/ + 4 end end @@ -531,7 +576,7 @@ module Prism # Here we are going to patch StringQuery to put in the class-level methods so # that it can maintain a consistent interface - class StringQuery + class StringQuery # :nodoc: class << self # Mirrors the C extension's StringQuery::local? method. 
def local?(string) diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index a83c24cb41..7aacec037d 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -1,28 +1,64 @@ # frozen_string_literal: true - -require "delegate" -require "ripper" +# :markup: markdown +#-- +# rbs_inline: enabled module Prism + # @rbs! + # module Translation + # class Ripper + # EXPR_NONE: Integer + # EXPR_BEG: Integer + # EXPR_MID: Integer + # EXPR_END: Integer + # EXPR_CLASS: Integer + # EXPR_VALUE: Integer + # EXPR_ARG: Integer + # EXPR_CMDARG: Integer + # EXPR_ENDARG: Integer + # EXPR_ENDFN: Integer + # + # class Lexer < Ripper + # class State + # def self.[]: (Integer value) -> State + # end + # end + # + # class LineAndColumnCache + # def initialize: (Source source) -> void + # + # def line_and_column: (Integer byte_offset) -> [Integer, Integer] + # end + # end + # end + # This class is responsible for lexing the source using prism and then # converting those tokens to be compatible with Ripper. In the vast majority # of cases, this is a one-to-one mapping of the token type. Everything else # generally lines up. However, there are a few cases that require special # handling. class LexCompat # :nodoc: + # @rbs! + # # A token produced by the Ripper lexer that Prism is replicating. + # type lex_compat_token = [[Integer, Integer], Symbol, String, untyped] + # A result class specialized for holding tokens produced by the lexer. class Result < Prism::Result # The list of tokens that were produced by the lexer. - attr_reader :value + attr_reader :value #: Array[lex_compat_token] # Create a new lex compat result object with the given values. - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + #-- + #: (Array[lex_compat_token] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? 
data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) @value = value - super(comments, magic_comments, data_loc, errors, warnings, source) + super(comments, magic_comments, data_loc, errors, warnings, continuable, source) end # Implement the hash pattern matching interface for Result. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: super.merge!(value: value) end end @@ -104,6 +140,7 @@ module Prism KEYWORD_DEF: :on_kw, KEYWORD_DEFINED: :on_kw, KEYWORD_DO: :on_kw, + KEYWORD_DO_BLOCK: :on_kw, KEYWORD_DO_LOOP: :on_kw, KEYWORD_ELSE: :on_kw, KEYWORD_ELSIF: :on_kw, @@ -198,93 +235,6 @@ module Prism "__END__": :on___end__ }.freeze - # When we produce tokens, we produce the same arrays that Ripper does. - # However, we add a couple of convenience methods onto them to make them a - # little easier to work with. We delegate all other methods to the array. - class Token < SimpleDelegator - # @dynamic initialize, each, [] - - # The location of the token in the source. - def location - self[0] - end - - # The type of the token. - def event - self[1] - end - - # The slice of the source that this token represents. - def value - self[2] - end - - # The state of the lexer when this token was produced. - def state - self[3] - end - end - - # Ripper doesn't include the rest of the token in the event, so we need to - # trim it down to just the content on the first line when comparing. 
- class EndContentToken < Token - def ==(other) # :nodoc: - [self[0], self[1], self[2][0..self[2].index("\n")], self[3]] == other - end - end - - # Tokens where state should be ignored - # used for :on_comment, :on_heredoc_end, :on_embexpr_end - class IgnoreStateToken < Token - def ==(other) # :nodoc: - self[0...-1] == other[0...-1] - end - end - - # Ident tokens for the most part are exactly the same, except sometimes we - # know an ident is a local when ripper doesn't (when they are introduced - # through named captures in regular expressions). In that case we don't - # compare the state. - class IdentToken < Token - def ==(other) # :nodoc: - (self[0...-1] == other[0...-1]) && ( - (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) || - (other[3] & Ripper::EXPR_ARG_ANY != 0) - ) - end - end - - # Ignored newlines can occasionally have a LABEL state attached to them, so - # we compare the state differently here. - class IgnoredNewlineToken < Token - def ==(other) # :nodoc: - return false unless self[0...-1] == other[0...-1] - - if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED - other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0 - else - self[3] == other[3] - end - end - end - - # If we have an identifier that follows a method name like: - # - # def foo bar - # - # then Ripper will mark bar as END|LABEL if there is a local in a parent - # scope named bar because it hasn't pushed the local table yet. We do this - # more accurately, so we need to allow comparing against both END and - # END|LABEL. - class ParamToken < Token - def ==(other) # :nodoc: - (self[0...-1] == other[0...-1]) && ( - (other[3] == Ripper::EXPR_END) || - (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL) - ) - end - end - # A heredoc in this case is a list of tokens that belong to the body of the # heredoc that should be appended onto the list of tokens when the heredoc # closes. 
@@ -294,16 +244,19 @@ module Prism # order back into the token stream and set the state of the last token to # the state that the heredoc was opened in. class PlainHeredoc # :nodoc: - attr_reader :tokens + attr_reader :tokens #: Array[lex_compat_token] + #: () -> void def initialize @tokens = [] end + #: (lex_compat_token token) -> void def <<(token) tokens << token end + #: () -> Array[lex_compat_token] def to_a tokens end @@ -313,22 +266,26 @@ module Prism # that need to be split on "\\\n" to mimic Ripper's behavior. We also need # to keep track of the state that the heredoc was opened in. class DashHeredoc # :nodoc: - attr_reader :split, :tokens + attr_reader :split #: bool + attr_reader :tokens #: Array[lex_compat_token] + #: (bool split) -> void def initialize(split) @split = split @tokens = [] end + #: (lex_compat_token token) -> void def <<(token) tokens << token end + #: () -> Array[lex_compat_token] def to_a embexpr_balance = 0 - tokens.each_with_object([]) do |token, results| #$ Array[Token] - case token.event + tokens.each_with_object([]) do |token, results| #$ Array[lex_compat_token] + case token[1] when :on_embexpr_beg embexpr_balance += 1 results << token @@ -343,9 +300,9 @@ module Prism if split # Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind # to keep the delimiter in the result. - token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index| + token[2].split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index| column = 0 if index > 0 - results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) + results << [[lineno, column], :on_tstring_content, value, token[3]] lineno += value.count("\n") end else @@ -374,8 +331,13 @@ module Prism class DedentingHeredoc # :nodoc: TAB_WIDTH = 8 - attr_reader :tokens, :dedent_next, :dedent, :embexpr_balance + attr_reader :tokens #: Array[lex_compat_token] + attr_reader :dedent_next #: bool + attr_reader :dedent #: Integer? 
+ attr_reader :embexpr_balance #: Integer + # @rbs @ended_on_newline: bool + #: () -> void def initialize @tokens = [] @dedent_next = true @@ -387,8 +349,10 @@ module Prism # As tokens are coming in, we track the minimum amount of common leading # whitespace on plain string content tokens. This allows us to later # remove that amount of whitespace from the beginning of each line. + # + #: (lex_compat_token token) -> void def <<(token) - case token.event + case token[1] when :on_embexpr_beg, :on_heredoc_beg @embexpr_balance += 1 @dedent = 0 if @dedent_next && @ended_on_newline @@ -396,10 +360,10 @@ module Prism @embexpr_balance -= 1 when :on_tstring_content if embexpr_balance == 0 - line = token.value + line = token[2] if dedent_next && !(line.strip.empty? && line.end_with?("\n")) - leading = line[/\A(\s*)\n?/, 1] + leading = line[/\A(\s*)\n?/, 1] #: String next_dedent = 0 leading.each_char do |char| @@ -419,20 +383,21 @@ module Prism end end - @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0 + @dedent_next = token[1] == :on_tstring_content && embexpr_balance == 0 @ended_on_newline = false tokens << token end + #: () -> Array[lex_compat_token] def to_a # If every line in the heredoc is blank, we still need to split up the # string content token into multiple tokens. if dedent.nil? 
- results = [] #: Array[Token] + results = [] #: Array[lex_compat_token] embexpr_balance = 0 tokens.each do |token| - case token.event + case token[1] when :on_embexpr_beg, :on_heredoc_beg embexpr_balance += 1 results << token @@ -444,9 +409,9 @@ module Prism lineno = token[0][0] column = token[0][1] - token.value.split(/(?<=\n)/).each_with_index do |value, index| + token[2].split(/(?<=\n)/).each_with_index do |value, index| column = 0 if index > 0 - results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) + results << [[lineno, column], :on_tstring_content, value, token[3]] lineno += 1 end else @@ -463,7 +428,7 @@ module Prism # If the minimum common whitespace is 0, then we need to concatenate # string nodes together that are immediately adjacent. if dedent == 0 - results = [] #: Array[Token] + results = [] #: Array[lex_compat_token] embexpr_balance = 0 index = 0 @@ -474,15 +439,15 @@ module Prism results << token index += 1 - case token.event + case token[1] when :on_embexpr_beg, :on_heredoc_beg embexpr_balance += 1 when :on_embexpr_end, :on_heredoc_end embexpr_balance -= 1 when :on_tstring_content if embexpr_balance == 0 - while index < max_index && tokens[index].event == :on_tstring_content && !token.value.match?(/\\\r?\n\z/) - token.value << tokens[index].value + while index < max_index && tokens[index][1] == :on_tstring_content && !token[2].match?(/\\\r?\n\z/) + token[2] << tokens[index][2] index += 1 end end @@ -496,7 +461,7 @@ module Prism # insert on_ignored_sp tokens for the amount of dedent that we need to # perform. We also need to remove the dedent from the beginning of # each line of plain string content tokens. - results = [] #: Array[Token] + results = [] #: Array[lex_compat_token] dedent_next = true embexpr_balance = 0 @@ -505,7 +470,7 @@ module Prism # whitespace calculation we performed above. 
This is because # checking if the subsequent token needs to be dedented is common to # both the dedent calculation and the ignored_sp insertion. - case token.event + case token[1] when :on_embexpr_beg embexpr_balance += 1 results << token @@ -517,7 +482,7 @@ module Prism # Here we're going to split the string on newlines, but maintain # the newlines in the resulting array. We'll do that with a look # behind assertion. - splits = token.value.split(/(?<=\n)/) + splits = token[2].split(/(?<=\n)/) index = 0 while index < splits.length @@ -535,7 +500,8 @@ module Prism # line or this line doesn't start with whitespace, then we # should concatenate the rest of the string to match ripper. if dedent == 0 && (!dedent_next || !line.start_with?(/\s/)) - line = splits[index..].join + unjoined = splits[index..] #: Array[String] + line = unjoined.join index = splits.length end @@ -574,12 +540,12 @@ module Prism ignored = deleted_chars.join line.delete_prefix!(ignored) - results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]]) + results << [[lineno, 0], :on_ignored_sp, ignored, token[3]] column = ignored.length end end - results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty? + results << [[lineno, column], token[1], line, token[3]] unless line.empty? index += 1 end else @@ -590,7 +556,7 @@ module Prism end dedent_next = - ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) && + ((token[1] == :on_tstring_content) || (token[1] == :on_heredoc_end)) && embexpr_balance == 0 end @@ -600,12 +566,14 @@ module Prism # Here we will split between the two types of heredocs and return the # object that will store their tokens. 
+ #-- + #: (lex_compat_token opening) -> (PlainHeredoc | DashHeredoc | DedentingHeredoc) def self.build(opening) - case opening.value[2] + case opening[2][2] when "~" DedentingHeredoc.new when "-" - DashHeredoc.new(opening.value[3] != "'") + DashHeredoc.new(opening[2][3] != "'") else PlainHeredoc.new end @@ -614,33 +582,43 @@ module Prism private_constant :Heredoc - attr_reader :source, :options + # In previous versions of Ruby, Ripper wouldn't flush the bom before the + # first token, so we had to have a hack in place to account for that. + BOM_FLUSHED = RUBY_VERSION >= "3.3.0" + private_constant :BOM_FLUSHED + attr_reader :options #: Hash[Symbol, untyped] + # @rbs @source: String + + #: (String source, **untyped options) -> void def initialize(source, **options) @source = source @options = options end + #: () -> Result def result - tokens = [] #: Array[LexCompat::Token] + tokens = [] #: Array[lex_compat_token] state = :default heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]] - result = Prism.lex(source, **options) + result = Prism.lex(@source, **options) + source = result.source result_value = result.value - previous_state = nil #: Ripper::Lexer::State? + previous_state = nil #: Translation::Ripper::Lexer::State? last_heredoc_end = nil #: Integer? + eof_token = nil #: Token? + + bom = source.slice(0, 3) == "\xEF\xBB\xBF" - # In previous versions of Ruby, Ripper wouldn't flush the bom before the - # first token, so we had to have a hack in place to account for that. This - # checks for that behavior. 
- bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0 - bom = source.byteslice(0..2) == "\xEF\xBB\xBF" + result_value.each_with_index do |(prism_token, prism_state), index| + lineno = prism_token.location.start_line + column = prism_token.location.start_column - result_value.each_with_index do |(token, lex_state), index| - lineno = token.location.start_line - column = token.location.start_column + event = RIPPER.fetch(prism_token.type) + value = prism_token.value + lex_state = Translation::Ripper::Lexer::State[prism_state] # If there's a UTF-8 byte-order mark as the start of the file, then for # certain tokens ripper sets the first token back by 3 bytes. It also @@ -650,70 +628,53 @@ module Prism if bom && lineno == 1 column -= 3 - if index == 0 && column == 0 && !bom_flushed + if index == 0 && column == 0 && !BOM_FLUSHED flushed = - case token.type + case prism_token.type when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE, :GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I, :PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I, :PERCENT_UPPER_W, :STRING_BEGIN true when :REGEXP_BEGIN, :SYMBOL_BEGIN - token.value.start_with?("%") + value.start_with?("%") else false end unless flushed column -= 3 - value = token.value value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding)) end end end - event = RIPPER.fetch(token.type) - value = token.value - lex_state = Ripper::Lexer::State.new(lex_state) - - token = + lex_compat_token = case event when :on___end__ - EndContentToken.new([[lineno, column], event, value, lex_state]) + # Ripper doesn't include the rest of the token in the event, so we need to + # trim it down to just the content on the first line. 
+ value = value[0..value.index("\n")] #: String + [[lineno, column], event, value, lex_state] when :on_comment - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_heredoc_end # Heredoc end tokens can be emitted in an odd order, so we don't # want to bother comparing the state on them. - last_heredoc_end = token.location.end_offset - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) - when :on_ident - if lex_state == Ripper::EXPR_END - # If we have an identifier that follows a method name like: - # - # def foo bar - # - # then Ripper will mark bar as END|LABEL if there is a local in a - # parent scope named bar because it hasn't pushed the local table - # yet. We do this more accurately, so we need to allow comparing - # against both END and END|LABEL. - ParamToken.new([[lineno, column], event, value, lex_state]) - elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL - # In the event that we're comparing identifiers, we're going to - # allow a little divergence. Ripper doesn't account for local - # variables introduced through named captures in regexes, and we - # do, which accounts for this difference. - IdentToken.new([[lineno, column], event, value, lex_state]) - else - Token.new([[lineno, column], event, value, lex_state]) - end + last_heredoc_end = prism_token.location.end_offset + [[lineno, column], event, value, lex_state] when :on_embexpr_end - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) - when :on_ignored_nl - # Ignored newlines can occasionally have a LABEL state attached to - # them which doesn't actually impact anything. We don't mirror that - # state so we ignored it. - IgnoredNewlineToken.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] + when :on_words_sep + # Ripper emits one token each per line. 
+ value.each_line.with_index do |line, index| + if index > 0 + lineno += 1 + column = 0 + end + tokens << [[lineno, column], event, line, lex_state] + end + tokens.pop #: lex_compat_token when :on_regexp_end # On regex end, Ripper scans and then sets end state, so the ripper # lexed output is begin, when it should be end. prism sets lex state @@ -738,13 +699,14 @@ module Prism counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0 end - Ripper::Lexer::State.new(result_value[current_index][1]) + Translation::Ripper::Lexer::State[result_value[current_index][1]] else previous_state end - Token.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_eof + eof_token = prism_token previous_token = result_value[index - 1][0] # If we're at the end of the file and the previous token was a @@ -759,7 +721,7 @@ module Prism # Use the greater offset of the two to determine the start of # the trailing whitespace. start_offset = [previous_token.location.end_offset, last_heredoc_end].compact.max - end_offset = token.location.start_offset + end_offset = prism_token.location.start_offset if start_offset < end_offset if bom @@ -767,14 +729,14 @@ module Prism end_offset += 3 end - tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state]) + tokens << [[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state] end end - Token.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] else - Token.new([[lineno, column], event, value, lex_state]) - end + [[lineno, column], event, value, lex_state] + end #: lex_compat_token previous_state = lex_state @@ -791,19 +753,19 @@ module Prism when :default # The default state is when there are no heredocs at all. In this # state we can append the token to the list of tokens and move on. 
- tokens << token + tokens << lex_compat_token # If we get the declaration of a heredoc, then we open a new heredoc # and move into the heredoc_opened state. if event == :on_heredoc_beg state = :heredoc_opened - heredoc_stack.last << Heredoc.build(token) + heredoc_stack.last << Heredoc.build(lex_compat_token) end when :heredoc_opened # The heredoc_opened state is when we've seen the declaration of a # heredoc and are now lexing the body of the heredoc. In this state we # push tokens onto the most recently created heredoc. - heredoc_stack.last.last << token + heredoc_stack.last.last << lex_compat_token case event when :on_heredoc_beg @@ -811,7 +773,7 @@ module Prism # heredoc, this means we have nested heredocs. In this case we'll # push a new heredoc onto the stack and stay in the heredoc_opened # state since we're now lexing the body of the new heredoc. - heredoc_stack << [Heredoc.build(token)] + heredoc_stack << [Heredoc.build(lex_compat_token)] when :on_heredoc_end # If we receive the end of a heredoc, then we're done lexing the # body of the heredoc. 
In this case we now have a completed heredoc @@ -820,10 +782,10 @@ module Prism state = :heredoc_closed end when :heredoc_closed - if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n")) + if %i[on_nl on_ignored_nl on_comment].include?(event) || ((event == :on_tstring_content) && value.end_with?("\n")) if heredoc_stack.size > 1 - flushing = heredoc_stack.pop - heredoc_stack.last.last << token + flushing = heredoc_stack.pop #: Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc] + heredoc_stack.last.last << lex_compat_token flushing.each do |heredoc| heredoc.to_a.each do |flushed_token| @@ -835,12 +797,12 @@ module Prism next end elsif event == :on_heredoc_beg - tokens << token + tokens << lex_compat_token state = :heredoc_opened - heredoc_stack.last << Heredoc.build(token) + heredoc_stack.last << Heredoc.build(lex_compat_token) next elsif heredoc_stack.size > 1 - heredoc_stack[-2].last << token + heredoc_stack[-2].last << lex_compat_token next end @@ -851,77 +813,94 @@ module Prism heredoc_stack.last.clear state = :default - tokens << token + tokens << lex_compat_token end end - # Drop the EOF token from the list - tokens = tokens[0...-1] - - # We sort by location to compare against Ripper's output - tokens.sort_by!(&:location) - - Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.for(source)) - end - end + # Drop the EOF token from the list. The EOF token may not be + # present if the source was syntax invalid + if tokens.dig(-1, 1) == :on_eof + tokens = tokens[0...-1] #: Array[lex_compat_token] + end - private_constant :LexCompat + # We sort by location because Ripper.lex sorts. + tokens.sort_by! do |token| + line, column = token[0] + source.byte_offset(line, column) + end - # This is a class that wraps the Ripper lexer to produce almost exactly the - # same tokens. 
- class LexRipper # :nodoc: - attr_reader :source + tokens = post_process_tokens(tokens, source, result.data_loc, bom, eof_token) - def initialize(source) - @source = source + Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, result.continuable?, source) end - def result - previous = [] #: [[Integer, Integer], Symbol, String, untyped] | [] - results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]] - - lex(source).each do |token| - case token[1] - when :on_sp - # skip - when :on_tstring_content - if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@")) - previous[2] << token[2] - else - results << token - previous = token - end - when :on_words_sep - if previous[1] == :on_words_sep - previous[2] << token[2] + private + + #: (Array[lex_compat_token] tokens, Source source, Location? data_loc, bool bom, Token? eof_token) -> Array[lex_compat_token] + def post_process_tokens(tokens, source, data_loc, bom, eof_token) + new_tokens = [] #: Array[lex_compat_token] + + prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG] + prev_token_end = bom ? 3 : 0 + + cache = Translation::Ripper::LineAndColumnCache.new(source) + + tokens.each do |token| + # Skip missing heredoc ends. + next if token[1] == :on_heredoc_end && token[2] == "" + + # Add :on_sp tokens. 
+ line, column = token[0] + start_offset = source.byte_offset(line, column) + + # Ripper reports columns on line 1 without counting the BOM, so we + # adjust to get the real offset + start_offset += 3 if line == 1 && bom + + if start_offset > prev_token_end + sp_value = source.slice(prev_token_end, start_offset - prev_token_end) + sp_line, sp_column = cache.line_and_column(prev_token_end) + # Ripper reports columns on line 1 without counting the BOM + sp_column -= 3 if sp_line == 1 && bom + continuation_index = sp_value.byteindex("\\") + + # ripper emits up to three :on_sp tokens when line continuations are used + if continuation_index + next_whitespace_index = continuation_index + 1 + next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r" + next_whitespace_index += 1 + first_whitespace = sp_value[0...continuation_index] #: String + continuation = sp_value[continuation_index...next_whitespace_index] #: String + second_whitespace = sp_value[next_whitespace_index..] || "" + + new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty? + new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state] + new_tokens << [[sp_line + 1, 0], :on_sp, second_whitespace, prev_token_state] unless second_whitespace.empty? else - results << token - previous = token + new_tokens << [[sp_line, sp_column], :on_sp, sp_value, prev_token_state] end - else - results << token - previous = token end - end - - results - end - - private - if Ripper.method(:lex).parameters.assoc(:keyrest) - def lex(source) - Ripper.lex(source, raise_errors: true) + new_tokens << token + prev_token_state = token[3] + prev_token_end = start_offset + token[2].bytesize end - else - def lex(source) - ripper = Ripper::Lexer.new(source) - ripper.lex.tap do |result| - raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any? 
+ + if !data_loc && eof_token # no trailing :on_sp with __END__ as it is always preceded by :on_nl + end_offset = eof_token.location.end_offset + if prev_token_end < end_offset + new_tokens << [ + [source.line(prev_token_end), source.column(prev_token_end)], + :on_sp, + source.slice(prev_token_end, end_offset - prev_token_end), + prev_token_state + ] end end + + new_tokens end end - private_constant :LexRipper + private_constant :LexCompat end diff --git a/lib/prism/node_ext.rb b/lib/prism/node_ext.rb index b007a051ea..8a6624e76d 100644 --- a/lib/prism/node_ext.rb +++ b/lib/prism/node_ext.rb @@ -1,12 +1,17 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled +#-- # Here we are reopening the prism module to provide methods on nodes that aren't # templated and are meant as convenience methods. +#++ module Prism class Node + #: (*String replacements) -> void def deprecated(*replacements) # :nodoc: - location = caller_locations(1, 1) - location = location[0].label if location + location = caller_locations(1, 1)&.[](0)&.label suggest = replacements.map { |replacement| "#{self.class}##{replacement}" } warn(<<~MSG, uplevel: 1, category: :deprecated) @@ -20,7 +25,9 @@ module Prism module RegularExpressionOptions # :nodoc: # Returns a numeric value that represents the flags that were used to create # the regular expression. - def options + #-- + #: (Integer flags) -> Integer + def self.options(flags) o = 0 o |= Regexp::IGNORECASE if flags.anybits?(RegularExpressionFlags::IGNORE_CASE) o |= Regexp::EXTENDED if flags.anybits?(RegularExpressionFlags::EXTENDED) @@ -32,43 +39,87 @@ module Prism end class InterpolatedMatchLastLineNode < Node - include RegularExpressionOptions + # Returns a numeric value that represents the flags that were used to create + # the regular expression. 
+ #-- + #: () -> Integer + def options + RegularExpressionOptions.options(flags) + end end class InterpolatedRegularExpressionNode < Node - include RegularExpressionOptions + # Returns a numeric value that represents the flags that were used to create + # the regular expression. + #-- + #: () -> Integer + def options + RegularExpressionOptions.options(flags) + end end class MatchLastLineNode < Node - include RegularExpressionOptions + # Returns a numeric value that represents the flags that were used to create + # the regular expression. + #-- + #: () -> Integer + def options + RegularExpressionOptions.options(flags) + end end class RegularExpressionNode < Node - include RegularExpressionOptions + # Returns a numeric value that represents the flags that were used to create + # the regular expression. + #-- + #: () -> Integer + def options + RegularExpressionOptions.options(flags) + end end private_constant :RegularExpressionOptions module HeredocQuery # :nodoc: # Returns true if this node was represented as a heredoc in the source code. - def heredoc? + #-- + #: (String? opening) -> bool? + def self.heredoc?(opening) + # @type self: InterpolatedStringNode | InterpolatedXStringNode | StringNode | XStringNode opening&.start_with?("<<") end end class InterpolatedStringNode < Node - include HeredocQuery + # Returns true if this node was represented as a heredoc in the source code. + #-- + #: () -> bool? + def heredoc? + HeredocQuery.heredoc?(opening) + end end class InterpolatedXStringNode < Node - include HeredocQuery + # Returns true if this node was represented as a heredoc in the source code. + #-- + #: () -> bool? + def heredoc? + HeredocQuery.heredoc?(opening) + end end class StringNode < Node - include HeredocQuery + # Returns true if this node was represented as a heredoc in the source code. + #-- + #: () -> bool? + def heredoc? 
+ HeredocQuery.heredoc?(opening) + end # Occasionally it's helpful to treat a string as if it were interpolated so # that there's a consistent interface for working with strings. + #-- + #: () -> InterpolatedStringNode def to_interpolated InterpolatedStringNode.new( source, @@ -83,10 +134,17 @@ module Prism end class XStringNode < Node - include HeredocQuery + # Returns true if this node was represented as a heredoc in the source code. + #-- + #: () -> bool? + def heredoc? + HeredocQuery.heredoc?(opening) + end # Occasionally it's helpful to treat a string as if it were interpolated so # that there's a consistent interface for working with strings. + #-- + #: () -> InterpolatedXStringNode def to_interpolated InterpolatedXStringNode.new( source, @@ -104,6 +162,8 @@ module Prism class ImaginaryNode < Node # Returns the value of the node as a Ruby Complex. + #-- + #: () -> Complex def value Complex(0, numeric.value) end @@ -111,31 +171,25 @@ module Prism class RationalNode < Node # Returns the value of the node as a Ruby Rational. + #-- + #: () -> Rational def value Rational(numerator, denominator) end - - # Returns the value of the node as an IntegerNode or a FloatNode. This - # method is deprecated in favor of #value or #numerator/#denominator. - def numeric - deprecated("value", "numerator", "denominator") - - if denominator == 1 - IntegerNode.new(source, -1, location.chop, flags, numerator) - else - FloatNode.new(source, -1, location.chop, 0, numerator.to_f / denominator) - end - end end class ConstantReadNode < Node # Returns the list of parts for the full name of this constant. # For example: [:Foo] + #-- + #: () -> Array[Symbol] def full_name_parts [name] end # Returns the full name of this constant. For example: "Foo" + #-- + #: () -> String def full_name name.to_s end @@ -144,11 +198,15 @@ module Prism class ConstantWriteNode < Node # Returns the list of parts for the full name of this constant. 
# For example: [:Foo] + #-- + #: () -> Array[Symbol] def full_name_parts [name] end # Returns the full name of this constant. For example: "Foo" + #-- + #: () -> String def full_name name.to_s end @@ -163,13 +221,15 @@ module Prism # local variable class DynamicPartsInConstantPathError < StandardError; end - # An error class raised when missing nodes are found while computing a + # An error class raised when error recovery nodes are found while computing a # constant path's full name. For example: # Foo:: -> raises because the constant path is missing the last part - class MissingNodesInConstantPathError < StandardError; end + class ErrorRecoveryNodesInConstantPathError < StandardError; end # Returns the list of parts for the full name of this constant path. # For example: [:Foo, :Bar] + #-- + #: () -> Array[Symbol] def full_name_parts parts = [] #: Array[Symbol] current = self #: node? @@ -177,7 +237,7 @@ module Prism while current.is_a?(ConstantPathNode) name = current.name if name.nil? - raise MissingNodesInConstantPathError, "Constant path contains missing nodes. Cannot compute full name" + raise ErrorRecoveryNodesInConstantPathError, "Constant path contains error recovery nodes. Cannot compute full name" end parts.unshift(name) @@ -192,30 +252,21 @@ module Prism end # Returns the full name of this constant path. For example: "Foo::Bar" + #-- + #: () -> String def full_name full_name_parts.join("::") end - - # Previously, we had a child node on this class that contained either a - # constant read or a missing node. To not cause a breaking change, we - # continue to supply that API. - def child - deprecated("name", "name_loc") - - if name - ConstantReadNode.new(source, -1, name_loc, 0, name) - else - MissingNode.new(source, -1, location, 0) - end - end end class ConstantPathTargetNode < Node # Returns the list of parts for the full name of this constant path. 
# For example: [:Foo, :Bar] + #-- + #: () -> Array[Symbol] def full_name_parts parts = - case parent + case (parent = self.parent) when ConstantPathNode, ConstantReadNode parent.full_name_parts when nil @@ -225,40 +276,33 @@ module Prism raise ConstantPathNode::DynamicPartsInConstantPathError, "Constant target path contains dynamic parts. Cannot compute full name" end - if name.nil? - raise ConstantPathNode::MissingNodesInConstantPathError, "Constant target path contains missing nodes. Cannot compute full name" + if (name = self.name).nil? + raise ConstantPathNode::ErrorRecoveryNodesInConstantPathError, "Constant target path contains error recovery nodes. Cannot compute full name" end parts.push(name) end # Returns the full name of this constant path. For example: "Foo::Bar" + #-- + #: () -> String def full_name full_name_parts.join("::") end - - # Previously, we had a child node on this class that contained either a - # constant read or a missing node. To not cause a breaking change, we - # continue to supply that API. - def child - deprecated("name", "name_loc") - - if name - ConstantReadNode.new(source, -1, name_loc, 0, name) - else - MissingNode.new(source, -1, location, 0) - end - end end class ConstantTargetNode < Node # Returns the list of parts for the full name of this constant. # For example: [:Foo] + #-- + #: () -> Array[Symbol] def full_name_parts [name] end # Returns the full name of this constant. For example: "Foo" + #-- + #: () -> String def full_name name.to_s end @@ -266,6 +310,8 @@ module Prism class ParametersNode < Node # Mirrors the Method#parameters method. 
+ #-- + #: () -> Array[[Symbol, Symbol] | [Symbol]] def signature names = [] #: Array[[Symbol, Symbol] | [Symbol]] @@ -275,7 +321,7 @@ module Prism optionals.each { |param| names << [:opt, param.name] } - if rest && rest.is_a?(RestParameterNode) + if (rest = self.rest).is_a?(RestParameterNode) names << [:rest, rest.name || :*] end @@ -283,8 +329,7 @@ module Prism case param when MultiTargetNode names << [:req] - when NoKeywordsParameterNode, KeywordRestParameterNode, ForwardingParameterNode - # Invalid syntax, e.g. "def f(**nil, ...)" moves the NoKeywordsParameterNode to posts + when ErrorRecoveryNode raise "Invalid syntax" else names << [:req, param.name] @@ -304,7 +349,7 @@ module Prism keyopt.each { |param| names << [:key, param.name] } - case keyword_rest + case (keyword_rest = self.keyword_rest) when ForwardingParameterNode names.concat([[:rest, :*], [:keyrest, :**], [:block, :&]]) when KeywordRestParameterNode @@ -313,7 +358,13 @@ module Prism names << [:nokey] end - names << [:block, block.name || :&] if block + case (block = self.block) + when BlockParameterNode + names << [:block, block.name || :&] + when NoBlockParameterNode + names << [:noblock] + end + names end end @@ -328,181 +379,10 @@ module Prism # can be any amount of space between the message and the = sign. However, # sometimes you want the location of the full message including the inner # space and the = sign. This method provides that. + #-- + #: () -> Location? def full_message_loc attribute_write? ? message_loc&.adjoin("=") : message_loc end end - - class CallOperatorWriteNode < Node - # Returns the binary operator used to modify the receiver. This method is - # deprecated in favor of #binary_operator. - def operator - deprecated("binary_operator") - binary_operator - end - - # Returns the location of the binary operator used to modify the receiver. - # This method is deprecated in favor of #binary_operator_loc. 
- def operator_loc - deprecated("binary_operator_loc") - binary_operator_loc - end - end - - class ClassVariableOperatorWriteNode < Node - # Returns the binary operator used to modify the receiver. This method is - # deprecated in favor of #binary_operator. - def operator - deprecated("binary_operator") - binary_operator - end - - # Returns the location of the binary operator used to modify the receiver. - # This method is deprecated in favor of #binary_operator_loc. - def operator_loc - deprecated("binary_operator_loc") - binary_operator_loc - end - end - - class ConstantOperatorWriteNode < Node - # Returns the binary operator used to modify the receiver. This method is - # deprecated in favor of #binary_operator. - def operator - deprecated("binary_operator") - binary_operator - end - - # Returns the location of the binary operator used to modify the receiver. - # This method is deprecated in favor of #binary_operator_loc. - def operator_loc - deprecated("binary_operator_loc") - binary_operator_loc - end - end - - class ConstantPathOperatorWriteNode < Node - # Returns the binary operator used to modify the receiver. This method is - # deprecated in favor of #binary_operator. - def operator - deprecated("binary_operator") - binary_operator - end - - # Returns the location of the binary operator used to modify the receiver. - # This method is deprecated in favor of #binary_operator_loc. - def operator_loc - deprecated("binary_operator_loc") - binary_operator_loc - end - end - - class GlobalVariableOperatorWriteNode < Node - # Returns the binary operator used to modify the receiver. This method is - # deprecated in favor of #binary_operator. - def operator - deprecated("binary_operator") - binary_operator - end - - # Returns the location of the binary operator used to modify the receiver. - # This method is deprecated in favor of #binary_operator_loc. 
- def operator_loc - deprecated("binary_operator_loc") - binary_operator_loc - end - end - - class IndexOperatorWriteNode < Node - # Returns the binary operator used to modify the receiver. This method is - # deprecated in favor of #binary_operator. - def operator - deprecated("binary_operator") - binary_operator - end - - # Returns the location of the binary operator used to modify the receiver. - # This method is deprecated in favor of #binary_operator_loc. - def operator_loc - deprecated("binary_operator_loc") - binary_operator_loc - end - end - - class InstanceVariableOperatorWriteNode < Node - # Returns the binary operator used to modify the receiver. This method is - # deprecated in favor of #binary_operator. - def operator - deprecated("binary_operator") - binary_operator - end - - # Returns the location of the binary operator used to modify the receiver. - # This method is deprecated in favor of #binary_operator_loc. - def operator_loc - deprecated("binary_operator_loc") - binary_operator_loc - end - end - - class LocalVariableOperatorWriteNode < Node - # Returns the binary operator used to modify the receiver. This method is - # deprecated in favor of #binary_operator. - def operator - deprecated("binary_operator") - binary_operator - end - - # Returns the location of the binary operator used to modify the receiver. - # This method is deprecated in favor of #binary_operator_loc. - def operator_loc - deprecated("binary_operator_loc") - binary_operator_loc - end - end - - class CaseMatchNode < Node - # Returns the else clause of the case match node. This method is deprecated - # in favor of #else_clause. - def consequent - deprecated("else_clause") - else_clause - end - end - - class CaseNode < Node - # Returns the else clause of the case node. This method is deprecated in - # favor of #else_clause. - def consequent - deprecated("else_clause") - else_clause - end - end - - class IfNode < Node - # Returns the subsequent if/elsif/else clause of the if node. 
This method is - # deprecated in favor of #subsequent. - def consequent - deprecated("subsequent") - subsequent - end - end - - class RescueNode < Node - # Returns the subsequent rescue clause of the rescue node. This method is - # deprecated in favor of #subsequent. - def consequent - deprecated("subsequent") - subsequent - end - end - - class UnlessNode < Node - # Returns the else clause of the unless node. This method is deprecated in - # favor of #else_clause. - def consequent - deprecated("else_clause") - else_clause - end - end end diff --git a/lib/prism/node_find.rb b/lib/prism/node_find.rb new file mode 100644 index 0000000000..697ee430e8 --- /dev/null +++ b/lib/prism/node_find.rb @@ -0,0 +1,185 @@ +# frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled + +module Prism + # Finds the Prism AST node corresponding to a given Method, UnboundMethod, + # Proc, or Thread::Backtrace::Location. On CRuby, uses node_id from the + # instruction sequence for an exact match. On other implementations, falls + # back to best-effort matching by source location line number. + # + # This module is autoloaded so that programs that don't use Prism.find don't + # pay for its definition. + module NodeFind # :nodoc: + # Find the node for the given callable or backtrace location. + #-- + #: (Method | UnboundMethod | Proc | Thread::Backtrace::Location callable, bool rubyvm) -> Node? + def self.find(callable, rubyvm) + case callable + when Proc + if rubyvm + RubyVMCallableFind.new.find(callable) + elsif callable.lambda? 
+ LineLambdaFind.new.find(callable) + else + LineProcFind.new.find(callable) + end + when Method, UnboundMethod + if rubyvm + RubyVMCallableFind.new.find(callable) + else + LineMethodFind.new.find(callable) + end + when Thread::Backtrace::Location + if rubyvm + RubyVMBacktraceLocationFind.new.find(callable) + else + LineBacktraceLocationFind.new.find(callable) + end + else + raise ArgumentError, "Expected a Method, UnboundMethod, Proc, or Thread::Backtrace::Location, got #{callable.class}" + end + end + + # Base class that handles parsing a file. + class Find + private + + # Parse the given file path, returning a ParseResult or nil. + #-- + #: (String? file) -> ParseResult? + def parse_file(file) + return unless file && File.readable?(file) + result = Prism.parse_file(file) + result if result.success? + end + end + + # Finds the AST node for a Method, UnboundMethod, or Proc using the node_id + # from the instruction sequence. + class RubyVMCallableFind < Find + # Find the node for the given callable using the ISeq node_id. + #-- + #: (Method | UnboundMethod | Proc callable) -> Node? + def find(callable) + return unless (source_location = callable.source_location) + return unless (result = parse_file(source_location[0])) + return unless (iseq = RubyVM::InstructionSequence.of(callable)) + + header = iseq.to_a[4] + return unless header[:parser] == :prism + + result.value.find { |node| node.node_id == header[:node_id] } + end + end + + # Finds the AST node for a Thread::Backtrace::Location using the node_id + # from the backtrace location. + class RubyVMBacktraceLocationFind < Find + # Find the node for the given backtrace location using node_id. + #-- + #: (Thread::Backtrace::Location location) -> Node? 
+ def find(location) + file = location.absolute_path || location.path + return unless (result = parse_file(file)) + return unless RubyVM::AbstractSyntaxTree.respond_to?(:node_id_for_backtrace_location) + + node_id = RubyVM::AbstractSyntaxTree.node_id_for_backtrace_location(location) + + result.value.find { |node| node.node_id == node_id } + end + end + + # Finds the AST node for a Method or UnboundMethod using best-effort line + # matching. Used on non-CRuby implementations. + class LineMethodFind < Find + # Find the node for the given method by matching on name and line. + #-- + #: (Method | UnboundMethod callable) -> Node? + def find(callable) + return unless (source_location = callable.source_location) + return unless (result = parse_file(source_location[0])) + + name = callable.name + start_line = source_location[1] + + result.value.find do |node| + case node + when DefNode + node.name == name && node.location.start_line == start_line + when CallNode + node.block.is_a?(BlockNode) && node.location.start_line == start_line + else + false + end + end + end + end + + # Finds the AST node for a lambda using best-effort line matching. Used + # on non-CRuby implementations. + class LineLambdaFind < Find + # Find the node for the given lambda by matching on line. + #-- + #: (Proc callable) -> Node? + def find(callable) + return unless (source_location = callable.source_location) + return unless (result = parse_file(source_location[0])) + + start_line = source_location[1] + + result.value.find do |node| + case node + when LambdaNode + node.location.start_line == start_line + when CallNode + node.block.is_a?(BlockNode) && node.location.start_line == start_line + else + false + end + end + end + end + + # Finds the AST node for a non-lambda Proc using best-effort line + # matching. Used on non-CRuby implementations. + class LineProcFind < Find + # Find the node for the given proc by matching on line. + #-- + #: (Proc callable) -> Node? 
+ def find(callable) + return unless (source_location = callable.source_location) + return unless (result = parse_file(source_location[0])) + + start_line = source_location[1] + + result.value.find do |node| + case node + when ForNode + node.location.start_line == start_line + when CallNode + node.block.is_a?(BlockNode) && node.location.start_line == start_line + else + false + end + end + end + end + + # Finds the AST node for a Thread::Backtrace::Location using best-effort + # line matching. Used on non-CRuby implementations. + class LineBacktraceLocationFind < Find + # Find the node for the given backtrace location by matching on line. + #-- + #: (Thread::Backtrace::Location location) -> Node? + def find(location) + file = location.absolute_path || location.path + return unless (result = parse_file(file)) + + start_line = location.lineno + result.value.find { |node| node.location.start_line == start_line } + end + end + end +end diff --git a/lib/prism/pack.rb b/lib/prism/pack.rb deleted file mode 100644 index c0de8ab8b7..0000000000 --- a/lib/prism/pack.rb +++ /dev/null @@ -1,228 +0,0 @@ -# frozen_string_literal: true -# typed: ignore - -module Prism - # A parser for the pack template language. - module Pack - %i[ - SPACE - COMMENT - INTEGER - UTF8 - BER - FLOAT - STRING_SPACE_PADDED - STRING_NULL_PADDED - STRING_NULL_TERMINATED - STRING_MSB - STRING_LSB - STRING_HEX_HIGH - STRING_HEX_LOW - STRING_UU - STRING_MIME - STRING_BASE64 - STRING_FIXED - STRING_POINTER - MOVE - BACK - NULL - - UNSIGNED - SIGNED - SIGNED_NA - - AGNOSTIC_ENDIAN - LITTLE_ENDIAN - BIG_ENDIAN - NATIVE_ENDIAN - ENDIAN_NA - - SIZE_SHORT - SIZE_INT - SIZE_LONG - SIZE_LONG_LONG - SIZE_8 - SIZE_16 - SIZE_32 - SIZE_64 - SIZE_P - SIZE_NA - - LENGTH_FIXED - LENGTH_MAX - LENGTH_RELATIVE - LENGTH_NA - ].each do |const| - const_set(const, const) - end - - # A directive in the pack template language. - class Directive - # A symbol representing the version of Ruby. 
- attr_reader :version - - # A symbol representing whether or not we are packing or unpacking. - attr_reader :variant - - # A byteslice of the source string that this directive represents. - attr_reader :source - - # The type of the directive. - attr_reader :type - - # The type of signedness of the directive. - attr_reader :signed - - # The type of endianness of the directive. - attr_reader :endian - - # The size of the directive. - attr_reader :size - - # The length type of this directive (used for integers). - attr_reader :length_type - - # The length of this directive (used for integers). - attr_reader :length - - # Initialize a new directive with the given values. - def initialize(version, variant, source, type, signed, endian, size, length_type, length) - @version = version - @variant = variant - @source = source - @type = type - @signed = signed - @endian = endian - @size = size - @length_type = length_type - @length = length - end - - # The descriptions of the various types of endianness. - ENDIAN_DESCRIPTIONS = { - AGNOSTIC_ENDIAN: "agnostic", - LITTLE_ENDIAN: "little-endian (VAX)", - BIG_ENDIAN: "big-endian (network)", - NATIVE_ENDIAN: "native-endian", - ENDIAN_NA: "n/a" - } - - # The descriptions of the various types of signedness. - SIGNED_DESCRIPTIONS = { - UNSIGNED: "unsigned", - SIGNED: "signed", - SIGNED_NA: "n/a" - } - - # The descriptions of the various types of sizes. - SIZE_DESCRIPTIONS = { - SIZE_SHORT: "short", - SIZE_INT: "int-width", - SIZE_LONG: "long", - SIZE_LONG_LONG: "long long", - SIZE_8: "8-bit", - SIZE_16: "16-bit", - SIZE_32: "32-bit", - SIZE_64: "64-bit", - SIZE_P: "pointer-width" - } - - # Provide a human-readable description of the directive. 
- def describe - case type - when SPACE - "whitespace" - when COMMENT - "comment" - when INTEGER - if size == SIZE_8 - base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer" - else - base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer" - end - case length_type - when LENGTH_FIXED - if length > 1 - base + ", x#{length}" - else - base - end - when LENGTH_MAX - base + ", as many as possible" - else - raise - end - when UTF8 - "UTF-8 character" - when BER - "BER-compressed integer" - when FLOAT - "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float" - when STRING_SPACE_PADDED - "arbitrary binary string (space padded)" - when STRING_NULL_PADDED - "arbitrary binary string (null padded, count is width)" - when STRING_NULL_TERMINATED - "arbitrary binary string (null padded, count is width), except that null is added with *" - when STRING_MSB - "bit string (MSB first)" - when STRING_LSB - "bit string (LSB first)" - when STRING_HEX_HIGH - "hex string (high nibble first)" - when STRING_HEX_LOW - "hex string (low nibble first)" - when STRING_UU - "UU-encoded string" - when STRING_MIME - "quoted printable, MIME encoding" - when STRING_BASE64 - "base64 encoded string" - when STRING_FIXED - "pointer to a structure (fixed-length string)" - when STRING_POINTER - "pointer to a null-terminated string" - when MOVE - "move to absolute position" - when BACK - "back up a byte" - when NULL - "null byte" - else - raise - end - end - end - - # The result of parsing a pack template. - class Format - # A list of the directives in the template. - attr_reader :directives - - # The encoding of the template. - attr_reader :encoding - - # Create a new Format with the given directives and encoding. - def initialize(directives, encoding) - @directives = directives - @encoding = encoding - end - - # Provide a human-readable description of the format. 
- def describe - source_width = directives.map { |d| d.source.inspect.length }.max - directive_lines = directives.map do |directive| - if directive.type == SPACE - source = directive.source.inspect - else - source = directive.source - end - # @type var source_width: Integer - " #{source.ljust(source_width)} #{directive.describe}" - end - - (["Directives:"] + directive_lines + ["Encoding:", " #{encoding}"]).join("\n") - end - end - end -end diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index 9a3e7c5b79..93d3c006b7 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -1,6 +1,16 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism + # @rbs! + # # An internal interface for a cache that can be used to compute code + # # units from byte offsets. + # interface _CodeUnitsCache + # def []: (Integer byte_offset) -> Integer + # end + # This represents a source of Ruby code that has been parsed. It is used in # conjunction with locations to allow them to resolve line numbers and source # ranges. @@ -9,7 +19,18 @@ module Prism # be used instead of `new` and it will return either a `Source` or a # specialized and more performant `ASCIISource` if no multibyte characters # are present in the source code. - def self.for(source, start_line = 1, offsets = []) + # + # Note that if you are calling this method manually, you will need to supply + # the start_line and offsets parameters. start_line is the line number that + # the source starts on, which is typically 1 but can be different if this + # source is a subset of a larger source or if this is an eval. offsets is an + # array of byte offsets for the start of each line in the source code, which + # can be calculated by iterating through the source code and recording the + # byte offset whenever a newline character is encountered. The first + # element is always 0 to mark the first line. 
+ #-- + #: (String source, Integer start_line, Array[Integer] offsets) -> Source + def self.for(source, start_line, offsets) if source.ascii_only? ASCIISource.new(source, start_line, offsets) elsif source.encoding == Encoding::BINARY @@ -33,77 +54,122 @@ module Prism end # The source code that this source object represents. - attr_reader :source + attr_reader :source #: String # The line number where this source starts. - attr_reader :start_line - - # The list of newline byte offsets in the source code. - attr_reader :offsets - - # Create a new source object with the given source code. - def initialize(source, start_line = 1, offsets = []) + attr_reader :start_line #: Integer + + # The list of newline byte offsets in the source code. When initialized from + # the C extension, this may be a packed binary string of uint32_t values + # that is lazily unpacked on first access. + #-- + #: () -> Array[Integer] + def offsets + offsets = @offsets + return offsets if offsets.is_a?(Array) + @offsets = offsets.unpack("L*") + end + + # Create a new source object with the given source code. The offsets + # parameter can be either an Array of Integer byte offsets or a packed + # binary string of uint32_t values (from the C extension). + #-- + #: (String source, Integer start_line, Array[Integer] | String offsets) -> void + def initialize(source, start_line, offsets) @source = source - @start_line = start_line # set after parsing is done - @offsets = offsets # set after parsing is done + @start_line = start_line + @offsets = offsets end # Replace the value of start_line with the given value. + #-- + #: (Integer start_line) -> void def replace_start_line(start_line) @start_line = start_line end # Replace the value of offsets with the given value. 
+ #-- + #: (Array[Integer] offsets) -> void def replace_offsets(offsets) - @offsets.replace(offsets) + @offsets = offsets end # Returns the encoding of the source code, which is set by parameters to the # parser or by the encoding magic comment. + #-- + #: () -> Encoding def encoding source.encoding end # Returns the lines of the source code as an array of strings. + #-- + #: () -> Array[String] def lines source.lines end # Perform a byteslice on the source code using the given byte offset and # byte length. + #-- + #: (Integer byte_offset, Integer length) -> String def slice(byte_offset, length) source.byteslice(byte_offset, length) or raise end + # Converts the line number and column in bytes to a byte offset. + #-- + #: (Integer line, Integer column) -> Integer + def byte_offset(line, column) + normal = line - @start_line + raise IndexError if normal < 0 + offsets.fetch(normal) + column + rescue IndexError + raise ArgumentError, "line #{line} is out of range" + end + # Binary search through the offsets to find the line number for the given # byte offset. + #-- + #: (Integer byte_offset) -> Integer def line(byte_offset) start_line + find_line(byte_offset) end # Return the byte offset of the start of the line corresponding to the given # byte offset. + #-- + #: (Integer byte_offset) -> Integer def line_start(byte_offset) offsets[find_line(byte_offset)] end # Returns the byte offset of the end of the line corresponding to the given # byte offset. + #-- + #: (Integer byte_offset) -> Integer def line_end(byte_offset) offsets[find_line(byte_offset) + 1] || source.bytesize end - # Return the column number for the given byte offset. + # Return the column in bytes for the given byte offset. + #-- + #: (Integer byte_offset) -> Integer def column(byte_offset) byte_offset - line_start(byte_offset) end # Return the character offset for the given byte offset. 
+ #-- + #: (Integer byte_offset) -> Integer def character_offset(byte_offset) (source.byteslice(0, byte_offset) or raise).length end - # Return the column number in characters for the given byte offset. + # Return the column in characters for the given byte offset. + #-- + #: (Integer byte_offset) -> Integer def character_column(byte_offset) character_offset(byte_offset) - character_offset(line_start(byte_offset)) end @@ -120,7 +186,11 @@ module Prism # possible that the given byte offset will not occur on a character # boundary. Second, it's possible that the source code will contain a # character that has no equivalent in the given encoding. + #-- + #: (Integer byte_offset, Encoding encoding) -> Integer def code_units_offset(byte_offset, encoding) + return byte_offset if encoding == Encoding::UTF_8 + byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace) if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE @@ -132,43 +202,36 @@ module Prism # Generate a cache that targets a specific encoding for calculating code # unit offsets. + #-- + #: (Encoding encoding) -> CodeUnitsCache def code_units_cache(encoding) CodeUnitsCache.new(source, encoding) end - # Returns the column number in code units for the given encoding for the + # Returns the column in code units for the given encoding for the # given byte offset. + #-- + #: (Integer byte_offset, Encoding encoding) -> Integer def code_units_column(byte_offset, encoding) code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding) end # Freeze this object and the objects it contains. + #-- + #: () -> void def deep_freeze source.freeze offsets.freeze freeze end - private - - # Binary search through the offsets to find the line number for the given + # Binary search through the offsets to find the index for the given # byte offset. 
- def find_line(byte_offset) - left = 0 - right = offsets.length - 1 - - while left <= right - mid = left + (right - left) / 2 - return mid if (offset = offsets[mid]) == byte_offset - - if offset < byte_offset - left = mid + 1 - else - right = mid - 1 - end - end - - left - 1 + #-- + #: (Integer byte_offset) -> Integer + def find_line(byte_offset) # :nodoc: + index = offsets.bsearch_index { |offset| offset > byte_offset } || offsets.length + index - 1 end end @@ -187,38 +250,69 @@ module Prism # has not yet been implemented. # class CodeUnitsCache + # Counter used for UTF-8, where one code unit equals one byte. + class UTF8Counter # :nodoc: + #: (Integer byte_offset, Integer byte_length) -> Integer + def count(byte_offset, byte_length) + byte_length + end + end + class UTF16Counter # :nodoc: + # @rbs @source: String + # @rbs @encoding: Encoding + + #: (String source, Encoding encoding) -> void def initialize(source, encoding) @source = source @encoding = encoding end + #: (Integer byte_offset, Integer byte_length) -> Integer def count(byte_offset, byte_length) - @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2 + (@source.byteslice(byte_offset, byte_length) or raise).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2 end end - class LengthCounter # :nodoc: + # Counter used for UTF-32, where one code unit equals one code point and + # matches String#length. Also used as a best-effort fallback for any other + # encoding that does not have a dedicated counter. 
+ class UTF32Counter # :nodoc: + # @rbs @source: String + # @rbs @encoding: Encoding + + #: (String source, Encoding encoding) -> void def initialize(source, encoding) @source = source @encoding = encoding end + #: (Integer byte_offset, Integer byte_length) -> Integer def count(byte_offset, byte_length) - @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length + (@source.byteslice(byte_offset, byte_length) or raise).encode(@encoding, invalid: :replace, undef: :replace).length end end - private_constant :UTF16Counter, :LengthCounter + private_constant :UTF8Counter, :UTF16Counter, :UTF32Counter + + # @rbs @source: String + # @rbs @counter: UTF8Counter | UTF16Counter | UTF32Counter + # @rbs @cache: Hash[Integer, Integer] + # @rbs @offsets: Array[Integer] # Initialize a new cache with the given source and encoding. + #-- + #: (String source, Encoding encoding) -> void def initialize(source, encoding) @source = source @counter = - if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE + case encoding + when Encoding::UTF_8 + UTF8Counter.new + when Encoding::UTF_16LE, Encoding::UTF_16BE UTF16Counter.new(source, encoding) else - LengthCounter.new(source, encoding) + UTF32Counter.new(source, encoding) end @cache = {} #: Hash[Integer, Integer] @@ -226,6 +320,8 @@ module Prism end # Retrieve the code units offset from the given byte offset. + #-- + #: (Integer byte_offset) -> Integer def [](byte_offset) @cache[byte_offset] ||= if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil? @@ -252,11 +348,15 @@ module Prism # at that point we will treat everything as single-byte characters. class ASCIISource < Source # Return the character offset for the given byte offset. + #-- + #: (Integer byte_offset) -> Integer def character_offset(byte_offset) byte_offset end - # Return the column number in characters for the given byte offset. + # Return the column in characters for the given byte offset. 
+ #-- + #: (Integer byte_offset) -> Integer def character_column(byte_offset) byte_offset - line_start(byte_offset) end @@ -267,6 +367,8 @@ module Prism # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the # concept of code units that differs from the number of characters in other # encodings, it is not captured here. + #-- + #: (Integer byte_offset, Encoding encoding) -> Integer def code_units_offset(byte_offset, encoding) byte_offset end @@ -274,6 +376,8 @@ module Prism # Returns a cache that is the identity function in order to maintain the # same interface. We can do this because code units are always equivalent to # byte offsets for ASCII-only sources. + #-- + #: (Encoding encoding) -> _CodeUnitsCache def code_units_cache(encoding) ->(byte_offset) { byte_offset } end @@ -281,6 +385,8 @@ module Prism # Specialized version of `code_units_column` that does not depend on # `code_units_offset`, which is a more expensive operation. This is # essentially the same as `Prism::Source#column`. + #-- + #: (Integer byte_offset, Encoding encoding) -> Integer def code_units_column(byte_offset, encoding) byte_offset - line_start(byte_offset) end @@ -290,18 +396,23 @@ module Prism class Location # A Source object that is used to determine more information from the given # offset and length. - attr_reader :source + attr_reader :source #: Source protected :source # The byte offset from the beginning of the source where this location # starts. - attr_reader :start_offset + attr_reader :start_offset #: Integer # The length of this location in bytes. - attr_reader :length + attr_reader :length #: Integer + + # @rbs @leading_comments: Array[Comment]? + # @rbs @trailing_comments: Array[Comment]? # Create a new location object with the given source, start byte offset, and # byte length. 
+ #-- + #: (Source source, Integer start_offset, Integer length) -> void def initialize(source, start_offset, length) @source = source @start_offset = start_offset @@ -316,53 +427,73 @@ module Prism # These are the comments that are associated with this location that exist # before the start of this location. + #-- + #: () -> Array[Comment] def leading_comments @leading_comments ||= [] end # Attach a comment to the leading comments of this location. + #-- + #: (Comment comment) -> void def leading_comment(comment) leading_comments << comment end # These are the comments that are associated with this location that exist # after the end of this location. + #-- + #: () -> Array[Comment] def trailing_comments @trailing_comments ||= [] end # Attach a comment to the trailing comments of this location. + #-- + #: (Comment comment) -> void def trailing_comment(comment) trailing_comments << comment end # Returns all comments that are associated with this location (both leading # and trailing comments). + #-- + #: () -> Array[Comment] def comments - [*@leading_comments, *@trailing_comments] + [*@leading_comments, *@trailing_comments] #: Array[Comment] end # Create a new location object with the given options. + #-- + #: (?source: Source, ?start_offset: Integer, ?length: Integer) -> Location def copy(source: self.source, start_offset: self.start_offset, length: self.length) Location.new(source, start_offset, length) end # Returns a new location that is the result of chopping off the last byte. + #-- + #: () -> Location def chop copy(length: length == 0 ? length : length - 1) end # Returns a string representation of this location. - def inspect + #-- + #: () -> String + def inspect # :nodoc: "#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>" end # Returns all of the lines of the source code associated with this location. + #-- + #: () -> Array[String] def source_lines source.lines end # The source code that this location represents. 
+ #-- + #: () -> String def slice source.slice(start_offset, length) end @@ -370,6 +501,8 @@ module Prism # The source code that this location represents starting from the beginning # of the line that this location starts on to the end of the line that this # location ends on. + #-- + #: () -> String def slice_lines line_start = source.line_start(start_offset) line_end = source.line_end(end_offset) @@ -378,118 +511,160 @@ module Prism # The character offset from the beginning of the source where this location # starts. + #-- + #: () -> Integer def start_character_offset source.character_offset(start_offset) end # The offset from the start of the file in code units of the given encoding. + #-- + #: (Encoding encoding) -> Integer def start_code_units_offset(encoding = Encoding::UTF_16LE) source.code_units_offset(start_offset, encoding) end # The start offset from the start of the file in code units using the given # cache to fetch or calculate the value. + #-- + #: (_CodeUnitsCache cache) -> Integer def cached_start_code_units_offset(cache) cache[start_offset] end # The byte offset from the beginning of the source where this location ends. + #-- + #: () -> Integer def end_offset start_offset + length end # The character offset from the beginning of the source where this location # ends. + #-- + #: () -> Integer def end_character_offset source.character_offset(end_offset) end # The offset from the start of the file in code units of the given encoding. + #-- + #: (Encoding encoding) -> Integer def end_code_units_offset(encoding = Encoding::UTF_16LE) source.code_units_offset(end_offset, encoding) end # The end offset from the start of the file in code units using the given # cache to fetch or calculate the value. + #-- + #: (_CodeUnitsCache cache) -> Integer def cached_end_code_units_offset(cache) cache[end_offset] end # The line number where this location starts. 
+ #-- + #: () -> Integer def start_line source.line(start_offset) end # The content of the line where this location starts before this location. + #-- + #: () -> String def start_line_slice offset = source.line_start(start_offset) source.slice(offset, start_offset - offset) end # The line number where this location ends. + #-- + #: () -> Integer def end_line source.line(end_offset) end - # The column number in bytes where this location starts from the start of + # The column in bytes where this location starts from the start of # the line. + #-- + #: () -> Integer def start_column source.column(start_offset) end - # The column number in characters where this location ends from the start of + # The column in characters where this location ends from the start of # the line. + #-- + #: () -> Integer def start_character_column source.character_column(start_offset) end - # The column number in code units of the given encoding where this location + # The column in code units of the given encoding where this location # starts from the start of the line. + #-- + #: (?Encoding encoding) -> Integer def start_code_units_column(encoding = Encoding::UTF_16LE) source.code_units_column(start_offset, encoding) end # The start column in code units using the given cache to fetch or calculate # the value. + #-- + #: (_CodeUnitsCache cache) -> Integer def cached_start_code_units_column(cache) cache[start_offset] - cache[source.line_start(start_offset)] end - # The column number in bytes where this location ends from the start of the + # The column in bytes where this location ends from the start of the # line. + #-- + #: () -> Integer def end_column source.column(end_offset) end - # The column number in characters where this location ends from the start of + # The column in characters where this location ends from the start of # the line. 
+ #-- + #: () -> Integer def end_character_column source.character_column(end_offset) end - # The column number in code units of the given encoding where this location + # The column in code units of the given encoding where this location # ends from the start of the line. + #-- + #: (?Encoding encoding) -> Integer def end_code_units_column(encoding = Encoding::UTF_16LE) source.code_units_column(end_offset, encoding) end # The end column in code units using the given cache to fetch or calculate # the value. + #-- + #: (_CodeUnitsCache cache) -> Integer def cached_end_code_units_column(cache) cache[end_offset] - cache[source.line_start(end_offset)] end # Implement the hash pattern matching interface for Location. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { start_offset: start_offset, end_offset: end_offset } end # Implement the pretty print interface for Location. - def pretty_print(q) + #-- + #: (PP q) -> void + def pretty_print(q) # :nodoc: q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column})") end # Returns true if the given other location is equal to this location. + #-- + #: (untyped other) -> bool def ==(other) Location === other && other.start_offset == start_offset && @@ -499,6 +674,8 @@ module Prism # Returns a new location that stretches from this location to the given # other location. Raises an error if this location is not before the other # location or if they don't share the same source. + #-- + #: (Location other) -> Location def join(other) raise "Incompatible sources" if source != other.source raise "Incompatible locations" if start_offset > other.start_offset @@ -509,6 +686,8 @@ module Prism # Join this location with the first occurrence of the string in the source # that occurs after this location on the same line, and return the new # location. This will raise an error if the string does not exist. 
+ #-- + #: (String string) -> Location def adjoin(string) line_suffix = source.slice(end_offset, source.line_end(end_offset) - end_offset) @@ -522,23 +701,38 @@ module Prism # This represents a comment that was encountered during parsing. It is the # base class for all comment types. class Comment - # The location of this comment in the source. - attr_reader :location + # The Location of this comment in the source. + attr_reader :location #: Location # Create a new comment object with the given location. + #-- + #: (Location location) -> void def initialize(location) @location = location end # Implement the hash pattern matching interface for Comment. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { location: location } end # Returns the content of the comment by slicing it from the source code. + #-- + #: () -> String def slice location.slice end + + # Returns true if this comment happens on the same line as other code and + # false if the comment is by itself. This can only be true for inline + # comments and should be false for block comments. + #-- + #: () -> bool + def trailing? + raise NotImplementedError, "trailing? is not implemented for #{self.class}" + end end # InlineComment objects are the most common. They correspond to comments in @@ -546,12 +740,16 @@ module Prism class InlineComment < Comment # Returns true if this comment happens on the same line as other code and # false if the comment is by itself. + #-- + #: () -> bool def trailing? !location.start_line_slice.strip.empty? end # Returns a string representation of this comment. - def inspect + #-- + #: () -> String + def inspect # :nodoc: "#<Prism::InlineComment @location=#{location.inspect}>" end end @@ -559,13 +757,17 @@ module Prism # EmbDocComment objects correspond to comments that are surrounded by =begin # and =end. class EmbDocComment < Comment - # This can only be true for inline comments. + # Returns false. 
This can only be true for inline comments. + #-- + #: () -> bool def trailing? false end # Returns a string representation of this comment. - def inspect + #-- + #: () -> String + def inspect # :nodoc: "#<Prism::EmbDocComment @location=#{location.inspect}>" end end @@ -573,34 +775,44 @@ module Prism # This represents a magic comment that was encountered during parsing. class MagicComment # A Location object representing the location of the key in the source. - attr_reader :key_loc + attr_reader :key_loc #: Location # A Location object representing the location of the value in the source. - attr_reader :value_loc + attr_reader :value_loc #: Location # Create a new magic comment object with the given key and value locations. + #-- + #: (Location key_loc, Location value_loc) -> void def initialize(key_loc, value_loc) @key_loc = key_loc @value_loc = value_loc end # Returns the key of the magic comment by slicing it from the source code. + #-- + #: () -> String def key key_loc.slice end # Returns the value of the magic comment by slicing it from the source code. + #-- + #: () -> String def value value_loc.slice end # Implement the hash pattern matching interface for MagicComment. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { key_loc: key_loc, value_loc: value_loc } end # Returns a string representation of this magic comment. - def inspect + #-- + #: () -> String + def inspect # :nodoc: "#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>" end end @@ -609,18 +821,20 @@ module Prism class ParseError # The type of error. This is an _internal_ symbol that is used for # communicating with translation layers. It is not meant to be public API. - attr_reader :type + attr_reader :type #: Symbol # The message associated with this error. - attr_reader :message + attr_reader :message #: String # A Location object representing the location of this error in the source. 
- attr_reader :location + attr_reader :location #: Location # The level of this error. - attr_reader :level + attr_reader :level #: Symbol # Create a new error object with the given message and location. + #-- + #: (Symbol type, String message, Location location, Symbol level) -> void def initialize(type, message, location, level) @type = type @message = message @@ -629,12 +843,16 @@ module Prism end # Implement the hash pattern matching interface for ParseError. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { type: type, message: message, location: location, level: level } end # Returns a string representation of this error. - def inspect + #-- + #: () -> String + def inspect # :nodoc: "#<Prism::ParseError @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>" end end @@ -643,18 +861,20 @@ module Prism class ParseWarning # The type of warning. This is an _internal_ symbol that is used for # communicating with translation layers. It is not meant to be public API. - attr_reader :type + attr_reader :type #: Symbol # The message associated with this warning. - attr_reader :message + attr_reader :message #: String # A Location object representing the location of this warning in the source. - attr_reader :location + attr_reader :location #: Location # The level of this warning. - attr_reader :level + attr_reader :level #: Symbol # Create a new warning object with the given message and location. + #-- + #: (Symbol type, String message, Location location, Symbol level) -> void def initialize(type, message, location, level) @type = type @message = message @@ -663,73 +883,116 @@ module Prism end # Implement the hash pattern matching interface for ParseWarning. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? 
keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { type: type, message: message, location: location, level: level } end # Returns a string representation of this warning. - def inspect + #-- + #: () -> String + def inspect # :nodoc: "#<Prism::ParseWarning @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>" end end - # This represents the result of a call to ::parse or ::parse_file. It contains - # the requested structure, any comments that were encounters, and any errors - # that were encountered. + # This represents the result of a call to Prism.parse or Prism.parse_file. + # It contains the requested structure, any comments that were encounters, + # and any errors that were encountered. class Result # The list of comments that were encountered during parsing. - attr_reader :comments + attr_reader :comments #: Array[Comment] # The list of magic comments that were encountered during parsing. - attr_reader :magic_comments + attr_reader :magic_comments #: Array[MagicComment] # An optional location that represents the location of the __END__ marker # and the rest of the content of the file. This content is loaded into the # DATA constant when the file being parsed is the main file being executed. - attr_reader :data_loc + attr_reader :data_loc #: Location? # The list of errors that were generated during parsing. - attr_reader :errors + attr_reader :errors #: Array[ParseError] # The list of warnings that were generated during parsing. - attr_reader :warnings + attr_reader :warnings #: Array[ParseWarning] # A Source instance that represents the source code that was parsed. - attr_reader :source + attr_reader :source #: Source # Create a new result object with the given values. - def initialize(comments, magic_comments, data_loc, errors, warnings, source) + #-- + #: (Array[Comment] comments, Array[MagicComment] magic_comments, Location? 
data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(comments, magic_comments, data_loc, errors, warnings, continuable, source) @comments = comments @magic_comments = magic_comments @data_loc = data_loc @errors = errors @warnings = warnings + @continuable = continuable @source = source end # Implement the hash pattern matching interface for Result. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings } end # Returns the encoding of the source code that was parsed. + #-- + #: () -> Encoding def encoding source.encoding end # Returns true if there were no errors during parsing and false if there # were. + #-- + #: () -> bool def success? errors.empty? end # Returns true if there were errors during parsing and false if there were # not. + #-- + #: () -> bool def failure? !success? end + # Returns true if the parsed source is an incomplete expression that could + # become valid with additional input. This is useful for REPL contexts (such + # as IRB) where the user may be entering a multi-line expression one line at + # a time and the implementation needs to determine whether to wait for more + # input or to evaluate what has been entered so far. + # + # Concretely, this returns true when every error present is caused by the + # parser reaching the end of the input before a construct was closed (e.g. + # an unclosed string, array, block, or keyword), and returns false when any + # error is caused by a token that makes the input structurally invalid + # regardless of what might follow (e.g. a stray `end`, `]`, or `)` with no + # matching opener). + # + # Examples: + # + # Prism.parse("1 + [").continuable? #=> true (unclosed array) + # Prism.parse("1 + ]").continuable? 
#=> false (stray ]) + # Prism.parse("tap do").continuable? #=> true (unclosed block) + # Prism.parse("end.tap do").continuable? #=> false (stray end) + # + #-- + #: () -> bool + def continuable? + @continuable + end + # Create a code units cache for the given encoding. + #-- + #: (Encoding encoding) -> _CodeUnitsCache def code_units_cache(encoding) source.code_units_cache(encoding) end @@ -746,32 +1009,42 @@ module Prism private_constant :Newlines # The syntax tree that was parsed from the source code. - attr_reader :value + attr_reader :value #: ProgramNode # Create a new parse result object with the given values. - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + #-- + #: (ProgramNode value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) @value = value - super(comments, magic_comments, data_loc, errors, warnings, source) + super(comments, magic_comments, data_loc, errors, warnings, continuable, source) end # Implement the hash pattern matching interface for ParseResult. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: super.merge!(value: value) end # Attach the list of comments to their respective locations in the tree. + #-- + #: () -> void def attach_comments! Comments.new(self).attach! # steep:ignore end # Walk the tree and mark nodes that are on a new line, loosely emulating # the behavior of CRuby's `:line` tracepoint event. + #-- + #: () -> void def mark_newlines! value.accept(Newlines.new(source.offsets.size)) # steep:ignore end # Returns a string representation of the syntax tree with the errors # displayed inline. 
+ #-- + #: () -> String def errors_format Errors.new(self).format end @@ -780,16 +1053,20 @@ module Prism # This is a result specific to the `lex` and `lex_file` methods. class LexResult < Result # The list of tokens that were parsed from the source code. - attr_reader :value + attr_reader :value #: Array[[Token, Integer]] # Create a new lex result object with the given values. - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + #-- + #: (Array[[Token, Integer]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) @value = value - super(comments, magic_comments, data_loc, errors, warnings, source) + super(comments, magic_comments, data_loc, errors, warnings, continuable, source) end # Implement the hash pattern matching interface for LexResult. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: super.merge!(value: value) end end @@ -798,16 +1075,20 @@ module Prism class ParseLexResult < Result # A tuple of the syntax tree and the list of tokens that were parsed from # the source code. - attr_reader :value + attr_reader :value #: [ProgramNode, Array[[Token, Integer]]] # Create a new parse lex result object with the given values. - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) + #-- + #: ([ProgramNode, Array[[Token, Integer]]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? 
data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) @value = value - super(comments, magic_comments, data_loc, errors, warnings, source) + super(comments, magic_comments, data_loc, errors, warnings, continuable, source) end # Implement the hash pattern matching interface for ParseLexResult. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: super.merge!(value: value) end end @@ -815,16 +1096,20 @@ module Prism # This represents a token from the Ruby source. class Token # The Source object that represents the source this token came from. - attr_reader :source + attr_reader :source #: Source private :source # The type of token that this token is. - attr_reader :type + attr_reader :type #: Symbol # A byteslice of the source that this token represents. - attr_reader :value + attr_reader :value #: String + + # @rbs @location: Location | Integer # Create a new token object with the given type, value, and location. + #-- + #: (Source source, Symbol type, String value, Location | Integer location) -> void def initialize(source, type, value, location) @source = source @type = type @@ -833,11 +1118,15 @@ module Prism end # Implement the hash pattern matching interface for Token. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { type: type, value: value, location: location } end # A Location object representing the location of this token in the source. + #-- + #: () -> Location def location location = @location return location if location.is_a?(Location) @@ -845,7 +1134,9 @@ module Prism end # Implement the pretty print interface for Token. 
- def pretty_print(q) + #-- + #: (PP q) -> void + def pretty_print(q) # :nodoc: q.group do q.text(type.to_s) self.location.pretty_print(q) @@ -860,6 +1151,8 @@ module Prism end # Returns true if the given other token is equal to this token. + #-- + #: (untyped other) -> bool def ==(other) Token === other && other.type == type && @@ -867,12 +1160,16 @@ module Prism end # Returns a string representation of this token. - def inspect + #-- + #: () -> String + def inspect # :nodoc: location super end # Freeze this object and the objects it contains. + #-- + #: () -> void def deep_freeze value.freeze location.freeze @@ -887,14 +1184,16 @@ module Prism class Scope # The list of local variables that are defined in this scope. This should be # defined as an array of symbols. - attr_reader :locals + attr_reader :locals #: Array[Symbol] # The list of local variables that are forwarded to the next scope. This # should by defined as an array of symbols containing the specific values of # :*, :**, :&, or :"...". - attr_reader :forwarding + attr_reader :forwarding #: Array[Symbol] # Create a new scope object with the given locals and forwarding. + #-- + #: (Array[Symbol] locals, Array[Symbol] forwarding) -> void def initialize(locals, forwarding) @locals = locals @forwarding = forwarding @@ -904,6 +1203,8 @@ module Prism # Create a new scope with the given locals and forwarding options that is # suitable for passing into one of the Prism.* methods that accepts the # `scopes` option. 
+ #-- + #: (?locals: Array[Symbol], ?forwarding: Array[Symbol]) -> Scope def self.scope(locals: [], forwarding: []) Scope.new(locals, forwarding) end diff --git a/lib/prism/parse_result/comments.rb b/lib/prism/parse_result/comments.rb index 22c4148b2c..df80792d39 100644 --- a/lib/prism/parse_result/comments.rb +++ b/lib/prism/parse_result/comments.rb @@ -1,4 +1,7 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism class ParseResult < Result @@ -17,32 +20,49 @@ module Prism # the comment. Otherwise it will favor attaching to the nearest location # that is after the comment. class Comments + # @rbs! + # # An internal interface for a target that comments can be attached + # # to. This is either going to be a NodeTarget or a CommentTarget. + # interface _CommentTarget + # def start_offset: () -> Integer + # def end_offset: () -> Integer + # def encloses?: (Comment) -> bool + # def leading_comment: (Comment) -> void + # def trailing_comment: (Comment) -> void + # end + # A target for attaching comments that is based on a specific node's # location. class NodeTarget # :nodoc: - attr_reader :node + attr_reader :node #: node + #: (node node) -> void def initialize(node) @node = node end + #: () -> Integer def start_offset node.start_offset end + #: () -> Integer def end_offset node.end_offset end + #: (Comment comment) -> bool def encloses?(comment) start_offset <= comment.location.start_offset && comment.location.end_offset <= end_offset end + #: (Comment comment) -> void def leading_comment(comment) node.location.leading_comment(comment) end + #: (Comment comment) -> void def trailing_comment(comment) node.location.trailing_comment(comment) end @@ -51,44 +71,54 @@ module Prism # A target for attaching comments that is based on a location field on a # node. For example, the `end` token of a ClassNode. 
class LocationTarget # :nodoc: - attr_reader :location + attr_reader :location #: Location + #: (Location location) -> void def initialize(location) @location = location end + #: () -> Integer def start_offset location.start_offset end + #: () -> Integer def end_offset location.end_offset end + #: (Comment comment) -> bool def encloses?(comment) false end + #: (Comment comment) -> void def leading_comment(comment) location.leading_comment(comment) end + #: (Comment comment) -> void def trailing_comment(comment) location.trailing_comment(comment) end end # The parse result that we are attaching comments to. - attr_reader :parse_result + attr_reader :parse_result #: ParseResult # Create a new Comments object that will attach comments to the given # parse result. + #-- + #: (ParseResult parse_result) -> void def initialize(parse_result) @parse_result = parse_result end # Attach the comments to their respective locations in the tree by # mutating the parse result. + #-- + #: () -> void def attach! parse_result.comments.each do |comment| preceding, enclosing, following = nearest_targets(parse_result.value, comment) @@ -116,11 +146,13 @@ module Prism # Responsible for finding the nearest targets to the given comment within # the context of the given encapsulating node. + #-- + #: (node node, Comment comment) -> [_CommentTarget?, _CommentTarget?, _CommentTarget?] def nearest_targets(node, comment) comment_start = comment.location.start_offset comment_end = comment.location.end_offset - targets = [] #: Array[_Target] + targets = [] #: Array[_CommentTarget] node.comment_targets.map do |value| case value when StatementsNode @@ -133,8 +165,8 @@ module Prism end targets.sort_by!(&:start_offset) - preceding = nil #: _Target? - following = nil #: _Target? + preceding = nil #: _CommentTarget? + following = nil #: _CommentTarget? 
left = 0 right = targets.length diff --git a/lib/prism/parse_result/errors.rb b/lib/prism/parse_result/errors.rb index eb4f317248..388309d23d 100644 --- a/lib/prism/parse_result/errors.rb +++ b/lib/prism/parse_result/errors.rb @@ -1,4 +1,7 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled require "stringio" @@ -8,14 +11,18 @@ module Prism # can be used to format the errors in a human-readable way. class Errors # The parse result that contains the errors. - attr_reader :parse_result + attr_reader :parse_result #: ParseResult # Initialize a new set of errors from the given parse result. + #-- + #: (ParseResult parse_result) -> void def initialize(parse_result) @parse_result = parse_result end # Formats the errors in a human-readable way and return them as a string. + #-- + #: () -> String def format error_lines = {} #: Hash[Integer, Array[ParseError]] parse_result.errors.each do |error| diff --git a/lib/prism/parse_result/newlines.rb b/lib/prism/parse_result/newlines.rb index 37f64f8ae2..450c790226 100644 --- a/lib/prism/parse_result/newlines.rb +++ b/lib/prism/parse_result/newlines.rb @@ -1,4 +1,7 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism class ParseResult < Result @@ -23,13 +26,20 @@ module Prism # that case. We do that to avoid storing the extra `@newline` instance # variable on every node if we don't need it. class Newlines < Visitor + # The map of lines indices to whether or not they have been marked as + # emitting a newline event. + # @rbs @lines: Array[bool] + # Create a new Newlines visitor with the given newline offsets. + #-- + #: (Integer lines) -> void def initialize(lines) - # @type var lines: Integer @lines = Array.new(1 + lines, false) end - # Permit block/lambda nodes to mark newlines within themselves. + # Permit block nodes to mark newlines within themselves. 
+ #-- + #: (BlockNode node) -> void def visit_block_node(node) old_lines = @lines @lines = Array.new(old_lines.size, false) @@ -41,17 +51,39 @@ module Prism end end - alias_method :visit_lambda_node, :visit_block_node + # Permit lambda nodes to mark newlines within themselves. + #-- + #: (LambdaNode node) -> void + def visit_lambda_node(node) + old_lines = @lines + @lines = Array.new(old_lines.size, false) + + begin + super(node) + ensure + @lines = old_lines + end + end - # Mark if/unless nodes as newlines. + # Mark if nodes as newlines. + #-- + #: (IfNode node) -> void def visit_if_node(node) node.newline_flag!(@lines) super(node) end - alias_method :visit_unless_node, :visit_if_node + # Mark unless nodes as newlines. + #-- + #: (UnlessNode node) -> void + def visit_unless_node(node) + node.newline_flag!(@lines) + super(node) + end # Permit statements lists to mark newlines within themselves. + #-- + #: (StatementsNode node) -> void def visit_statements_node(node) node.body.each do |child| child.newline_flag!(@lines) @@ -62,10 +94,16 @@ module Prism end class Node + # Tracks whether or not this node should emit a newline event when the + # instructions that it represents are executed. + # @rbs @newline_flag: bool + + #: () -> bool def newline_flag? # :nodoc: !!defined?(@newline_flag) end + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: line = location.start_line unless lines[line] @@ -76,48 +114,56 @@ module Prism end class BeginNode < Node + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: # Never mark BeginNode with a newline flag, mark children instead. end end class ParenthesesNode < Node + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: # Never mark ParenthesesNode with a newline flag, mark children instead. 
end end class IfNode < Node + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: predicate.newline_flag!(lines) end end class UnlessNode < Node + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: predicate.newline_flag!(lines) end end class UntilNode < Node + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: predicate.newline_flag!(lines) end end class WhileNode < Node + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: predicate.newline_flag!(lines) end end class RescueModifierNode < Node + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: expression.newline_flag!(lines) end end class InterpolatedMatchLastLineNode < Node + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: first = parts.first first.newline_flag!(lines) if first @@ -125,6 +171,7 @@ module Prism end class InterpolatedRegularExpressionNode < Node + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: first = parts.first first.newline_flag!(lines) if first @@ -132,6 +179,7 @@ module Prism end class InterpolatedStringNode < Node + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: first = parts.first first.newline_flag!(lines) if first @@ -139,6 +187,7 @@ module Prism end class InterpolatedSymbolNode < Node + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: first = parts.first first.newline_flag!(lines) if first @@ -146,6 +195,7 @@ module Prism end class InterpolatedXStringNode < Node + #: (Array[bool] lines) -> void def newline_flag!(lines) # :nodoc: first = parts.first first.newline_flag!(lines) if first diff --git a/lib/prism/pattern.rb b/lib/prism/pattern.rb index 03fec26789..be0493df05 100644 --- a/lib/prism/pattern.rb +++ b/lib/prism/pattern.rb @@ -1,4 +1,7 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism # A pattern is an object that wraps a Ruby pattern matching expression. 
The @@ -40,7 +43,9 @@ module Prism class CompilationError < StandardError # Create a new CompilationError with the given representation of the node # that caused the error. - def initialize(repr) + #-- + #: (String repr) -> void + def initialize(repr) # :nodoc: super(<<~ERROR) prism was unable to compile the pattern you provided into a usable expression. It failed on to understand the node represented by: @@ -56,10 +61,13 @@ module Prism end # The query that this pattern was initialized with. - attr_reader :query + attr_reader :query #: String + # @rbs @compiled: Proc? # Create a new pattern with the given query. The query should be a string # containing a Ruby pattern matching expression. + #-- + #: (String query) -> void def initialize(query) @query = query @compiled = nil @@ -67,6 +75,8 @@ module Prism # Compile the query into a callable object that can be used to match against # nodes. + #-- + #: () -> Proc def compile result = Prism.parse("case nil\nin #{query}\nend") @@ -83,7 +93,10 @@ module Prism # pattern. If a block is given, it will be called with each node that # matches the pattern. If no block is given, an enumerator will be returned # that will yield each node that matches the pattern. - def scan(root) + #-- + #: (node root) -> Enumerator[node, void] + #: (node root) { (node) -> void } -> void + def scan(root, &blk) return to_enum(:scan, root) unless block_given? @compiled ||= compile @@ -99,23 +112,33 @@ module Prism # Shortcut for combining two procs into one that returns true if both return # true. - def combine_and(left, right) + #-- + #: (Proc left, Proc right) -> Proc + def combine_and(left, right) # :nodoc: ->(other) { left.call(other) && right.call(other) } end # Shortcut for combining two procs into one that returns true if either # returns true. 
- def combine_or(left, right) + #-- + #: (Proc left, Proc right) -> Proc + def combine_or(left, right) # :nodoc: ->(other) { left.call(other) || right.call(other) } end - # Raise an error because the given node is not supported. - def compile_error(node) + # Raise an error because the given node is not supported. Note purposefully + # not typing this method since it is a no return method that Steep does not + # understand. + #-- + #: (node node) -> bot + def compile_error(node) # :nodoc: raise CompilationError, node.inspect end # in [foo, bar, baz] - def compile_array_pattern_node(node) + #-- + #: (ArrayPatternNode node) -> Proc + def compile_array_pattern_node(node) # :nodoc: compile_error(node) if !node.rest.nil? || node.posts.any? constant = node.constant @@ -140,12 +163,16 @@ module Prism end # in foo | bar - def compile_alternation_pattern_node(node) + #-- + #: (AlternationPatternNode node) -> Proc + def compile_alternation_pattern_node(node) # :nodoc: combine_or(compile_node(node.left), compile_node(node.right)) end # in Prism::ConstantReadNode - def compile_constant_path_node(node) + #-- + #: (ConstantPathNode node) -> Proc + def compile_constant_path_node(node) # :nodoc: parent = node.parent if parent.is_a?(ConstantReadNode) && parent.slice == "Prism" @@ -160,12 +187,16 @@ module Prism # in ConstantReadNode # in String - def compile_constant_read_node(node) + #-- + #: (ConstantReadNode node) -> Proc + def compile_constant_read_node(node) # :nodoc: compile_constant_name(node, node.name) end # Compile a name associated with a constant. 
- def compile_constant_name(node, name) + #-- + #: ((ConstantPathNode | ConstantReadNode) node, Symbol name) -> Proc + def compile_constant_name(node, name) # :nodoc: if Prism.const_defined?(name, false) clazz = Prism.const_get(name) @@ -181,9 +212,14 @@ module Prism # in InstanceVariableReadNode[name: Symbol] # in { name: Symbol } - def compile_hash_pattern_node(node) + #-- + #: (HashPatternNode node) -> Proc + def compile_hash_pattern_node(node) # :nodoc: compile_error(node) if node.rest - compiled_constant = compile_node(node.constant) if node.constant + + if (constant = node.constant) + compiled_constant = compile_node(constant) + end preprocessed = node.elements.to_h do |element| @@ -211,12 +247,16 @@ module Prism end # in nil - def compile_nil_node(node) + #-- + #: (NilNode node) -> Proc + def compile_nil_node(node) # :nodoc: ->(attribute) { attribute.nil? } end # in /foo/ - def compile_regular_expression_node(node) + #-- + #: (RegularExpressionNode node) -> Proc + def compile_regular_expression_node(node) # :nodoc: regexp = Regexp.new(node.unescaped, node.closing[1..]) ->(attribute) { regexp === attribute } @@ -224,7 +264,9 @@ module Prism # in "" # in "foo" - def compile_string_node(node) + #-- + #: (StringNode node) -> Proc + def compile_string_node(node) # :nodoc: string = node.unescaped ->(attribute) { string === attribute } @@ -232,7 +274,9 @@ module Prism # in :+ # in :foo - def compile_symbol_node(node) + #-- + #: (SymbolNode node) -> Proc + def compile_symbol_node(node) # :nodoc: symbol = node.unescaped.to_sym ->(attribute) { symbol === attribute } @@ -240,7 +284,9 @@ module Prism # Compile any kind of node. Dispatch out to the individual compilation # methods based on the type of node. 
- def compile_node(node) + #-- + #: (node node) -> Proc + def compile_node(node) # :nodoc: case node when AlternationPatternNode compile_alternation_pattern_node(node) diff --git a/lib/prism/polyfill/scan_byte.rb b/lib/prism/polyfill/scan_byte.rb new file mode 100644 index 0000000000..9276e509fc --- /dev/null +++ b/lib/prism/polyfill/scan_byte.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +require "strscan" + +# Polyfill for StringScanner#scan_byte, which didn't exist until Ruby 3.4. +if !(StringScanner.method_defined?(:scan_byte)) + StringScanner.include( + Module.new { + def scan_byte # :nodoc: + get_byte&.b&.ord + end + } + ) +end diff --git a/lib/prism/polyfill/warn.rb b/lib/prism/polyfill/warn.rb index 560380d308..76a4264623 100644 --- a/lib/prism/polyfill/warn.rb +++ b/lib/prism/polyfill/warn.rb @@ -7,17 +7,14 @@ if (method = Kernel.instance_method(:warn)).respond_to?(:parameters) ? method.pa Kernel.prepend( Module.new { def warn(*msgs, uplevel: nil, category: nil) # :nodoc: - uplevel = - case uplevel - when nil - 1 - when Integer - uplevel + 1 - else - uplevel.to_int + 1 - end - - super(*msgs, uplevel: uplevel) + case uplevel + when nil + super(*msgs) + when Integer + super(*msgs, uplevel: uplevel + 1) + else + super(*msgs, uplevel: uplevel.to_int + 1) + end end } ) @@ -25,17 +22,14 @@ if (method = Kernel.instance_method(:warn)).respond_to?(:parameters) ? 
method.pa Object.prepend( Module.new { def warn(*msgs, uplevel: nil, category: nil) # :nodoc: - uplevel = - case uplevel - when nil - 1 - when Integer - uplevel + 1 - else - uplevel.to_int + 1 - end - - super(*msgs, uplevel: uplevel) + case uplevel + when nil + super(*msgs) + when Integer + super(*msgs, uplevel: uplevel + 1) + else + super(*msgs, uplevel: uplevel.to_int + 1) + end end } ) diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec index 5cb5a98057..aac056b3f8 100644 --- a/lib/prism/prism.gemspec +++ b/lib/prism/prism.gemspec @@ -2,7 +2,7 @@ Gem::Specification.new do |spec| spec.name = "prism" - spec.version = "1.4.0" + spec.version = "1.9.0" spec.authors = ["Shopify"] spec.email = ["ruby@shopify.com"] @@ -42,31 +42,69 @@ Gem::Specification.new do |spec| "docs/serialization.md", "docs/testing.md", "ext/prism/api_node.c", - "ext/prism/api_pack.c", + "ext/prism/extconf.rb", "ext/prism/extension.c", "ext/prism/extension.h", "include/prism.h", + "include/prism/compiler/accel.h", + "include/prism/compiler/align.h", + "include/prism/compiler/exported.h", + "include/prism/compiler/fallthrough.h", + "include/prism/compiler/filesystem.h", + "include/prism/compiler/flex_array.h", + "include/prism/compiler/force_inline.h", + "include/prism/compiler/format.h", + "include/prism/compiler/inline.h", + "include/prism/compiler/nodiscard.h", + "include/prism/compiler/nonnull.h", + "include/prism/compiler/unused.h", + "include/prism/internal/allocator.h", + "include/prism/internal/allocator_debug.h", + "include/prism/internal/arena.h", + "include/prism/internal/bit.h", + "include/prism/internal/buffer.h", + "include/prism/internal/char.h", + "include/prism/internal/comments.h", + "include/prism/internal/constant_pool.h", + "include/prism/internal/diagnostic.h", + "include/prism/internal/encoding.h", + "include/prism/internal/integer.h", + "include/prism/internal/isinf.h", + "include/prism/internal/line_offset_list.h", + "include/prism/internal/list.h", + 
"include/prism/internal/magic_comments.h", + "include/prism/internal/memchr.h", + "include/prism/internal/node.h", + "include/prism/internal/options.h", + "include/prism/internal/parser.h", + "include/prism/internal/regexp.h", + "include/prism/internal/serialize.h", + "include/prism/internal/source.h", + "include/prism/internal/static_literals.h", + "include/prism/internal/strncasecmp.h", + "include/prism/internal/stringy.h", + "include/prism/internal/strpbrk.h", + "include/prism/internal/tokens.h", + "include/prism/arena.h", "include/prism/ast.h", - "include/prism/defines.h", + "include/prism/buffer.h", + "include/prism/comments.h", + "include/prism/constant_pool.h", "include/prism/diagnostic.h", - "include/prism/encoding.h", + "include/prism/excludes.h", + "include/prism/integer.h", + "include/prism/json.h", + "include/prism/line_offset_list.h", + "include/prism/magic_comments.h", "include/prism/node.h", "include/prism/options.h", - "include/prism/pack.h", "include/prism/parser.h", "include/prism/prettyprint.h", - "include/prism/regexp.h", - "include/prism/static_literals.h", - "include/prism/util/pm_buffer.h", - "include/prism/util/pm_char.h", - "include/prism/util/pm_constant_pool.h", - "include/prism/util/pm_integer.h", - "include/prism/util/pm_list.h", - "include/prism/util/pm_memchr.h", - "include/prism/util/pm_newline_list.h", - "include/prism/util/pm_strncasecmp.h", - "include/prism/util/pm_string.h", - "include/prism/util/pm_strpbrk.h", + "include/prism/serialize.h", + "include/prism/source.h", + "include/prism/stream.h", + "include/prism/string_query.h", + "include/prism/stringy.h", "include/prism/version.h", "lib/prism.rb", "lib/prism/compiler.rb", @@ -79,8 +117,8 @@ Gem::Specification.new do |spec| "lib/prism/lex_compat.rb", "lib/prism/mutation_compiler.rb", "lib/prism/node_ext.rb", + "lib/prism/node_find.rb", "lib/prism/node.rb", - "lib/prism/pack.rb", "lib/prism/parse_result.rb", "lib/prism/parse_result/comments.rb", 
"lib/prism/parse_result/errors.rb", @@ -88,6 +126,7 @@ Gem::Specification.new do |spec| "lib/prism/pattern.rb", "lib/prism/polyfill/append_as_bytes.rb", "lib/prism/polyfill/byteindex.rb", + "lib/prism/polyfill/scan_byte.rb", "lib/prism/polyfill/unpack1.rb", "lib/prism/polyfill/warn.rb", "lib/prism/reflection.rb", @@ -97,73 +136,93 @@ Gem::Specification.new do |spec| "lib/prism/translation.rb", "lib/prism/translation/parser.rb", "lib/prism/translation/parser_current.rb", - "lib/prism/translation/parser33.rb", - "lib/prism/translation/parser34.rb", - "lib/prism/translation/parser35.rb", + "lib/prism/translation/parser_versions.rb", "lib/prism/translation/parser/builder.rb", "lib/prism/translation/parser/compiler.rb", "lib/prism/translation/parser/lexer.rb", "lib/prism/translation/ripper.rb", + "lib/prism/translation/ripper/filter.rb", + "lib/prism/translation/ripper/lexer.rb", "lib/prism/translation/ripper/sexp.rb", "lib/prism/translation/ripper/shim.rb", "lib/prism/translation/ruby_parser.rb", "lib/prism/visitor.rb", "prism.gemspec", - "rbi/prism.rbi", - "rbi/prism/compiler.rbi", - "rbi/prism/dsl.rbi", - "rbi/prism/inspect_visitor.rbi", - "rbi/prism/node_ext.rbi", - "rbi/prism/node.rbi", - "rbi/prism/parse_result.rbi", - "rbi/prism/reflection.rbi", - "rbi/prism/string_query.rbi", + "rbi/generated/prism.rbi", + "rbi/generated/prism/compiler.rbi", + "rbi/generated/prism/desugar_compiler.rbi", + "rbi/generated/prism/dispatcher.rbi", + "rbi/generated/prism/dot_visitor.rbi", + "rbi/generated/prism/dsl.rbi", + "rbi/generated/prism/inspect_visitor.rbi", + "rbi/generated/prism/lex_compat.rbi", + "rbi/generated/prism/mutation_compiler.rbi", + "rbi/generated/prism/node.rbi", + "rbi/generated/prism/node_ext.rbi", + "rbi/generated/prism/node_find.rbi", + "rbi/generated/prism/parse_result.rbi", + "rbi/generated/prism/pattern.rbi", + "rbi/generated/prism/reflection.rbi", + "rbi/generated/prism/relocation.rbi", + "rbi/generated/prism/serialize.rbi", + 
"rbi/generated/prism/string_query.rbi", + "rbi/generated/prism/translation.rbi", + "rbi/generated/prism/visitor.rbi", + "rbi/generated/prism/parse_result/comments.rbi", + "rbi/generated/prism/parse_result/errors.rbi", + "rbi/generated/prism/parse_result/newlines.rbi", "rbi/prism/translation/parser.rbi", - "rbi/prism/translation/parser33.rbi", - "rbi/prism/translation/parser34.rbi", - "rbi/prism/translation/parser35.rbi", + "rbi/prism/translation/parser_versions.rbi", "rbi/prism/translation/ripper.rbi", - "rbi/prism/visitor.rbi", - "sig/prism.rbs", - "sig/prism/compiler.rbs", - "sig/prism/dispatcher.rbs", - "sig/prism/dot_visitor.rbs", - "sig/prism/dsl.rbs", - "sig/prism/inspect_visitor.rbs", - "sig/prism/lex_compat.rbs", - "sig/prism/mutation_compiler.rbs", - "sig/prism/node_ext.rbs", - "sig/prism/node.rbs", - "sig/prism/pack.rbs", - "sig/prism/parse_result.rbs", - "sig/prism/parse_result/comments.rbs", - "sig/prism/pattern.rbs", - "sig/prism/reflection.rbs", - "sig/prism/relocation.rbs", - "sig/prism/serialize.rbs", - "sig/prism/string_query.rbs", - "sig/prism/visitor.rbs", + "rbi/rubyvm/node_find.rbi", + "sig/generated/prism.rbs", + "sig/generated/prism/compiler.rbs", + "sig/generated/prism/desugar_compiler.rbs", + "sig/generated/prism/dispatcher.rbs", + "sig/generated/prism/dot_visitor.rbs", + "sig/generated/prism/dsl.rbs", + "sig/generated/prism/inspect_visitor.rbs", + "sig/generated/prism/lex_compat.rbs", + "sig/generated/prism/mutation_compiler.rbs", + "sig/generated/prism/node.rbs", + "sig/generated/prism/node_ext.rbs", + "sig/generated/prism/node_find.rbs", + "sig/generated/prism/parse_result.rbs", + "sig/generated/prism/pattern.rbs", + "sig/generated/prism/reflection.rbs", + "sig/generated/prism/relocation.rbs", + "sig/generated/prism/serialize.rbs", + "sig/generated/prism/string_query.rbs", + "sig/generated/prism/translation.rbs", + "sig/generated/prism/visitor.rbs", + "sig/generated/prism/parse_result/comments.rbs", + 
"sig/generated/prism/parse_result/errors.rbs", + "sig/generated/prism/parse_result/newlines.rbs", + "src/arena.c", + "src/buffer.c", + "src/char.c", + "src/constant_pool.c", "src/diagnostic.c", "src/encoding.c", + "src/integer.c", + "src/json.c", + "src/line_offset_list.c", + "src/list.c", + "src/memchr.c", "src/node.c", "src/options.c", - "src/pack.c", + "src/parser.c", "src/prettyprint.c", "src/prism.c", "src/regexp.c", "src/serialize.c", + "src/source.c", "src/static_literals.c", - "src/token_type.c", - "src/util/pm_buffer.c", - "src/util/pm_char.c", - "src/util/pm_constant_pool.c", - "src/util/pm_integer.c", - "src/util/pm_list.c", - "src/util/pm_memchr.c", - "src/util/pm_newline_list.c", - "src/util/pm_string.c", - "src/util/pm_strncasecmp.c", - "src/util/pm_strpbrk.c" + "src/string_query.c", + "src/stringy.c", + "src/strncasecmp.c", + "src/strpbrk.c", + "src/tokens.c" ] spec.extensions = ["ext/prism/extconf.rb"] diff --git a/lib/prism/relocation.rb b/lib/prism/relocation.rb index 163d2012c5..af0f792827 100644 --- a/lib/prism/relocation.rb +++ b/lib/prism/relocation.rb @@ -1,4 +1,7 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism # Prism parses deterministically for the same input. This provides a nice @@ -11,6 +14,33 @@ module Prism # "save" nodes and locations using a minimal amount of memory (just the # node_id and a field identifier) and then reify them later. module Relocation + # @rbs! 
+ # type entry_value = untyped + # type entry_values = Hash[Symbol, entry_value] + # + # interface _Value + # def start_line: () -> Integer + # def end_line: () -> Integer + # def start_offset: () -> Integer + # def end_offset: () -> Integer + # def start_character_offset: () -> Integer + # def end_character_offset: () -> Integer + # def cached_start_code_units_offset: (_CodeUnitsCache cache) -> Integer + # def cached_end_code_units_offset: (_CodeUnitsCache cache) -> Integer + # def start_column: () -> Integer + # def end_column: () -> Integer + # def start_character_column: () -> Integer + # def end_character_column: () -> Integer + # def cached_start_code_units_column: (_CodeUnitsCache cache) -> Integer + # def cached_end_code_units_column: (_CodeUnitsCache cache) -> Integer + # def leading_comments: () -> Array[Comment] + # def trailing_comments: () -> Array[Comment] + # end + # + # interface _Field + # def fields: (_Value value) -> entry_values + # end + # An entry in a repository that will lazily reify its values when they are # first accessed. class Entry @@ -20,109 +50,152 @@ module Prism class MissingValueError < StandardError end + # @rbs @repository: Repository? + # @rbs @values: Hash[Symbol, untyped]? + # Initialize a new entry with the given repository. + #-- + #: (Repository repository) -> void def initialize(repository) @repository = repository @values = nil end # Fetch the filepath of the value. + #-- + #: () -> String def filepath fetch_value(:filepath) end # Fetch the start line of the value. + #-- + #: () -> Integer def start_line fetch_value(:start_line) end # Fetch the end line of the value. + #-- + #: () -> Integer def end_line fetch_value(:end_line) end # Fetch the start byte offset of the value. + #-- + #: () -> Integer def start_offset fetch_value(:start_offset) end # Fetch the end byte offset of the value. + #-- + #: () -> Integer def end_offset fetch_value(:end_offset) end # Fetch the start character offset of the value. 
+ #-- + #: () -> Integer def start_character_offset fetch_value(:start_character_offset) end # Fetch the end character offset of the value. + #-- + #: () -> Integer def end_character_offset fetch_value(:end_character_offset) end # Fetch the start code units offset of the value, for the encoding that # was configured on the repository. + #-- + #: () -> Integer def start_code_units_offset fetch_value(:start_code_units_offset) end # Fetch the end code units offset of the value, for the encoding that was # configured on the repository. + #-- + #: () -> Integer def end_code_units_offset fetch_value(:end_code_units_offset) end # Fetch the start byte column of the value. + #-- + #: () -> Integer def start_column fetch_value(:start_column) end # Fetch the end byte column of the value. + #-- + #: () -> Integer def end_column fetch_value(:end_column) end # Fetch the start character column of the value. + #-- + #: () -> Integer def start_character_column fetch_value(:start_character_column) end # Fetch the end character column of the value. + #-- + #: () -> Integer def end_character_column fetch_value(:end_character_column) end # Fetch the start code units column of the value, for the encoding that # was configured on the repository. + #-- + #: () -> Integer def start_code_units_column fetch_value(:start_code_units_column) end # Fetch the end code units column of the value, for the encoding that was # configured on the repository. + #-- + #: () -> Integer def end_code_units_column fetch_value(:end_code_units_column) end # Fetch the leading comments of the value. + #-- + #: () -> Array[CommentsField::Comment] def leading_comments fetch_value(:leading_comments) end # Fetch the trailing comments of the value. + #-- + #: () -> Array[CommentsField::Comment] def trailing_comments fetch_value(:trailing_comments) end # Fetch the leading and trailing comments of the value. 
+ #-- + #: () -> Array[CommentsField::Comment] def comments - leading_comments.concat(trailing_comments) + [*leading_comments, *trailing_comments] end # Reify the values on this entry with the given values. This is an # internal-only API that is called from the repository when it is time to # reify the values. + #-- + #: (entry_values values) -> void def reify!(values) # :nodoc: @repository = nil @values = values @@ -131,6 +204,8 @@ module Prism private # Fetch a value from the entry, raising an error if it is missing. + #-- + #: (Symbol name) -> entry_value def fetch_value(name) values.fetch(name) do raise MissingValueError, "No value for #{name}, make sure the " \ @@ -139,27 +214,35 @@ module Prism end # Return the values from the repository, reifying them if necessary. + #-- + #: () -> entry_values def values - @values || (@repository.reify!; @values) + @values || (@repository&.reify!; @values) #: entry_values end end # Represents the source of a repository that will be reparsed. class Source # The value that will need to be reparsed. - attr_reader :value + attr_reader :value #: untyped # Initialize the source with the given value. + #-- + #: (untyped value) -> void def initialize(value) @value = value end # Reparse the value and return the parse result. + #-- + #: () -> ParseResult def result raise NotImplementedError, "Subclasses must implement #result" end # Create a code units cache for the given encoding. + #-- + #: (Encoding encoding) -> _CodeUnitsCache def code_units_cache(encoding) result.code_units_cache(encoding) end @@ -168,6 +251,8 @@ module Prism # A source that is represented by a file path. class SourceFilepath < Source # Reparse the file and return the parse result. + #-- + #: () -> ParseResult def result Prism.parse_file(value) end @@ -176,6 +261,8 @@ module Prism # A source that is represented by a string. class SourceString < Source # Reparse the string and return the parse result. 
+ #-- + #: () -> ParseResult def result Prism.parse(value) end @@ -184,14 +271,18 @@ module Prism # A field that represents the file path. class FilepathField # The file path that this field represents. - attr_reader :value + attr_reader :value #: String # Initialize a new field with the given file path. + #-- + #: (String value) -> void def initialize(value) @value = value end # Fetch the file path. + #-- + #: (_Value _value) -> entry_values def fields(_value) { filepath: value } end @@ -200,6 +291,8 @@ module Prism # A field representing the start and end lines. class LinesField # Fetches the start and end line of a value. + #-- + #: (_Value value) -> entry_values def fields(value) { start_line: value.start_line, end_line: value.end_line } end @@ -208,6 +301,8 @@ module Prism # A field representing the start and end byte offsets. class OffsetsField # Fetches the start and end byte offset of a value. + #-- + #: (_Value value) -> entry_values def fields(value) { start_offset: value.start_offset, end_offset: value.end_offset } end @@ -216,6 +311,8 @@ module Prism # A field representing the start and end character offsets. class CharacterOffsetsField # Fetches the start and end character offset of a value. + #-- + #: (_Value value) -> entry_values def fields(value) { start_character_offset: value.start_character_offset, @@ -228,12 +325,16 @@ module Prism class CodeUnitOffsetsField # A pointer to the repository object that is used for lazily creating a # code units cache. - attr_reader :repository + attr_reader :repository #: Repository # The associated encoding for the code units. - attr_reader :encoding + attr_reader :encoding #: Encoding + + # @rbs @cache: _CodeUnitsCache? # Initialize a new field with the associated repository and encoding. 
+ #-- + #: (Repository repository, Encoding encoding) -> void def initialize(repository, encoding) @repository = repository @encoding = encoding @@ -242,6 +343,8 @@ module Prism # Fetches the start and end code units offset of a value for a particular # encoding. + #-- + #: (_Value value) -> entry_values def fields(value) { start_code_units_offset: value.cached_start_code_units_offset(cache), @@ -252,6 +355,8 @@ module Prism private # Lazily create a code units cache for the associated encoding. + #-- + #: () -> _CodeUnitsCache def cache @cache ||= repository.code_units_cache(encoding) end @@ -260,6 +365,8 @@ module Prism # A field representing the start and end byte columns. class ColumnsField # Fetches the start and end byte column of a value. + #-- + #: (_Value value) -> entry_values def fields(value) { start_column: value.start_column, end_column: value.end_column } end @@ -268,6 +375,8 @@ module Prism # A field representing the start and end character columns. class CharacterColumnsField # Fetches the start and end character column of a value. + #-- + #: (_Value value) -> entry_values def fields(value) { start_character_column: value.start_character_column, @@ -281,12 +390,16 @@ module Prism class CodeUnitColumnsField # The repository object that is used for lazily creating a code units # cache. - attr_reader :repository + attr_reader :repository #: Repository # The associated encoding for the code units. - attr_reader :encoding + attr_reader :encoding #: Encoding + + # @rbs @cache: _CodeUnitsCache? # Initialize a new field with the associated repository and encoding. + #-- + #: (Repository repository, Encoding encoding) -> void def initialize(repository, encoding) @repository = repository @encoding = encoding @@ -295,6 +408,8 @@ module Prism # Fetches the start and end code units column of a value for a particular # encoding. 
+ #-- + #: (_Value value) -> entry_values def fields(value) { start_code_units_column: value.cached_start_code_units_column(cache), @@ -305,6 +420,8 @@ module Prism private # Lazily create a code units cache for the associated encoding. + #-- + #: () -> _CodeUnitsCache def cache @cache ||= repository.code_units_cache(encoding) end @@ -315,9 +432,11 @@ module Prism # An object that represents a slice of a comment. class Comment # The slice of the comment. - attr_reader :slice + attr_reader :slice #: String # Initialize a new comment with the given slice. + #-- + #: (String slice) -> void def initialize(slice) @slice = slice end @@ -326,6 +445,8 @@ module Prism private # Create comment objects from the given values. + #-- + #: (entry_value values) -> Array[Comment] def comments(values) values.map { |value| Comment.new(value.slice) } end @@ -334,6 +455,8 @@ module Prism # A field representing the leading comments. class LeadingCommentsField < CommentsField # Fetches the leading comments of a value. + #-- + #: (_Value value) -> entry_values def fields(value) { leading_comments: comments(value.leading_comments) } end @@ -342,6 +465,8 @@ module Prism # A field representing the trailing comments. class TrailingCommentsField < CommentsField # Fetches the trailing comments of a value. + #-- + #: (_Value value) -> entry_values def fields(value) { trailing_comments: comments(value.trailing_comments) } end @@ -357,15 +482,17 @@ module Prism # The source associated with this repository. This will be either a # SourceFilepath (the most common use case) or a SourceString. - attr_reader :source + attr_reader :source #: Source # The fields that have been configured on this repository. - attr_reader :fields + attr_reader :fields #: Hash[Symbol, _Field] # The entries that have been saved on this repository. - attr_reader :entries + attr_reader :entries #: Hash[Integer, Hash[Symbol, Entry]] # Initialize a new repository with the given source. 
+ #-- + #: (Source source) -> void def initialize(source) @source = source @fields = {} @@ -373,69 +500,93 @@ module Prism end # Create a code units cache for the given encoding from the source. + #-- + #: (Encoding encoding) -> _CodeUnitsCache def code_units_cache(encoding) source.code_units_cache(encoding) end # Configure the filepath field for this repository and return self. + #-- + #: () -> self def filepath raise ConfigurationError, "Can only specify filepath for a filepath source" unless source.is_a?(SourceFilepath) field(:filepath, FilepathField.new(source.value)) end # Configure the lines field for this repository and return self. + #-- + #: () -> self def lines field(:lines, LinesField.new) end # Configure the offsets field for this repository and return self. + #-- + #: () -> self def offsets field(:offsets, OffsetsField.new) end # Configure the character offsets field for this repository and return # self. + #-- + #: () -> self def character_offsets field(:character_offsets, CharacterOffsetsField.new) end # Configure the code unit offsets field for this repository for a specific # encoding and return self. + #-- + #: (Encoding encoding) -> self def code_unit_offsets(encoding) field(:code_unit_offsets, CodeUnitOffsetsField.new(self, encoding)) end # Configure the columns field for this repository and return self. + #-- + #: () -> self def columns field(:columns, ColumnsField.new) end # Configure the character columns field for this repository and return # self. + #-- + #: () -> self def character_columns field(:character_columns, CharacterColumnsField.new) end # Configure the code unit columns field for this repository for a specific # encoding and return self. + #-- + #: (Encoding encoding) -> self def code_unit_columns(encoding) field(:code_unit_columns, CodeUnitColumnsField.new(self, encoding)) end # Configure the leading comments field for this repository and return # self. 
+ #-- + #: () -> self def leading_comments field(:leading_comments, LeadingCommentsField.new) end # Configure the trailing comments field for this repository and return # self. + #-- + #: () -> self def trailing_comments field(:trailing_comments, TrailingCommentsField.new) end # Configure both the leading and trailing comment fields for this # repository and return self. + #-- + #: () -> self def comments leading_comments.trailing_comments end @@ -443,6 +594,8 @@ module Prism # This method is called from nodes and locations when they want to enter # themselves into the repository. It is internal-only and meant to be # called from the #save* APIs. + #-- + #: (Integer node_id, Symbol field_name) -> Entry def enter(node_id, field_name) # :nodoc: entry = Entry.new(self) @entries[node_id][field_name] = entry @@ -452,6 +605,8 @@ module Prism # This method is called from the entries in the repository when they need # to reify their values. It is internal-only and meant to be called from # the various value APIs. + #-- + #: () -> void def reify! # :nodoc: result = source.result @@ -465,7 +620,7 @@ module Prism while (node = queue.shift) @entries[node.node_id].each do |field_name, entry| value = node.public_send(field_name) - values = {} #: Hash[Symbol, untyped] + values = {} #: entry_values fields.each_value do |field| values.merge!(field.fields(value)) @@ -484,6 +639,8 @@ module Prism # Append the given field to the repository and return the repository so # that these calls can be chained. + #-- + #: (Symbol name, _Field) -> self def field(name, value) raise ConfigurationError, "Cannot specify multiple #{name} fields" if @fields.key?(name) @fields[name] = value @@ -492,11 +649,15 @@ module Prism end # Create a new repository for the given filepath. + #-- + #: (String value) -> Repository def self.filepath(value) Repository.new(SourceFilepath.new(value)) end # Create a new repository for the given string. 
+ #-- + #: (String value) -> Repository def self.string(value) Repository.new(SourceString.new(value)) end diff --git a/lib/prism/string_query.rb b/lib/prism/string_query.rb index 9011051d2b..99ce57e5fe 100644 --- a/lib/prism/string_query.rb +++ b/lib/prism/string_query.rb @@ -1,28 +1,44 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism # Query methods that allow categorizing strings based on their context for # where they could be valid in a Ruby syntax tree. class StringQuery + # @rbs! + # def self.local?: (String string) -> bool + # def self.constant?: (String string) -> bool + # def self.method_name?: (String string) -> bool + # The string that this query is wrapping. - attr_reader :string + attr_reader :string #: String # Initialize a new query with the given string. + #-- + #: (String string) -> void def initialize(string) @string = string end # Whether or not this string is a valid local variable name. + #-- + #: () -> bool def local? StringQuery.local?(string) end # Whether or not this string is a valid constant name. + #-- + #: () -> bool def constant? StringQuery.constant?(string) end # Whether or not this string is a valid method name. + #-- + #: () -> bool def method_name? 
StringQuery.method_name?(string) end diff --git a/lib/prism/translation.rb b/lib/prism/translation.rb index 511c80febc..5a086a7542 100644 --- a/lib/prism/translation.rb +++ b/lib/prism/translation.rb @@ -1,4 +1,7 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism # This module is responsible for converting the prism syntax tree into other @@ -6,9 +9,11 @@ module Prism module Translation # steep:ignore autoload :Parser, "prism/translation/parser" autoload :ParserCurrent, "prism/translation/parser_current" - autoload :Parser33, "prism/translation/parser33" - autoload :Parser34, "prism/translation/parser34" - autoload :Parser35, "prism/translation/parser35" + autoload :Parser33, "prism/translation/parser_versions" + autoload :Parser34, "prism/translation/parser_versions" + autoload :Parser35, "prism/translation/parser_versions" + autoload :Parser40, "prism/translation/parser_versions" + autoload :Parser41, "prism/translation/parser_versions" autoload :Ripper, "prism/translation/ripper" autoload :RubyParser, "prism/translation/ruby_parser" end diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb index d43ad7c1e4..70031f133a 100644 --- a/lib/prism/translation/parser.rb +++ b/lib/prism/translation/parser.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true +# :markup: markdown begin required_version = ">= 3.3.7.2" @@ -18,6 +19,13 @@ module Prism # whitequark/parser gem's syntax tree. It inherits from the base parser for # the parser gem, and overrides the parse* methods to parse with prism and # then translate. + # + # Note that this version of the parser always parses using the latest + # version of Ruby syntax supported by Prism. If you want specific version + # support, use one of the version-specific subclasses, such as + # `Prism::Translation::Parser34`. If you want to parse using the same + # version of Ruby syntax as the currently running version of Ruby, use + # `Prism::Translation::ParserCurrent`. 
class Parser < ::Parser::Base Diagnostic = ::Parser::Diagnostic # :nodoc: private_constant :Diagnostic @@ -25,7 +33,7 @@ module Prism # The parser gem has a list of diagnostics with a hard-coded set of error # messages. We create our own diagnostic class in order to set our own # error messages. - class PrismDiagnostic < Diagnostic + class PrismDiagnostic < Diagnostic # :nodoc: # This is the cached message coming from prism. attr_reader :message @@ -76,7 +84,7 @@ module Prism end def version # :nodoc: - 34 + 41 end # The default encoding for Ruby files is UTF-8. @@ -348,8 +356,10 @@ module Prism "3.3.1" when 34 "3.4.0" - when 35 - "3.5.0" + when 35, 40 + "4.0.0" + when 41 + "4.1.0" else "latest" end diff --git a/lib/prism/translation/parser/builder.rb b/lib/prism/translation/parser/builder.rb index d3b51f4275..7fc3bba6b7 100644 --- a/lib/prism/translation/parser/builder.rb +++ b/lib/prism/translation/parser/builder.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true +# :markup: markdown module Prism module Translation @@ -6,12 +7,14 @@ module Prism # A builder that knows how to convert more modern Ruby syntax # into whitequark/parser gem's syntax tree. class Builder < ::Parser::Builders::Default - # It represents the `it` block argument, which is not yet implemented in the Parser gem. + # It represents the `it` block argument, which is not yet implemented in + # the Parser gem. def itarg n(:itarg, [:it], nil) end - # The following three lines have been added to support the `it` block parameter syntax in the source code below. + # The following three lines have been added to support the `it` block + # parameter syntax in the source code below. 
# # if args.type == :itarg # block_type = :itblock @@ -55,6 +58,12 @@ module Prism method_call.loc.with_expression(join_exprs(method_call, block))) end end + + # def foo(&nil); end + # ^^^^ + def blocknilarg(amper_t, nil_t) + n0(:blocknilarg, arg_prefix_map(amper_t, nil_t)) + end end end end diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb index 0bd9d74f93..d11db12ae6 100644 --- a/lib/prism/translation/parser/compiler.rb +++ b/lib/prism/translation/parser/compiler.rb @@ -1,13 +1,14 @@ # frozen_string_literal: true +# :markup: markdown module Prism module Translation class Parser # A visitor that knows how to convert a prism syntax tree into the # whitequark/parser gem's syntax tree. - class Compiler < ::Prism::Compiler + class Compiler < ::Prism::Compiler # :nodoc: # Raised when the tree is malformed or there is a bug in the compiler. - class CompilationError < StandardError + class CompilationError < StandardError # :nodoc: end # The Parser::Base instance that is being used to build the AST. @@ -216,7 +217,7 @@ module Prism rescue_clause.exceptions.any? ? builder.array(nil, visit_all(rescue_clause.exceptions), nil) : nil, token(rescue_clause.operator_loc), visit(rescue_clause.reference), - srange_find(find_start_offset, find_end_offset, ";"), + srange_semicolon(find_start_offset, find_end_offset), visit(rescue_clause.statements) ) end until (rescue_clause = rescue_clause.subsequent).nil? @@ -296,11 +297,6 @@ module Prism if node.call_operator_loc.nil? case name - when :-@ - case (receiver = node.receiver).type - when :integer_node, :float_node, :rational_node, :imaginary_node - return visit(numeric_negate(node.message_loc, receiver)) - end when :! 
return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block) when :=~ @@ -322,7 +318,7 @@ module Prism visit_all(arguments), token(node.closing_loc), ), - srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, "="), + token(node.equal_loc), visit(node.arguments.arguments.last) ), block @@ -339,7 +335,7 @@ module Prism if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil? builder.assign( builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)), - srange_find(message_loc.end_offset, node.arguments.location.start_offset, "="), + token(node.equal_loc), visit(node.arguments.arguments.last) ) else @@ -788,7 +784,7 @@ module Prism if (do_keyword_loc = node.do_keyword_loc) token(do_keyword_loc) else - srange_find(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset, ";") + srange_semicolon(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset) end, visit(node.statements), token(node.end_keyword_loc) @@ -920,7 +916,7 @@ module Prism if (then_keyword_loc = node.then_keyword_loc) token(then_keyword_loc) else - srange_find(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset, ";") + srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset) end, visit(node.statements), case node.subsequent @@ -986,7 +982,7 @@ module Prism if (then_loc = node.then_loc) token(then_loc) else - srange_find(node.pattern.location.end_offset, node.statements&.location&.start_offset, ";") + srange_semicolon(node.pattern.location.end_offset, node.statements&.location&.start_offset) end, visit(node.statements) ) @@ -1323,7 +1319,7 @@ module Prism # A node that is missing from 
the syntax tree. This is only used in the # case of a syntax error. The parser gem doesn't have such a concept, so # we invent our own here. - def visit_missing_node(node) + def visit_error_recovery_node(node) ::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location))) end @@ -1389,6 +1385,12 @@ module Prism builder.nil(token(node.location)) end + # def foo(&nil); end + # ^^^^ + def visit_no_block_parameter_node(node) + builder.blocknilarg(token(node.operator_loc), token(node.keyword_loc)) + end + # def foo(**nil); end # ^^^^^ def visit_no_keywords_parameter_node(node) @@ -1766,7 +1768,7 @@ module Prism end else parts = - if node.value == "" + if node.value_loc.nil? [] elsif node.value.include?("\n") string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening) @@ -1807,7 +1809,7 @@ module Prism if (then_keyword_loc = node.then_keyword_loc) token(then_keyword_loc) else - srange_find(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset, ";") + srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset) end, visit(node.else_clause), token(node.else_clause&.else_keyword_loc), @@ -1838,7 +1840,7 @@ module Prism if (do_keyword_loc = node.do_keyword_loc) token(do_keyword_loc) else - srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";") + srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset) end, visit(node.statements), token(node.closing_loc) @@ -1862,7 +1864,7 @@ module Prism if (then_keyword_loc = node.then_keyword_loc) token(then_keyword_loc) else - srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset, ";") + srange_semicolon(node.conditions.last.location.end_offset, 
node.statements&.location&.start_offset) end, visit(node.statements) ) @@ -1882,7 +1884,7 @@ module Prism if (do_keyword_loc = node.do_keyword_loc) token(do_keyword_loc) else - srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";") + srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset) end, visit(node.statements), token(node.closing_loc) @@ -1966,22 +1968,6 @@ module Prism elements end - # Negate the value of a numeric node. This is a special case where you - # have a negative sign on one line and then a number on the next line. - # In normal Ruby, this will always be a method call. The parser gem, - # however, marks this as a numeric literal. We have to massage the tree - # here to get it into the correct form. - def numeric_negate(message_loc, receiver) - case receiver.type - when :integer_node, :float_node - receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location)) - when :rational_node - receiver.copy(numerator: -receiver.numerator, location: message_loc.join(receiver.location)) - when :imaginary_node - receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location)) - end - end - # Blocks can have a special set of parameters that automatically expand # when given arrays if they have a single required parameter and no # other parameters. @@ -2011,16 +1997,16 @@ module Prism Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset]) end - # Constructs a new source range by finding the given character between - # the given start offset and end offset. If the needle is not found, it - # returns nil. Importantly it does not search past newlines or comments. + # Constructs a new source range by finding a semicolon between the given + # start offset and end offset. If the semicolon is not found, it returns + # nil. 
Importantly it does not search past newlines or comments. # # Note that end_offset is allowed to be nil, in which case this will # search until the end of the string. - def srange_find(start_offset, end_offset, character) - if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*#{character}/]) + def srange_semicolon(start_offset, end_offset) + if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*;/]) final_offset = start_offset + match.bytesize - [character, Range.new(source_buffer, offset_cache[final_offset - character.bytesize], offset_cache[final_offset])] + [";", Range.new(source_buffer, offset_cache[final_offset - 1], offset_cache[final_offset])] end end @@ -2192,7 +2178,7 @@ module Prism else lines.sum do |line| count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? } - count -= 1 if !line.end_with?("\n") && count > 0 + count -= 1 if line.match?(/(?:\A|[^\\])(?:\\\\)*\\n\z/) && count > 0 count end end diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 8f2d065b73..e82042867f 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -1,14 +1,16 @@ # frozen_string_literal: true +# :markup: markdown require "strscan" require_relative "../../polyfill/append_as_bytes" +require_relative "../../polyfill/scan_byte" module Prism module Translation class Parser # Accepts a list of prism tokens and converts them into the expected # format for the parser gem. - class Lexer + class Lexer # :nodoc: # These tokens are always skipped TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF]) private_constant :TYPES_ALWAYS_SKIP @@ -16,8 +18,6 @@ module Prism # The direct translating of types between the two lexers. TYPES = { # These tokens should never appear in the output of the lexer. 
- MISSING: nil, - NOT_PROVIDED: nil, EMBDOC_END: nil, EMBDOC_LINE: nil, @@ -87,6 +87,7 @@ module Prism KEYWORD_DEF: :kDEF, KEYWORD_DEFINED: :kDEFINED, KEYWORD_DO: :kDO, + KEYWORD_DO_BLOCK: :kDO_BLOCK, KEYWORD_DO_LOOP: :kDO_COND, KEYWORD_END: :kEND, KEYWORD_END_UPCASE: :klEND, @@ -188,8 +189,8 @@ module Prism # without them. We should find another way to do this, but in the # meantime we'll hide them from the documentation and mark them as # private constants. - EXPR_BEG = 0x1 # :nodoc: - EXPR_LABEL = 0x400 # :nodoc: + EXPR_BEG = 0x1 + EXPR_LABEL = 0x400 # It is used to determine whether `do` is of the token type `kDO` or `kDO_LAMBDA`. # @@ -201,7 +202,7 @@ module Prism # The following token types are listed as those classified as `tLPAREN`. LPAREN_CONVERSION_TOKEN_TYPES = Set.new([ :kBREAK, :tCARET, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3, - :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS + :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS, :tLCURLY ]) # Types of tokens that are allowed to continue a method call with comments in-between. @@ -232,7 +233,7 @@ module Prism @offset_cache = offset_cache end - Range = ::Parser::Source::Range # :nodoc: + Range = ::Parser::Source::Range private_constant :Range # Convert the prism tokens into the expected format for the parser gem. 
@@ -275,20 +276,20 @@ module Prism when :tCOMMENT if token.type == :EMBDOC_BEGIN - while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1) + while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1) value += next_token.value index += 1 end value += next_token.value - location = range(token.location.start_offset, lexed[index][0].location.end_offset) + location = range(token.location.start_offset, next_token.location.end_offset) index += 1 else is_at_eol = value.chomp!.nil? location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1)) - prev_token = lexed[index - 2][0] if index - 2 >= 0 - next_token = lexed[index][0] + prev_token, _ = lexed[index - 2] if index - 2 >= 0 + next_token, _ = lexed[index] is_inline_comment = prev_token&.location&.start_line == token.location.start_line if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type) @@ -307,7 +308,7 @@ module Prism end end when :tNL - next_token = next_token = lexed[index][0] + next_token, _ = lexed[index] # Newlines after comments are emitted out of order. if next_token&.type == :COMMENT comment_newline_location = location @@ -344,8 +345,8 @@ module Prism location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value)) value = nil when :tSTRING_BEG - next_token = lexed[index][0] - next_next_token = lexed[index + 1][0] + next_token, _ = lexed[index] + next_next_token, _ = lexed[index + 1] basic_quotes = value == '"' || value == "'" if basic_quotes && next_token&.type == :STRING_END @@ -413,7 +414,8 @@ module Prism while token.type == :STRING_CONTENT current_length += token.value.bytesize # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line. 
- is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line + prev_token, _ = lexed[index - 2] if index - 2 >= 0 + is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line # The parser gem only removes indentation when the heredoc is not nested not_nested = heredoc_stack.size == 1 if is_percent_array @@ -423,11 +425,16 @@ module Prism end current_string << unescape_string(value, quote_stack.last) - if (backslash_count = token.value[/(\\{1,})\n/, 1]&.length).nil? || backslash_count.even? || !interpolation?(quote_stack.last) + relevant_backslash_count = if quote_stack.last.start_with?("%W", "%I") + 0 # the last backslash escapes the newline + else + token.value[/(\\{1,})\n/, 1]&.length || 0 + end + if relevant_backslash_count.even? || !interpolation?(quote_stack.last) tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]] break end - token = lexed[index][0] + token, _ = lexed[index] index += 1 end else @@ -482,7 +489,7 @@ module Prism end if percent_array?(quote_stack.pop) - prev_token = lexed[index - 2][0] if index - 2 >= 0 + prev_token, _ = lexed[index - 2] if index - 2 >= 0 empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type) ends_with_whitespace = prev_token&.type == :WORDS_SEP # parser always emits a space token after content in a percent array, even if no actual whitespace is present. 
@@ -491,7 +498,7 @@ module Prism end end when :tSYMBEG - if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END + if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END next_location = token.location.join(next_token.location) type = :tSYMBOL value = next_token.value @@ -506,13 +513,13 @@ module Prism type = :tIDENTIFIER end when :tXSTRING_BEG - if (next_token = lexed[index][0]) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type) + if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type) # self.`() type = :tBACK_REF2 end quote_stack.push(value) when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG - if (next_token = lexed[index][0]) && next_token.type == :WORDS_SEP + if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP index += 1 end @@ -588,9 +595,9 @@ module Prism previous_line = -1 result = Float::MAX - while (lexed[next_token_index] && next_token = lexed[next_token_index][0]) + while (next_token = lexed[next_token_index]&.first) next_token_index += 1 - next_next_token = lexed[next_token_index] && lexed[next_token_index][0] + next_next_token, _ = lexed[next_token_index] first_token_on_line = next_token.location.start_column == 0 # String content inside nested heredocs and interpolation is ignored @@ -761,12 +768,12 @@ module Prism elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/)) # \M-x where x is an ASCII printable character escape_read(result, scanner, control, true) - elsif (byte = scanner.get_byte) + elsif (byte = scanner.scan_byte) # Something else after an escape. - if control && byte == "?" + if control && byte == 0x3f # ASCII '?' 
result.append_as_bytes(escape_build(0x7f, false, meta)) else - result.append_as_bytes(escape_build(byte.ord, control, meta)) + result.append_as_bytes(escape_build(byte, control, meta)) end end end diff --git a/lib/prism/translation/parser33.rb b/lib/prism/translation/parser33.rb deleted file mode 100644 index b09266e06a..0000000000 --- a/lib/prism/translation/parser33.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -module Prism - module Translation - # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`. - class Parser33 < Parser - def version # :nodoc: - 33 - end - end - end -end diff --git a/lib/prism/translation/parser34.rb b/lib/prism/translation/parser34.rb deleted file mode 100644 index 0ead70ad3c..0000000000 --- a/lib/prism/translation/parser34.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -module Prism - module Translation - # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`. - class Parser34 < Parser - def version # :nodoc: - 34 - end - end - end -end diff --git a/lib/prism/translation/parser35.rb b/lib/prism/translation/parser35.rb deleted file mode 100644 index a6abc12589..0000000000 --- a/lib/prism/translation/parser35.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -module Prism - module Translation - # This class is the entry-point for Ruby 3.5 of `Prism::Translation::Parser`. 
- class Parser35 < Parser - def version # :nodoc: - 35 - end - end - end -end diff --git a/lib/prism/translation/parser_current.rb b/lib/prism/translation/parser_current.rb index b44769fde7..f7c1070e30 100644 --- a/lib/prism/translation/parser_current.rb +++ b/lib/prism/translation/parser_current.rb @@ -1,4 +1,6 @@ # frozen_string_literal: true +# :markup: markdown +#-- # typed: ignore module Prism @@ -8,11 +10,13 @@ module Prism ParserCurrent = Parser33 when /^3\.4\./ ParserCurrent = Parser34 - when /^3\.5\./ - ParserCurrent = Parser35 + when /^3\.5\./, /^4\.0\./ + ParserCurrent = Parser40 + when /^4\.1\./ + ParserCurrent = Parser41 else # Keep this in sync with released Ruby. - parser = Parser34 + parser = Parser40 major, minor, _patch = Gem::Version.new(RUBY_VERSION).segments warn "warning: `Prism::Translation::Current` is loading #{parser.name}, " \ "but you are running #{major}.#{minor}." diff --git a/lib/prism/translation/parser_versions.rb b/lib/prism/translation/parser_versions.rb new file mode 100644 index 0000000000..720c7d548c --- /dev/null +++ b/lib/prism/translation/parser_versions.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true +# :markup: markdown + +module Prism + module Translation + # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`. + class Parser33 < Parser + def version # :nodoc: + 33 + end + end + + # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`. + class Parser34 < Parser + def version # :nodoc: + 34 + end + end + + # This class is the entry-point for Ruby 4.0 of `Prism::Translation::Parser`. + class Parser40 < Parser + def version # :nodoc: + 40 + end + end + + Parser35 = Parser40 # :nodoc: + + # This class is the entry-point for Ruby 4.1 of `Prism::Translation::Parser`. 
+ class Parser41 < Parser + def version # :nodoc: + 41 + end + end + end +end diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index 95f366ac91..f179a149a1 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true - -require "ripper" +# :markup: markdown module Prism module Translation @@ -23,22 +22,10 @@ module Prism # - on_comma # - on_ignored_nl # - on_ignored_sp - # - on_kw - # - on_label_end - # - on_lbrace - # - on_lbracket - # - on_lparen # - on_nl - # - on_op # - on_operator_ambiguous - # - on_rbrace - # - on_rbracket - # - on_rparen # - on_semicolon # - on_sp - # - on_symbeg - # - on_tstring_beg - # - on_tstring_end # class Ripper < Compiler # Parses the given Ruby program read from +src+. @@ -70,7 +57,8 @@ module Prism # [[1, 13], :on_kw, "end", END ]] # def self.lex(src, filename = "-", lineno = 1, raise_errors: false) - result = Prism.lex_compat(src, filepath: filename, line: lineno) + coerced = coerce_source(src) + result = Prism.lex_compat(coerced, filepath: filename, line: lineno, version: "current", encoding: coerced.encoding) if result.failure? && raise_errors raise SyntaxError, result.errors.first.message @@ -79,6 +67,34 @@ module Prism end end + # Tokenizes the Ruby program and returns an array of strings. + # The +filename+ and +lineno+ arguments are mostly ignored, since the + # return value is just the tokenized input. + # By default, this method does not handle syntax errors in +src+, + # use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+. + # + # p Ripper.tokenize("def m(a) nil end") + # # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"] + # + def self.tokenize(...) 
+ lex(...).map { |token| token[2] } + end + + # Mirros the various lex_types that ripper supports + def self.coerce_source(source) # :nodoc: + if source.is_a?(IO) + source.read + elsif source.respond_to?(:gets) + src = +"" + while line = source.gets + src << line + end + src + else + source.to_str + end + end + # This contains a table of all of the parser events and their # corresponding arity. PARSER_EVENT_TABLE = { @@ -331,7 +347,7 @@ module Prism "__ENCODING__", "__FILE__", "__LINE__" - ] + ].to_set # A list of all of the Ruby binary operators. BINARY_OPERATORS = [ @@ -356,7 +372,7 @@ module Prism :/, :*, :** - ] + ].to_set private_constant :KEYWORDS, :BINARY_OPERATORS @@ -425,9 +441,93 @@ module Prism end end + autoload :Filter, "prism/translation/ripper/filter" + autoload :Lexer, "prism/translation/ripper/lexer" autoload :SexpBuilder, "prism/translation/ripper/sexp" autoload :SexpBuilderPP, "prism/translation/ripper/sexp" + # Provides optimized access to line and column information. + # Ripper bounds are mostly accessed in a linear fashion, so + # we can try a linear scan first and fall back to binary search. + class LineAndColumnCache # :nodoc: + # How many should it look ahead/behind before falling back to binary searching. + WINDOW = 8 + private_constant :WINDOW + + #: (Source source) -> void + def initialize(source) + @source = source + @offsets = source.offsets + @hint = 0 + end + + #: (Integer byte_offset) -> [Integer, Integer] + def line_and_column(byte_offset) + @hint = new_hint(byte_offset) || @source.find_line(byte_offset) + return [@hint + @source.start_line, byte_offset - @offsets[@hint]] + end + + private + + def new_hint(byte_offset) + if @offsets[@hint] <= byte_offset + # Same line? 
+ if (@hint + 1 >= @offsets.size || @offsets[@hint + 1] > byte_offset) + return @hint + end + + # Scan forwards + limit = [@hint + WINDOW + 1, @offsets.size].min + idx = @hint + 1 + while idx < limit + if @offsets[idx] > byte_offset + return idx - 1 + end + if @offsets[idx] == byte_offset + return idx + end + idx += 1 + end + else + # Scan backwards + limit = @hint > WINDOW ? @hint - WINDOW : 0 + idx = @hint + while idx >= limit + 1 + if @offsets[idx - 1] <= byte_offset + return idx - 1 + end + idx -= 1 + end + end + + nil + end + end + + # :stopdoc: + # This is not part of the public API but used by some gems. + + # Ripper-internal bitflags. + LEX_STATE_NAMES = %i[ + BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM + ].map.with_index.to_h { |name, i| [2 ** i, name] }.freeze + private_constant :LEX_STATE_NAMES + + LEX_STATE_NAMES.each do |value, key| + const_set("EXPR_#{key}", value) + end + EXPR_NONE = 0 + EXPR_VALUE = EXPR_BEG + EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS + EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG + EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN + + def self.lex_state_name(state) + LEX_STATE_NAMES.filter_map { |flag, name| name if state & flag != 0 }.join("|") + end + + # :startdoc: + # The source that is being parsed. attr_reader :source @@ -437,16 +537,17 @@ module Prism # The current line number of the parser. attr_reader :lineno - # The current column number of the parser. + # The current column in bytes of the parser. attr_reader :column # Create a new Translation::Ripper object with the given source. 
def initialize(source, filename = "(ripper)", lineno = 1) - @source = source + @source = Ripper.coerce_source(source) @filename = filename @lineno = lineno @column = 0 @result = nil + @line_and_column_cache = nil end ########################################################################## @@ -465,7 +566,12 @@ module Prism bounds(location) if comment.is_a?(InlineComment) - on_comment(comment.slice) + # Inline comments always contain a newline if the line itself contains it + if result.source.source.bytesize > comment.location.end_offset + on_comment("#{comment.slice}\n") + else + on_comment(comment.slice) + end else offset = location.start_offset lines = comment.slice.lines @@ -546,9 +652,14 @@ module Prism # Visitor methods ########################################################################## + # :stopdoc: + # alias foo bar # ^^^^^^^^^^^^^ def visit_alias_method_node(node) + bounds(node.keyword_loc) + on_kw("alias") + new_name = visit(node.new_name) old_name = visit(node.old_name) @@ -559,6 +670,9 @@ module Prism # alias $foo $bar # ^^^^^^^^^^^^^^^ def visit_alias_global_variable_node(node) + bounds(node.keyword_loc) + on_kw("alias") + new_name = visit_alias_global_variable_node_value(node.new_name) old_name = visit_alias_global_variable_node_value(node.old_name) @@ -584,6 +698,10 @@ module Prism # ^^^^^^^^^ def visit_alternation_pattern_node(node) left = visit_pattern_node(node.left) + + bounds(node.operator_loc) + on_op("|") + right = visit_pattern_node(node.right) bounds(node.location) @@ -594,7 +712,13 @@ module Prism # parenthesis node that can be used to wrap patterns. 
private def visit_pattern_node(node) if node.is_a?(ParenthesesNode) - visit(node.body) + bounds(node.opening_loc) + on_lparen("(") + result = visit(node.body) + bounds(node.closing_loc) + on_rparen(")") + + result else visit(node) end @@ -604,6 +728,14 @@ module Prism # ^^^^^^^ def visit_and_node(node) left = visit(node.left) + + bounds(node.operator_loc) + if node.operator == "and" + on_kw("and") + else + on_op("&&") + end + right = visit(node.right) bounds(node.location) @@ -631,6 +763,8 @@ module Prism previous = element end + visit_words_sep(opening_loc, node.elements.last, node.closing_loc) + bounds(node.closing_loc) on_tstring_end(node.closing) when /^%i/ @@ -650,6 +784,8 @@ module Prism previous = element end + visit_words_sep(opening_loc, node.elements.last, node.closing_loc) + bounds(node.closing_loc) on_tstring_end(node.closing) when /^%W/ @@ -687,6 +823,8 @@ module Prism previous = element end + visit_words_sep(opening_loc, node.elements.last, node.closing_loc) + bounds(node.closing_loc) on_tstring_end(node.closing) when /^%I/ @@ -724,6 +862,8 @@ module Prism previous = element end + visit_words_sep(opening_loc, node.elements.last, node.closing_loc) + bounds(node.closing_loc) on_tstring_end(node.closing) else @@ -740,15 +880,21 @@ module Prism on_array(elements) end - # Dispatch a words_sep event that contains the space between the elements + # Dispatch words_sep events that contains the whitespace between the elements # of list literals. private def visit_words_sep(opening_loc, previous, current) - end_offset = (previous.nil? ? opening_loc : previous.location).end_offset - start_offset = current.location.start_offset - - if end_offset != start_offset - bounds(current.location.copy(start_offset: end_offset)) - on_words_sep(source.byteslice(end_offset...start_offset)) + start_offset = (previous.nil? ? 
opening_loc : previous.location).end_offset + end_offset = current.start_offset + length = end_offset - start_offset + + if length > 0 + whitespace = source.byteslice(start_offset, length) + current_offset = start_offset + whitespace.each_line do |part| + bounds(opening_loc.copy(start_offset: current_offset, length: part.bytesize)) + on_words_sep(part) + current_offset += part.bytesize + end end end @@ -774,9 +920,18 @@ module Prism # ^^^^^ def visit_array_pattern_node(node) constant = visit(node.constant) + + if node.opening_loc + bounds(node.opening_loc) + node.opening == "[" ? on_lbracket("[") : on_lparen("(") + end + requireds = visit_all(node.requireds) if node.requireds.any? rest = if (rest_node = node.rest).is_a?(SplatNode) + bounds(rest_node.operator_loc) + on_op("*") + if rest_node.expression.nil? bounds(rest_node.location) on_var_field(nil) @@ -787,6 +942,10 @@ module Prism posts = visit_all(node.posts) if node.posts.any? + if node.closing_loc + bounds(node.closing_loc) + node.closing == "]" ? 
on_rbracket("]") : on_rparen(")") + end bounds(node.location) on_aryptn(constant, requireds, rest, posts) end @@ -802,6 +961,12 @@ module Prism # ^^^^ def visit_assoc_node(node) key = visit(node.key) + + if node.operator_loc + bounds(node.operator_loc) + on_op("=>") + end + value = visit(node.value) bounds(node.location) @@ -814,6 +979,9 @@ module Prism # { **foo } # ^^^^^ def visit_assoc_splat_node(node) + bounds(node.operator_loc) + on_op("**") + value = visit(node.value) bounds(node.location) @@ -830,8 +998,18 @@ module Prism # begin end # ^^^^^^^^^ def visit_begin_node(node) + if node.begin_keyword_loc + bounds(node.begin_keyword_loc) + on_kw("begin") + end + clauses = visit_begin_node_clauses(node.begin_keyword_loc, node, false) + if node.end_keyword_loc + bounds(node.end_keyword_loc) + on_kw("end") + end + bounds(node.location) on_begin(clauses) end @@ -843,7 +1021,7 @@ module Prism on_stmts_add(on_stmts_new, on_void_stmt) else body = node.statements.body - body.unshift(nil) if void_stmt?(location, node.statements.body[0].location, allow_newline) + body = [nil, *body] if void_stmt?(location, node.statements.body[0].location, allow_newline) bounds(node.statements.location) visit_statements_node_body(body) @@ -852,12 +1030,15 @@ module Prism rescue_clause = visit(node.rescue_clause) else_clause = unless (else_clause_node = node.else_clause).nil? + bounds(else_clause_node.else_keyword_loc) + on_kw("else") + else_statements = if else_clause_node.statements.nil? 
[nil] else body = else_clause_node.statements.body - body.unshift(nil) if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline) + body = [nil, *body] if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline) body end @@ -879,7 +1060,7 @@ module Prism on_bodystmt(visit_statements_node_body([nil]), nil, nil, nil) when StatementsNode body = [*node.body] - body.unshift(nil) if void_stmt?(location, body[0].location, allow_newline) + body = [nil, *body] if void_stmt?(location, body[0].location, allow_newline) stmts = visit_statements_node_body(body) bounds(node.body.first.location) @@ -894,6 +1075,8 @@ module Prism # foo(&bar) # ^^^^ def visit_block_argument_node(node) + bounds(node.operator_loc) + on_op("&") visit(node.expression) end @@ -907,6 +1090,13 @@ module Prism # Visit a BlockNode. def visit_block_node(node) braces = node.opening == "{" + bounds(node.opening_loc) + if braces + on_lbrace("{") + else + on_kw("do") + end + parameters = visit(node.parameters) body = @@ -919,7 +1109,7 @@ module Prism braces ? stmts : on_bodystmt(stmts, nil, nil, nil) when StatementsNode stmts = node.body.body - stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) + stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) stmts = visit_statements_node_body(stmts) bounds(node.body.location) @@ -931,6 +1121,14 @@ module Prism end if braces + bounds(node.closing_loc) + on_rbrace("}") + else + bounds(node.closing_loc) + on_kw("end") + end + + if braces bounds(node.location) on_brace_block(parameters, body) else @@ -942,12 +1140,15 @@ module Prism # def foo(&bar); end # ^^^^ def visit_block_parameter_node(node) + bounds(node.operator_loc) + on_op("&") + if node.name_loc.nil? 
bounds(node.location) on_blockarg(nil) else bounds(node.name_loc) - name = visit_token(node.name.to_s) + name = on_ident(node.name.to_s) bounds(node.location) on_blockarg(name) @@ -956,6 +1157,9 @@ module Prism # A block's parameters. def visit_block_parameters_node(node) + bounds(node.opening_loc) + on_op("|") + parameters = if node.parameters.nil? on_params(nil, nil, nil, nil, nil, nil, nil) @@ -970,6 +1174,9 @@ module Prism false end + bounds(node.closing_loc) + on_op("|") + bounds(node.location) on_block_var(parameters, locals) end @@ -980,6 +1187,9 @@ module Prism # break foo # ^^^^^^^^^ def visit_break_node(node) + bounds(node.keyword_loc) + on_kw("break") + if node.arguments.nil? bounds(node.location) on_break(on_args_new) @@ -1004,20 +1214,32 @@ module Prism case node.name when :[] receiver = visit(node.receiver) - arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + + bounds(node.opening_loc) + on_lbracket("[") + + arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + + bounds(node.closing_loc) + on_rbracket("]") + + block = visit(block_node) bounds(node.location) call = on_aref(receiver, arguments) - if block.nil? - call - else + if block_node bounds(node.location) on_method_add_block(call, block) + else + call end when :[]= receiver = visit(node.receiver) + bounds(node.opening_loc) + on_lbracket("[") + *arguments, last_argument = node.arguments.arguments arguments << node.block if !node.block.nil? 
@@ -1033,6 +1255,11 @@ module Prism end end + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.equal_loc) + on_op("=") + bounds(node.location) call = on_aref_field(receiver, arguments) value = visit_write_value(last_argument) @@ -1040,27 +1267,54 @@ module Prism bounds(last_argument.location) on_assign(call, value) when :-@, :+@, :~ - receiver = visit(node.receiver) + bounds(node.message_loc) + on_op(node.message) + receiver = visit(node.receiver) bounds(node.location) on_unary(node.name, receiver) when :! + bounds(node.message_loc) if node.message == "not" + on_kw("not") + + if node.opening_loc + bounds(node.opening_loc) + on_lparen("(") + end + receiver = - if !node.receiver.is_a?(ParenthesesNode) || !node.receiver.body.nil? + if node.receiver.is_a?(ParenthesesNode) && node.receiver.body.nil? + # The parens in `not()` just emit parens and nothing else. + bounds(node.receiver.opening_loc) + on_lparen("(") + bounds(node.receiver.closing_loc) + on_rparen(")") + nil + else visit(node.receiver) end + if node.closing_loc + bounds(node.closing_loc) + on_rparen(")") + end bounds(node.location) on_unary(:not, receiver) else + on_op("!") + receiver = visit(node.receiver) bounds(node.location) on_unary(:!, receiver) end - when *BINARY_OPERATORS + when BINARY_OPERATORS receiver = visit(node.receiver) + + bounds(node.message_loc) + on_op(node.message) + value = visit(node.arguments.arguments.first) bounds(node.location) @@ -1072,9 +1326,21 @@ module Prism if node.variable_call? 
on_vcall(message) else - arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + if node.opening_loc + bounds(node.opening_loc) + on_lparen("(") + end + + arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + + if node.closing_loc + bounds(node.closing_loc) + on_rparen(")") + end + + block = visit(block_node) call = - if node.opening_loc.nil? && arguments&.any? + if node.opening_loc.nil? && get_arguments_and_block(node.arguments, node.block).first.any? bounds(node.location) on_command(message, arguments) elsif !node.opening_loc.nil? @@ -1085,11 +1351,11 @@ module Prism on_method_add_arg(on_fcall(message), on_args_new) end - if block.nil? - call - else + if block_node bounds(node.block.location) on_method_add_block(call, block) + else + call end end end @@ -1097,7 +1363,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) message = if node.message_loc.nil? @@ -1107,13 +1373,30 @@ module Prism visit_token(node.message, false) end + if node.equal_loc + bounds(node.equal_loc) + on_op("=") + end + if node.name.end_with?("=") && !node.message.end_with?("=") && !node.arguments.nil? && node.block.nil? 
value = visit_write_value(node.arguments.arguments.first) bounds(node.location) on_assign(on_field(receiver, call_operator, message), value) else - arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + if node.opening_loc + bounds(node.opening_loc) + on_lparen("(") + end + + arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + + if node.closing_loc + bounds(node.closing_loc) + on_rparen(")") + end + + block = visit(block_node) call = if node.opening_loc.nil? bounds(node.location) @@ -1131,27 +1414,35 @@ module Prism on_method_add_arg(on_call(receiver, call_operator, message), arguments) end - if block.nil? - call - else + if block_node bounds(node.block.location) on_method_add_block(call, block) + else + call end end end end - # Visit the arguments and block of a call node and return the arguments - # and block as they should be used. - private def visit_call_node_arguments(arguments_node, block_node, trailing_comma) + # Extract the arguments and block Ripper-style, which means if the block + # is like `&b` then it's moved to arguments. + private def get_arguments_and_block(arguments_node, block_node) arguments = arguments_node&.arguments || [] block = block_node if block.is_a?(BlockArgumentNode) - arguments << block + arguments += [block] block = nil end + [arguments, block] + end + + # Visit the arguments and block of a call node and return the arguments + # and block as they should be used. 
+ private def visit_call_node_arguments(arguments_node, block_node, trailing_comma) + arguments, block = get_arguments_and_block(arguments_node, block_node) + [ if arguments.length == 1 && arguments.first.is_a?(ForwardingArgumentsNode) visit(arguments.first) @@ -1165,7 +1456,7 @@ module Prism on_args_add_block(args, false) end end, - visit(block) + block, ] end @@ -1183,7 +1474,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) bounds(node.message_loc) message = visit_token(node.message) @@ -1205,7 +1496,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) bounds(node.message_loc) message = visit_token(node.message) @@ -1227,7 +1518,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) bounds(node.message_loc) message = visit_token(node.message) @@ -1249,6 +1540,9 @@ module Prism if node.call_operator == "::" receiver = visit(node.receiver) + bounds(node.call_operator_loc) + on_op("::") + bounds(node.message_loc) message = visit_token(node.message) @@ -1258,7 +1552,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) bounds(node.message_loc) message = visit_token(node.message) @@ -1272,6 +1566,10 @@ module Prism # ^^^^^^^^^^ def visit_capture_pattern_node(node) value = visit(node.value) + + bounds(node.operator_loc) + on_op("=>") + target = visit(node.target) bounds(node.location) @@ -1281,10 +1579,21 @@ module Prism # case foo; when bar; end # ^^^^^^^^^^^^^^^^^^^^^^^ def visit_case_node(node) + bounds(node.case_keyword_loc) + 
on_kw("case") + predicate = visit(node.predicate) + visited_conditions = node.conditions.map { |condition| visit(condition) } + visited_else_clause = visit(node.else_clause) + + if !node.else_clause + bounds(node.end_keyword_loc) + on_kw("end") + end + clauses = - node.conditions.reverse_each.inject(visit(node.else_clause)) do |current, condition| - on_when(*visit(condition), current) + visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition| + on_when(*condition, current) end bounds(node.location) @@ -1294,10 +1603,23 @@ module Prism # case foo; in bar; end # ^^^^^^^^^^^^^^^^^^^^^ def visit_case_match_node(node) + bounds(node.case_keyword_loc) + on_kw("case") + predicate = visit(node.predicate) + visited_conditions = node.conditions.map do | condition| + visit(condition) + end + visited_else_clause = visit(node.else_clause) + + if !node.else_clause + bounds(node.end_keyword_loc) + on_kw("end") + end + clauses = - node.conditions.reverse_each.inject(visit(node.else_clause)) do |current, condition| - on_in(*visit(condition), current) + visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition| + on_in(*condition, current) end bounds(node.location) @@ -1307,6 +1629,9 @@ module Prism # class Foo; end # ^^^^^^^^^^^^^^ def visit_class_node(node) + bounds(node.class_keyword_loc) + on_kw("class") + constant_path = if node.constant_path.is_a?(ConstantReadNode) bounds(node.constant_path.location) @@ -1315,9 +1640,17 @@ module Prism visit(node.constant_path) end + if node.inheritance_operator_loc + bounds(node.inheritance_operator_loc) + on_op("<") + end + superclass = visit(node.superclass) bodystmt = visit_body_node(node.superclass&.location || node.constant_path.location, node.body, node.superclass.nil?) 
+ bounds(node.end_keyword_loc) + on_kw("end") + bounds(node.location) on_class(constant_path, superclass, bodystmt) end @@ -1331,12 +1664,13 @@ module Prism # @@foo = 1 # ^^^^^^^^^ - # - # @@foo, @@bar = 1 - # ^^^^^ ^^^^^ def visit_class_variable_write_node(node) bounds(node.name_loc) target = on_var_field(on_cvar(node.name.to_s)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -1401,12 +1735,13 @@ module Prism # Foo = 1 # ^^^^^^^ - # - # Foo, Bar = 1 - # ^^^ ^^^ def visit_constant_write_node(node) bounds(node.name_loc) target = on_var_field(on_const(node.name.to_s)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -1466,6 +1801,11 @@ module Prism # ^^^^^^^^ def visit_constant_path_node(node) if node.parent.nil? + if node.delimiter_loc + bounds(node.delimiter_loc) + on_op("::") + end + bounds(node.name_loc) child = on_const(node.name.to_s) @@ -1474,6 +1814,9 @@ module Prism else parent = visit(node.parent) + bounds(node.delimiter_loc) + on_op("::") + bounds(node.name_loc) child = on_const(node.name.to_s) @@ -1484,11 +1827,12 @@ module Prism # Foo::Bar = 1 # ^^^^^^^^^^^^ - # - # Foo::Foo, Bar::Bar = 1 - # ^^^^^^^^ ^^^^^^^^ def visit_constant_path_write_node(node) target = visit_constant_path_write_node_target(node.target) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -1498,6 +1842,11 @@ module Prism # Visit a constant path that is part of a write node. private def visit_constant_path_write_node_target(node) if node.parent.nil? 
+ if node.delimiter_loc + bounds(node.delimiter_loc) + on_op("::") + end + bounds(node.name_loc) child = on_const(node.name.to_s) @@ -1506,6 +1855,9 @@ module Prism else parent = visit(node.parent) + bounds(node.delimiter_loc) + on_op("::") + bounds(node.name_loc) child = on_const(node.name.to_s) @@ -1518,7 +1870,6 @@ module Prism # ^^^^^^^^^^^^^^^ def visit_constant_path_operator_write_node(node) target = visit_constant_path_write_node_target(node.target) - value = visit(node.value) bounds(node.binary_operator_loc) operator = on_op("#{node.binary_operator}=") @@ -1532,7 +1883,6 @@ module Prism # ^^^^^^^^^^^^^^^^ def visit_constant_path_and_write_node(node) target = visit_constant_path_write_node_target(node.target) - value = visit(node.value) bounds(node.operator_loc) operator = on_op("&&=") @@ -1546,7 +1896,6 @@ module Prism # ^^^^^^^^^^^^^^^^ def visit_constant_path_or_write_node(node) target = visit_constant_path_write_node_target(node.target) - value = visit(node.value) bounds(node.operator_loc) operator = on_op("||=") @@ -1568,16 +1917,24 @@ module Prism # def self.foo; end # ^^^^^^^^^^^^^^^^^ def visit_def_node(node) + bounds(node.def_keyword_loc) + on_kw("def") + receiver = visit(node.receiver) operator = if !node.operator_loc.nil? bounds(node.operator_loc) - visit_token(node.operator) + node.operator == "." ? on_period(".") : on_op("::") end bounds(node.name_loc) name = visit_token(node.name_loc.slice) + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + parameters = if node.parameters.nil? bounds(node.location) @@ -1587,10 +1944,17 @@ module Prism end if !node.lparen_loc.nil? + bounds(node.rparen_loc) + on_rparen(")") bounds(node.lparen_loc) parameters = on_paren(parameters) end + if node.equal_loc + bounds(node.equal_loc) + on_op("=") + end + bodystmt = if node.equal_loc.nil? 
visit_body_node(node.rparen_loc || node.end_keyword_loc, node.body) @@ -1601,11 +1965,16 @@ module Prism on_bodystmt(body, nil, nil, nil) end + if node.end_keyword_loc + bounds(node.end_keyword_loc) + on_kw("end") + end + bounds(node.location) - if receiver.nil? - on_def(name, parameters, bodystmt) - else + if receiver on_defs(receiver, operator, name, parameters, bodystmt) + else + on_def(name, parameters, bodystmt) end end @@ -1615,8 +1984,21 @@ module Prism # defined?(a) # ^^^^^^^^^^^ def visit_defined_node(node) + bounds(node.keyword_loc) + on_kw("defined?") + + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + expression = visit(node.value) + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + # Very weird circumstances here where something like: # # defined? @@ -1637,17 +2019,24 @@ module Prism # if foo then bar else baz end # ^^^^^^^^^^^^ def visit_else_node(node) + bounds(node.else_keyword_loc) + on_kw("else") + statements = if node.statements.nil? [nil] else body = node.statements.body - body.unshift(nil) if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false) + body = [nil, *body] if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false) body end + else_statements = visit_statements_node_body(statements) + + bounds(node.end_keyword_loc) + on_kw("end") bounds(node.location) - on_else(visit_statements_node_body(statements)) + on_else(else_statements) end # "foo #{bar}" @@ -1685,12 +2074,15 @@ module Prism # Visit an EnsureNode node. def visit_ensure_node(node) + bounds(node.ensure_keyword_loc) + on_kw("ensure") + statements = if node.statements.nil? 
[nil] else body = node.statements.body - body.unshift(nil) if void_stmt?(node.ensure_keyword_loc, body[0].location, false) + body = [nil, *body] if void_stmt?(node.ensure_keyword_loc, body[0].location, false) body end @@ -1711,6 +2103,14 @@ module Prism # ^^^^^^^^^^^ def visit_find_pattern_node(node) constant = visit(node.constant) + + if node.opening_loc + bounds(node.opening_loc) + node.opening == "[" ? on_lbracket("[") : on_lparen("(") + end + bounds(node.left.operator_loc) + on_op("*") + left = if node.left.expression.nil? bounds(node.left.location) @@ -1720,6 +2120,10 @@ module Prism end requireds = visit_all(node.requireds) if node.requireds.any? + + bounds(node.right.operator_loc) + on_op("*") + right = if node.right.expression.nil? bounds(node.right.location) @@ -1728,6 +2132,10 @@ module Prism visit(node.right.expression) end + if node.closing_loc + bounds(node.closing_loc) + node.closing == "]" ? on_rbracket("]") : on_rparen(")") + end bounds(node.location) on_fndptn(constant, left, requireds, right) end @@ -1736,6 +2144,10 @@ module Prism # ^^^^^^^^^^ def visit_flip_flop_node(node) left = visit(node.left) + + bounds(node.operator_loc) + on_op(node.operator) + right = visit(node.right) bounds(node.location) @@ -1755,8 +2167,18 @@ module Prism # for foo in bar do end # ^^^^^^^^^^^^^^^^^^^^^ def visit_for_node(node) + bounds(node.for_keyword_loc) + on_kw("for") + index = visit(node.index) + bounds(node.in_keyword_loc) + on_kw("in") + collection = visit(node.collection) + if node.do_keyword_loc + bounds(node.do_keyword_loc) + on_kw("do") + end statements = if node.statements.nil? 
bounds(node.location) @@ -1765,6 +2187,9 @@ module Prism visit(node.statements) end + bounds(node.end_keyword_loc) + on_kw("end") + bounds(node.location) on_for(index, collection, statements) end @@ -1773,6 +2198,7 @@ module Prism # ^^^ def visit_forwarding_arguments_node(node) bounds(node.location) + on_op("...") on_args_forward end @@ -1780,6 +2206,7 @@ module Prism # ^^^ def visit_forwarding_parameter_node(node) bounds(node.location) + on_op("...") on_args_forward end @@ -1789,6 +2216,9 @@ module Prism # super {} # ^^^^^^^^ def visit_forwarding_super_node(node) + bounds(node.keyword_loc) + on_kw("super") + if node.block.nil? bounds(node.location) on_zsuper @@ -1809,12 +2239,13 @@ module Prism # $foo = 1 # ^^^^^^^^ - # - # $foo, $bar = 1 - # ^^^^ ^^^^ def visit_global_variable_write_node(node) bounds(node.name_loc) target = on_var_field(on_gvar(node.name.to_s)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -1873,6 +2304,9 @@ module Prism # {} # ^^ def visit_hash_node(node) + bounds(node.opening_loc) + on_lbrace("{") + elements = if node.elements.any? args = visit_all(node.elements) @@ -1881,6 +2315,8 @@ module Prism on_assoclist_from_args(args) end + bounds(node.closing_loc) + on_rbrace("}") bounds(node.location) on_hash(elements) end @@ -1889,6 +2325,15 @@ module Prism # ^^ def visit_hash_pattern_node(node) constant = visit(node.constant) + + if node.constant + bounds(node.opening_loc) + node.opening == "[" ? on_lbracket("[") : on_lparen("(") + elsif node.opening_loc + bounds(node.opening_loc) + on_lbrace("{") + end + elements = if node.elements.any? || !node.rest.nil? 
node.elements.map do |element| @@ -1911,12 +2356,21 @@ module Prism rest = case node.rest when AssocSplatNode + bounds(node.rest.operator_loc) + on_op("**") visit(node.rest.value) when NoKeywordsParameterNode bounds(node.rest.location) on_var_field(visit(node.rest)) end + if node.constant + bounds(node.closing_loc) + node.closing == "]" ? on_rbracket("]") : on_rparen(")") + elsif node.closing_loc + bounds(node.closing_loc) + on_rbrace("}") + end bounds(node.location) on_hshptn(constant, elements, rest) end @@ -1932,13 +2386,27 @@ module Prism def visit_if_node(node) if node.then_keyword == "?" predicate = visit(node.predicate) + + bounds(node.then_keyword_loc) + on_op("?") + truthy = visit(node.statements.body.first) + + bounds(node.subsequent.else_keyword_loc) + on_op(":") + falsy = visit(node.subsequent.statements.body.first) bounds(node.location) on_ifop(predicate, truthy, falsy) elsif node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + bounds(node.if_keyword_loc) + on_kw(node.if_keyword) predicate = visit(node.predicate) + if node.then_keyword_loc && node.then_keyword != "?" + bounds(node.then_keyword_loc) + on_kw("then") + end statements = if node.statements.nil? bounds(node.location) @@ -1948,6 +2416,11 @@ module Prism end subsequent = visit(node.subsequent) + if node.end_keyword_loc && !node.subsequent + bounds(node.end_keyword_loc) + on_kw("end") + end + bounds(node.location) if node.if_keyword == "if" on_if(predicate, statements, subsequent) @@ -1956,6 +2429,8 @@ module Prism end else statements = visit(node.statements.body.first) + bounds(node.if_keyword_loc) + on_kw(node.if_keyword) predicate = visit(node.predicate) bounds(node.location) @@ -1987,7 +2462,14 @@ module Prism # This is a special case where we're not going to call on_in directly # because we don't have access to the subsequent. Instead, we'll return # the component parts and let the parent node handle it. 
+ bounds(node.in_loc) + on_kw("in") + pattern = visit_pattern_node(node.pattern) + if node.then_loc + bounds(node.then_loc) + on_kw("then") + end statements = if node.statements.nil? bounds(node.location) @@ -2003,8 +2485,15 @@ module Prism # ^^^^^^^^^^^^^^^ def visit_index_operator_write_node(node) receiver = visit(node.receiver) + + bounds(node.opening_loc) + on_lbracket("[") + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.location) target = on_aref_field(receiver, arguments) @@ -2020,8 +2509,15 @@ module Prism # ^^^^^^^^^^^^^^^^ def visit_index_and_write_node(node) receiver = visit(node.receiver) + + bounds(node.opening_loc) + on_lbracket("[") + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.location) target = on_aref_field(receiver, arguments) @@ -2037,8 +2533,15 @@ module Prism # ^^^^^^^^^^^^^^^^ def visit_index_or_write_node(node) receiver = visit(node.receiver) + + bounds(node.opening_loc) + on_lbracket("[") + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.location) target = on_aref_field(receiver, arguments) @@ -2054,8 +2557,15 @@ module Prism # ^^^^^^^^ def visit_index_target_node(node) receiver = visit(node.receiver) + + bounds(node.opening_loc) + on_lbracket("[") + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.location) on_aref_field(receiver, arguments) end @@ -2072,6 +2582,10 @@ module Prism def 
visit_instance_variable_write_node(node) bounds(node.name_loc) target = on_var_field(on_ivar(node.name.to_s)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -2174,20 +2688,37 @@ module Prism # "foo #{bar}" # ^^^^^^^^^^^^ def visit_interpolated_string_node(node) - if node.opening&.start_with?("<<~") - heredoc = visit_heredoc_string_node(node) + with_string_bounds(node) do + if node.opening&.start_with?("<<~") + heredoc = visit_heredoc_string_node(node) - bounds(node.location) - on_string_literal(heredoc) - elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? } - first, *rest = node.parts - rest.inject(visit(first)) do |content, part| - concat = visit(part) + bounds(node.location) + on_string_literal(heredoc) + elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? 
} + first, *rest = node.parts + rest.inject(visit(first)) do |content, part| + concat = visit(part) + + bounds(part.location) + on_string_concat(content, concat) + end + else + bounds(node.parts.first.location) + parts = + node.parts.inject(on_string_content) do |content, part| + on_string_add(content, visit_string_content(part)) + end - bounds(part.location) - on_string_concat(content, concat) + bounds(node.location) + on_string_literal(parts) end - else + end + end + + # :"foo #{bar}" + # ^^^^^^^^^^^^^ + def visit_interpolated_symbol_node(node) + with_string_bounds(node) do bounds(node.parts.first.location) parts = node.parts.inject(on_string_content) do |content, part| @@ -2195,40 +2726,29 @@ module Prism end bounds(node.location) - on_string_literal(parts) + on_dyna_symbol(parts) end end - # :"foo #{bar}" - # ^^^^^^^^^^^^^ - def visit_interpolated_symbol_node(node) - bounds(node.parts.first.location) - parts = - node.parts.inject(on_string_content) do |content, part| - on_string_add(content, visit_string_content(part)) - end - - bounds(node.location) - on_dyna_symbol(parts) - end - # `foo #{bar}` # ^^^^^^^^^^^^ def visit_interpolated_x_string_node(node) - if node.opening.start_with?("<<~") - heredoc = visit_heredoc_x_string_node(node) + with_string_bounds(node) do + if node.opening.start_with?("<<~") + heredoc = visit_heredoc_x_string_node(node) - bounds(node.location) - on_xstring_literal(heredoc) - else - bounds(node.parts.first.location) - parts = - node.parts.inject(on_xstring_new) do |content, part| - on_xstring_add(content, visit_string_content(part)) - end + bounds(node.location) + on_xstring_literal(heredoc) + else + bounds(node.parts.first.location) + parts = + node.parts.inject(on_xstring_new) do |content, part| + on_xstring_add(content, visit_string_content(part)) + end - bounds(node.location) - on_xstring_literal(parts) + bounds(node.location) + on_xstring_literal(parts) + end end end @@ -2269,6 +2789,9 @@ module Prism # def foo(**); end # ^^ def 
visit_keyword_rest_parameter_node(node) + bounds(node.operator_loc) + on_op("**") + if node.name_loc.nil? bounds(node.location) on_kwrest_param(nil) @@ -2288,6 +2811,11 @@ module Prism parameters = if node.parameters.is_a?(BlockParametersNode) + if node.parameters.opening_loc + bounds(node.parameters.opening_loc) + on_lparen("(") + end + # Ripper does not track block-locals within lambdas, so we skip # directly to the parameters here. params = @@ -2298,6 +2826,13 @@ module Prism visit(node.parameters.parameters) end + visit_all(node.parameters.locals) + + if node.parameters.closing_loc + bounds(node.parameters.closing_loc) + on_rparen(")") + end + if node.parameters.opening_loc.nil? params else @@ -2310,9 +2845,11 @@ module Prism end braces = node.opening == "{" + bounds(node.opening_loc) if braces - bounds(node.opening_loc) on_tlambeg(node.opening) + else + on_kw("do") end body = @@ -2325,7 +2862,7 @@ module Prism braces ? stmts : on_bodystmt(stmts, nil, nil, nil) when StatementsNode stmts = node.body.body - stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) + stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) stmts = visit_statements_node_body(stmts) bounds(node.body.location) @@ -2336,6 +2873,13 @@ module Prism raise end + bounds(node.closing_loc) + if braces + on_rbrace("}") + else + on_kw("end") + end + bounds(node.location) on_lambda(parameters, body) end @@ -2352,6 +2896,10 @@ module Prism def visit_local_variable_write_node(node) bounds(node.name_loc) target = on_var_field(on_ident(node.name_loc.slice)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -2426,6 +2974,8 @@ module Prism # ^^^^^^^^^^ def visit_match_predicate_node(node) value = visit(node.value) + bounds(node.operator_loc) + on_kw("in") pattern = on_in(visit_pattern_node(node.pattern), nil, nil) on_case(value, pattern) @@ 
-2435,6 +2985,10 @@ module Prism # ^^^^^^^^^^ def visit_match_required_node(node) value = visit(node.value) + + bounds(node.operator_loc) + on_op("=>") + pattern = on_in(visit_pattern_node(node.pattern), nil, nil) on_case(value, pattern) @@ -2448,13 +3002,16 @@ module Prism # A node that is missing from the syntax tree. This is only used in the # case of a syntax error. - def visit_missing_node(node) - raise "Cannot visit missing nodes directly." + def visit_error_recovery_node(node) + raise "Cannot visit error recovery nodes directly." end # module Foo; end # ^^^^^^^^^^^^^^^ def visit_module_node(node) + bounds(node.module_keyword_loc) + on_kw("module") + constant_path = if node.constant_path.is_a?(ConstantReadNode) bounds(node.constant_path.location) @@ -2465,6 +3022,9 @@ module Prism bodystmt = visit_body_node(node.constant_path.location, node.body, true) + bounds(node.end_keyword_loc) + on_kw("end") + bounds(node.location) on_module(constant_path, bodystmt) end @@ -2472,9 +3032,19 @@ module Prism # (foo, bar), bar = qux # ^^^^^^^^^^ def visit_multi_target_node(node) + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + bounds(node.location) targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true) + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + if node.lparen_loc.nil? targets else @@ -2526,9 +3096,22 @@ module Prism # foo, bar = baz # ^^^^^^^^^^^^^^ def visit_multi_write_node(node) + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + bounds(node.location) targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true) + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + + bounds(node.operator_loc) + on_op("=") + unless node.lparen_loc.nil? 
bounds(node.lparen_loc) targets = on_mlhs_paren(targets) @@ -2546,6 +3129,9 @@ module Prism # next foo # ^^^^^^^^ def visit_next_node(node) + bounds(node.keyword_loc) + on_kw("next") + if node.arguments.nil? bounds(node.location) on_next(on_args_new) @@ -2564,9 +3150,24 @@ module Prism on_var_ref(on_kw("nil")) end + # def foo(&nil); end + # ^^^^ + def visit_no_block_parameter_node(node) + bounds(node.operator_loc) + on_op("&") + bounds(node.keyword_loc) + on_kw("nil") + bounds(node.location) + on_blockarg(:nil) + end + # def foo(**nil); end # ^^^^^ def visit_no_keywords_parameter_node(node) + bounds(node.operator_loc) + on_op("**") + bounds(node.keyword_loc) + on_kw("nil") bounds(node.location) on_nokw_param(nil) @@ -2599,7 +3200,11 @@ module Prism # ^^^^^^^ def visit_optional_parameter_node(node) bounds(node.name_loc) - name = visit_token(node.name.to_s) + name = on_ident(node.name.to_s) + + bounds(node.operator_loc) + on_op("=") + value = visit(node.value) [name, value] @@ -2609,6 +3214,14 @@ module Prism # ^^^^^^ def visit_or_node(node) left = visit(node.left) + + bounds(node.operator_loc) + if node.operator == "or" + on_kw("or") + else + on_op("||") + end + right = visit(node.right) bounds(node.location) @@ -2632,9 +3245,19 @@ module Prism # Visit a destructured positional parameter node. private def visit_destructured_parameter_node(node) + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + bounds(node.location) targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, false) + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + bounds(node.lparen_loc) on_mlhs_paren(targets) end @@ -2645,6 +3268,9 @@ module Prism # (1) # ^^^ def visit_parentheses_node(node) + bounds(node.opening_loc) + on_lparen("(") + body = if node.body.nil? 
on_stmts_add(on_stmts_new, on_void_stmt) @@ -2652,6 +3278,8 @@ module Prism visit(node.body) end + bounds(node.closing_loc) + on_rparen(")") bounds(node.location) on_paren(body) end @@ -2659,8 +3287,15 @@ module Prism # foo => ^(bar) # ^^^^^^ def visit_pinned_expression_node(node) + bounds(node.operator_loc) + on_op("^") + bounds(node.lparen_loc) + on_lparen("(") + expression = visit(node.expression) + bounds(node.rparen_loc) + on_rparen(")") bounds(node.location) on_begin(expression) end @@ -2668,12 +3303,20 @@ module Prism # foo = 1 and bar => ^foo # ^^^^ def visit_pinned_variable_node(node) + bounds(node.operator_loc) + on_op("^") + visit(node.variable) end # END {} # ^^^^^^ def visit_post_execution_node(node) + bounds(node.keyword_loc) + on_kw("END") + bounds(node.opening_loc) + on_lbrace("{") + statements = if node.statements.nil? bounds(node.location) @@ -2682,6 +3325,8 @@ module Prism visit(node.statements) end + bounds(node.closing_loc) + on_rbrace("}") bounds(node.location) on_END(statements) end @@ -2689,6 +3334,11 @@ module Prism # BEGIN {} # ^^^^^^^^ def visit_pre_execution_node(node) + bounds(node.keyword_loc) + on_kw("BEGIN") + bounds(node.opening_loc) + on_lbrace("{") + statements = if node.statements.nil? bounds(node.location) @@ -2697,6 +3347,8 @@ module Prism visit(node.statements) end + bounds(node.closing_loc) + on_rbrace("}") bounds(node.location) on_BEGIN(statements) end @@ -2704,7 +3356,7 @@ module Prism # The top-level program node. def visit_program_node(node) body = node.statements.body - body << nil if body.empty? + body = [nil] if body.empty? 
statements = visit_statements_node_body(body) bounds(node.location) @@ -2715,6 +3367,10 @@ module Prism # ^^^^ def visit_range_node(node) left = visit(node.left) + + bounds(node.operator_loc) + on_op(node.operator) + right = visit(node.right) bounds(node.location) @@ -2735,6 +3391,7 @@ module Prism # ^^^^ def visit_redo_node(node) bounds(node.location) + on_kw("redo") on_redo end @@ -2777,6 +3434,9 @@ module Prism # foo rescue bar # ^^^^^^^^^^^^^^ def visit_rescue_modifier_node(node) + bounds(node.keyword_loc) + on_kw("rescue") + expression = visit_write_value(node.expression) rescue_expression = visit(node.rescue_expression) @@ -2787,6 +3447,9 @@ module Prism # begin; rescue; end # ^^^^^^^ def visit_rescue_node(node) + bounds(node.keyword_loc) + on_kw("rescue") + exceptions = case node.exceptions.length when 0 @@ -2824,6 +3487,11 @@ module Prism end end + if node.operator_loc + bounds(node.operator_loc) + on_op("=>") + end + reference = visit(node.reference) statements = if node.statements.nil? @@ -2845,12 +3513,15 @@ module Prism # def foo(*); end # ^ def visit_rest_parameter_node(node) + bounds(node.operator_loc) + on_op("*") + if node.name_loc.nil? bounds(node.location) on_rest_param(nil) else bounds(node.name_loc) - on_rest_param(visit_token(node.name.to_s)) + on_rest_param(on_ident(node.name.to_s)) end end @@ -2858,6 +3529,7 @@ module Prism # ^^^^^ def visit_retry_node(node) bounds(node.location) + on_kw("retry") on_retry end @@ -2867,6 +3539,9 @@ module Prism # return 1 # ^^^^^^^^ def visit_return_node(node) + bounds(node.keyword_loc) + on_kw("return") + if node.arguments.nil? 
bounds(node.location) on_return0 @@ -2893,9 +3568,17 @@ module Prism # class << self; end # ^^^^^^^^^^^^^^^^^^ def visit_singleton_class_node(node) + bounds(node.class_keyword_loc) + on_kw("class") + bounds(node.operator_loc) + on_op("<<") + expression = visit(node.expression) bodystmt = visit_body_node(node.body&.location || node.end_keyword_loc, node.body) + bounds(node.end_keyword_loc) + on_kw("end") + bounds(node.location) on_sclass(expression, bodystmt) end @@ -2930,6 +3613,8 @@ module Prism # def foo(*); bar(*); end # ^ def visit_splat_node(node) + bounds(node.operator_loc) + on_op("*") visit(node.expression) end @@ -2952,26 +3637,68 @@ module Prism # "foo" # ^^^^^ def visit_string_node(node) - if (content = node.content).empty? - bounds(node.location) - on_string_literal(on_string_content) - elsif (opening = node.opening) == "?" - bounds(node.location) - on_CHAR("?#{node.content}") - elsif opening.start_with?("<<~") - heredoc = visit_heredoc_string_node(node.to_interpolated) + with_string_bounds(node) do + if (content = node.content).empty? + bounds(node.location) + on_string_literal(on_string_content) + elsif (opening = node.opening) == "?" 
+ bounds(node.location) + on_CHAR("?#{node.content}") + elsif opening.start_with?("<<~") + heredoc = visit_heredoc_string_node(node.to_interpolated) - bounds(node.location) - on_string_literal(heredoc) - else - bounds(node.content_loc) - tstring_content = on_tstring_content(content) + bounds(node.location) + on_string_literal(heredoc) + else + bounds(node.content_loc) + tstring_content = on_tstring_content(content) - bounds(node.location) - on_string_literal(on_string_add(on_string_content, tstring_content)) + bounds(node.location) + on_string_literal(on_string_add(on_string_content, tstring_content)) + end end end + # Responsible for emitting the various string-like begin/end events + private def with_string_bounds(node) + # `foo "bar": baz` doesn't emit the closing location + assoc = !(opening = node.opening)&.include?(":") && node.closing&.end_with?(":") + + is_heredoc = opening&.start_with?("<<") + if is_heredoc + bounds(node.opening_loc) + on_heredoc_beg(node.opening) + elsif opening&.start_with?(":", "%s") + bounds(node.opening_loc) + on_symbeg(node.opening) + elsif opening&.start_with?("`", "%x") + bounds(node.opening_loc) + on_backtick(node.opening) + elsif opening && !opening.start_with?("?") + bounds(node.opening_loc) + on_tstring_beg(opening) + end + + result = yield + if assoc + if node.closing != ":" + bounds(node.closing_loc) + on_label_end(node.closing) + end + return result + end + + if is_heredoc + bounds(node.closing_loc) + on_heredoc_end(node.closing) + elsif node.closing_loc + bounds(node.closing_loc) + on_tstring_end(node.closing) + end + + result + end + # Ripper gives back the escaped string content but strips out the common # leading whitespace. Prism gives back the unescaped string content and # a location for the escaped string content. Unfortunately these don't @@ -3049,42 +3776,39 @@ module Prism # Visit a heredoc node that is representing a string. 
private def visit_heredoc_string_node(node) - bounds(node.opening_loc) - on_heredoc_beg(node.opening) - bounds(node.location) - result = - visit_heredoc_node(node.parts, on_string_content) do |parts, part| - on_string_add(parts, part) - end - - bounds(node.closing_loc) - on_heredoc_end(node.closing) - - result + visit_heredoc_node(node.parts, on_string_content) do |parts, part| + on_string_add(parts, part) + end end # Visit a heredoc node that is representing an xstring. private def visit_heredoc_x_string_node(node) - bounds(node.opening_loc) - on_heredoc_beg(node.opening) - bounds(node.location) - result = - visit_heredoc_node(node.parts, on_xstring_new) do |parts, part| - on_xstring_add(parts, part) - end - - bounds(node.closing_loc) - on_heredoc_end(node.closing) - - result + visit_heredoc_node(node.parts, on_xstring_new) do |parts, part| + on_xstring_add(parts, part) + end end # super(foo) # ^^^^^^^^^^ def visit_super_node(node) - arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location)) + bounds(node.keyword_loc) + on_kw("super") + + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + + arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location)) + + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + + block = visit(block_node) if !node.lparen_loc.nil? bounds(node.lparen_loc) @@ -3094,35 +3818,36 @@ module Prism bounds(node.location) call = on_super(arguments) - if block.nil? - call - else + if block_node bounds(node.block.location) on_method_add_block(call, block) + else + call end end # :foo # ^^^^ def visit_symbol_node(node) - if (opening = node.opening)&.match?(/^%s|['"]:?$/) - bounds(node.value_loc) - content = on_string_content - - if !(value = node.value).empty? 
- content = on_string_add(content, on_tstring_content(value)) + with_string_bounds(node) do + if node.value_loc.nil? + bounds(node.location) + on_dyna_symbol(on_string_content) + elsif (opening = node.opening)&.match?(/^%s|['"]:?$/) + bounds(node.value_loc) + content = on_string_add(on_string_content, on_tstring_content(node.value)) + bounds(node.location) + on_dyna_symbol(content) + elsif (closing = node.closing) == ":" + bounds(node.location) + on_label("#{node.value}:") + elsif opening.nil? && node.closing_loc.nil? + bounds(node.value_loc) + on_symbol_literal(visit_token(node.value)) + else + bounds(node.value_loc) + on_symbol_literal(on_symbol(visit_token(node.value))) end - - on_dyna_symbol(content) - elsif (closing = node.closing) == ":" - bounds(node.location) - on_label("#{node.value}:") - elsif opening.nil? && node.closing_loc.nil? - bounds(node.value_loc) - on_symbol_literal(visit_token(node.value)) - else - bounds(node.value_loc) - on_symbol_literal(on_symbol(visit_token(node.value))) end end @@ -3136,6 +3861,9 @@ module Prism # undef foo # ^^^^^^^^^ def visit_undef_node(node) + bounds(node.keyword_loc) + on_kw("undef") + names = visit_all(node.names) bounds(node.location) @@ -3149,7 +3877,13 @@ module Prism # ^^^^^^^^^^^^^^ def visit_unless_node(node) if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + bounds(node.keyword_loc) + on_kw("unless") predicate = visit(node.predicate) + if node.then_keyword_loc + bounds(node.then_keyword_loc) + on_kw("then") + end statements = if node.statements.nil? 
bounds(node.location) @@ -3159,10 +3893,17 @@ module Prism end else_clause = visit(node.else_clause) + if node.end_keyword_loc && !node.else_clause + bounds(node.end_keyword_loc) + on_kw("end") + end + bounds(node.location) on_unless(predicate, statements, else_clause) else statements = visit(node.statements.body.first) + bounds(node.keyword_loc) + on_kw("unless") predicate = visit(node.predicate) bounds(node.location) @@ -3176,7 +3917,14 @@ module Prism # bar until foo # ^^^^^^^^^^^^^ def visit_until_node(node) + bounds(node.keyword_loc) + on_kw("until") + if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + if node.do_keyword_loc + bounds(node.do_keyword_loc) + on_kw("do") + end predicate = visit(node.predicate) statements = if node.statements.nil? @@ -3186,6 +3934,11 @@ module Prism visit(node.statements) end + if node.closing_loc + bounds(node.closing_loc) + on_kw("end") + end + bounds(node.location) on_until(predicate, statements) else @@ -3203,7 +3956,14 @@ module Prism # This is a special case where we're not going to call on_when directly # because we don't have access to the subsequent. Instead, we'll return # the component parts and let the parent node handle it. + bounds(node.keyword_loc) + on_kw("when") + conditions = visit_arguments(node.conditions) + if node.then_keyword_loc + bounds(node.then_keyword_loc) + on_kw("then") + end statements = if node.statements.nil? bounds(node.location) @@ -3222,7 +3982,17 @@ module Prism # ^^^^^^^^^^^^^ def visit_while_node(node) if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + bounds(node.keyword_loc) + on_kw("while") + if node.do_keyword_loc + bounds(node.do_keyword_loc) + on_kw("do") + end predicate = visit(node.predicate) + if node.closing_loc + bounds(node.closing_loc) + on_kw("end") + end statements = if node.statements.nil? 
bounds(node.location) @@ -3235,6 +4005,8 @@ module Prism on_while(predicate, statements) else statements = visit(node.statements.body.first) + bounds(node.keyword_loc) + on_kw("while") predicate = visit(node.predicate) bounds(node.location) @@ -3245,20 +4017,22 @@ module Prism # `foo` # ^^^^^ def visit_x_string_node(node) - if node.unescaped.empty? - bounds(node.location) - on_xstring_literal(on_xstring_new) - elsif node.opening.start_with?("<<~") - heredoc = visit_heredoc_x_string_node(node.to_interpolated) + with_string_bounds(node) do + if node.unescaped.empty? + bounds(node.location) + on_xstring_literal(on_xstring_new) + elsif node.opening.start_with?("<<~") + heredoc = visit_heredoc_x_string_node(node.to_interpolated) - bounds(node.location) - on_xstring_literal(heredoc) - else - bounds(node.content_loc) - content = on_tstring_content(node.content) + bounds(node.location) + on_xstring_literal(heredoc) + else + bounds(node.content_loc) + content = on_tstring_content(node.content) - bounds(node.location) - on_xstring_literal(on_xstring_add(on_xstring_new, content)) + bounds(node.location) + on_xstring_literal(on_xstring_add(on_xstring_new, content)) + end end end @@ -3268,10 +4042,18 @@ module Prism # yield 1 # ^^^^^^^ def visit_yield_node(node) + bounds(node.keyword_loc) + on_kw("yield") + if node.arguments.nil? && node.lparen_loc.nil? bounds(node.location) on_yield0 else + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + arguments = if node.arguments.nil? bounds(node.location) @@ -3281,6 +4063,8 @@ module Prism end unless node.lparen_loc.nil? + bounds(node.rparen_loc) + on_rparen(")") bounds(node.lparen_loc) arguments = on_paren(arguments) end @@ -3294,7 +4078,11 @@ module Prism # Lazily initialize the parse result. 
def result - @result ||= Prism.parse(source, partial_script: true) + @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true, encoding: source.encoding) + end + + def line_and_column_cache + @line_and_column_cache ||= LineAndColumnCache.new(result.source) end ########################################################################## @@ -3315,30 +4103,34 @@ module Prism # Visit the string content of a particular node. This method is used to # split into the various token types. def visit_token(token, allow_keywords = true) - case token - when "." + if token == "." on_period(token) - when "`" + elsif token == "`" on_backtick(token) - when *(allow_keywords ? KEYWORDS : []) + elsif allow_keywords && KEYWORDS.include?(token) on_kw(token) - when /^_/ + elsif token.start_with?("_") on_ident(token) - when /^[[:upper:]]\w*$/ + elsif token.match?(/^[[:upper:]]\w*$/) on_const(token) - when /^@@/ + elsif token.start_with?("@@") on_cvar(token) - when /^@/ + elsif token.start_with?("@") on_ivar(token) - when /^\$/ + elsif token.start_with?("$") on_gvar(token) - when /^[[:punct:]]/ + elsif token.match?(/^[[:punct:]]/) on_op(token) else on_ident(token) end end + # Visit either `.`, `&.`, or `::`. + def visit_call_operator(token) + token == "." ? on_period(token) : on_op(token) + end + # Visit a node that represents a number. We need to explicitly handle the # unary - operator. def visit_number_node(node) @@ -3346,6 +4138,9 @@ module Prism location = node.location if slice[0] == "-" + bounds(location.copy(length: 1)) + on_op("-") + bounds(location.copy(start_offset: location.start_offset + 1)) value = yield slice[1..-1] @@ -3394,26 +4189,24 @@ module Prism # This method is responsible for updating lineno and column information # to reflect the current node. - # - # This method could be drastically improved with some caching on the start - # of every line, but for now it's good enough. 
def bounds(location) - @lineno = location.start_line - @column = location.start_column + @lineno, @column = line_and_column_cache.line_and_column(location.start_offset) end + # :startdoc: + ########################################################################## # Ripper interface ########################################################################## # :stopdoc: def _dispatch_0; end - def _dispatch_1(_); end - def _dispatch_2(_, _); end - def _dispatch_3(_, _, _); end - def _dispatch_4(_, _, _, _); end - def _dispatch_5(_, _, _, _, _); end - def _dispatch_7(_, _, _, _, _, _, _); end + def _dispatch_1(arg); arg end + def _dispatch_2(arg, _); arg end + def _dispatch_3(arg, _, _); arg end + def _dispatch_4(arg, _, _, _); arg end + def _dispatch_5(arg, _, _, _, _); arg end + def _dispatch_7(arg, _, _, _, _, _, _); arg end # :startdoc: # diff --git a/lib/prism/translation/ripper/filter.rb b/lib/prism/translation/ripper/filter.rb new file mode 100644 index 0000000000..19deef2d37 --- /dev/null +++ b/lib/prism/translation/ripper/filter.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module Prism + module Translation + class Ripper + class Filter # :nodoc: + # :stopdoc: + def initialize(src, filename = '-', lineno = 1) + @__lexer = Lexer.new(src, filename, lineno) + @__line = nil + @__col = nil + @__state = nil + end + + def filename + @__lexer.filename + end + + def lineno + @__line + end + + def column + @__col + end + + def state + @__state + end + + def parse(init = nil) + data = init + @__lexer.lex.each do |pos, event, tok, state| + @__line, @__col = *pos + @__state = state + data = if respond_to?(event, true) + then __send__(event, tok, data) + else on_default(event, tok, data) + end + end + data + end + + private + + def on_default(event, token, data) + data + end + # :startdoc: + end + end + end +end diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb new file mode 100644 index 0000000000..c6aeae4bd7 --- /dev/null 
+++ b/lib/prism/translation/ripper/lexer.rb @@ -0,0 +1,133 @@ +# frozen_string_literal: true +# :markup: markdown + +require_relative "../ripper" + +module Prism + module Translation + class Ripper + class Lexer < Ripper # :nodoc: + class State # :nodoc: + attr_reader :to_int, :to_s + + def initialize(i) + @to_int = i + @to_s = Ripper.lex_state_name(i) + freeze + end + + def [](index) + case index + when 0, :to_int + @to_int + when 1, :to_s + @to_s + else + nil + end + end + + alias to_i to_int + alias inspect to_s + def pretty_print(q) q.text(to_s) end + def ==(i) super or to_int == i end + def &(i) self.class.new(to_int & i) end + def |(i) self.class.new(to_int | i) end + def allbits?(i) to_int.allbits?(i) end + def anybits?(i) to_int.anybits?(i) end + def nobits?(i) to_int.nobits?(i) end + + # Instances are frozen and there are only a handful of them so we + # cache them here. + STATES = Hash.new { |hash, key| hash[key] = State.new(key) } + private_constant :STATES + + def self.[](i) + STATES[i] + end + end + + class Elem # :nodoc: + attr_accessor :pos, :event, :tok, :state, :message + + def initialize(pos, event, tok, state, message = nil) + @pos = pos + @event = event + @tok = tok + @state = State[state] + @message = message + end + + def [](index) + case index + when 0, :pos + @pos + when 1, :event + @event + when 2, :tok + @tok + when 3, :state + @state + when 4, :message + @message + else + nil + end + end + + def inspect + "#<#{self.class}: #{event}@#{pos[0]}:#{pos[1]}:#{state}: #{tok.inspect}#{": " if message}#{message}>" + end + + alias to_s inspect + + def pretty_print(q) + q.group(2, "#<#{self.class}:", ">") { + q.breakable + q.text("#{event}@#{pos[0]}:#{pos[1]}") + q.breakable + state.pretty_print(q) + q.breakable + q.text("token: ") + tok.pretty_print(q) + if message + q.breakable + q.text("message: ") + q.text(message) + end + } + end + + def to_a + if @message + [@pos, @event, @tok, @state, @message] + else + [@pos, @event, @tok, @state] + end + 
end + end + + # Pretty much just the same as Prism.lex_compat. + def lex(raise_errors: false) + Ripper.lex(@source, filename, lineno, raise_errors: raise_errors) + end + + # Returns the lex_compat result wrapped in `Elem`. Errors are omitted. + # Since ripper is a streaming parser, tokens are expected to be emitted in the order + # that the parser encounters them. This is not implemented. + def parse(...) + lex(...).map do |position, event, token, state| + Elem.new(position, event, token, state.to_int) + end + end + + # Similar to parse but ripper sorts the elements by position in the source. Also + # includes errors. Since prism does error recovery, in cases of syntax errors + # the result may differ greatly compared to ripper. + def scan(...) + parse(...) + end + end + end + end +end diff --git a/lib/prism/translation/ripper/sexp.rb b/lib/prism/translation/ripper/sexp.rb index dc26a639a3..46c0333544 100644 --- a/lib/prism/translation/ripper/sexp.rb +++ b/lib/prism/translation/ripper/sexp.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true +# :markup: markdown require_relative "../ripper" @@ -7,9 +8,7 @@ module Prism class Ripper # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that # returns the arrays of [type, *children]. - class SexpBuilder < Ripper - # :stopdoc: - + class SexpBuilder < Ripper # :nodoc: attr_reader :error private @@ -64,16 +63,12 @@ module Prism remove_method :on_parse_error alias on_parse_error on_error alias compile_error on_error - - # :startdoc: end # This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that # returns the same values as ::Ripper::SexpBuilder except with a couple of # niceties that flatten linked lists into arrays. 
- class SexpBuilderPP < SexpBuilder - # :stopdoc: - + class SexpBuilderPP < SexpBuilder # :nodoc: private def on_heredoc_dedent(val, width) @@ -117,8 +112,6 @@ module Prism alias_method "on_#{event}", :_dispatch_event_push end end - - # :startdoc: end end end diff --git a/lib/prism/translation/ripper/shim.rb b/lib/prism/translation/ripper/shim.rb index 10e21cd16a..00ed625da3 100644 --- a/lib/prism/translation/ripper/shim.rb +++ b/lib/prism/translation/ripper/shim.rb @@ -2,4 +2,6 @@ # This writes the prism ripper translation into the Ripper constant so that # users can transparently use Ripper without any changes. +# :stopdoc: Ripper = Prism::Translation::Ripper +# :startdoc: diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb index 8784e22d10..42bc5ee658 100644 --- a/lib/prism/translation/ruby_parser.rb +++ b/lib/prism/translation/ruby_parser.rb @@ -1,21 +1,27 @@ # frozen_string_literal: true +# :markup: markdown begin - require "ruby_parser" + require "sexp" rescue LoadError - warn(%q{Error: Unable to load ruby_parser. Add `gem "ruby_parser"` to your Gemfile.}) + warn(%q{Error: Unable to load sexp. Add `gem "sexp_processor"` to your Gemfile.}) exit(1) end +class RubyParser # :nodoc: + class SyntaxError < RuntimeError # :nodoc: + end +end + module Prism module Translation # This module is the entry-point for converting a prism syntax tree into the # seattlerb/ruby_parser gem's syntax tree. class RubyParser # A prism visitor that builds Sexp objects. - class Compiler < ::Prism::Compiler + class Compiler < ::Prism::Compiler # :nodoc: # This is the name of the file that we are compiling. We set it on every - # Sexp object that is generated, and also use it to compile __FILE__ + # Sexp object that is generated, and also use it to compile `__FILE__` # nodes. 
attr_reader :file @@ -131,7 +137,7 @@ module Prism # $+ # ^^ def visit_back_reference_read_node(node) - s(node, :back_ref, node.name.name.delete_prefix("$").to_sym) + s(node, :back_ref, node.name.to_s.delete_prefix("$").to_sym) end # begin end @@ -366,14 +372,18 @@ module Prism visit(node.constant_path) end - if node.body.nil? - s(node, :class, name, visit(node.superclass)) - elsif node.body.is_a?(StatementsNode) - compiler = copy_compiler(in_def: false) - s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) }) - else - s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false))) - end + result = + if node.body.nil? + s(node, :class, name, visit(node.superclass)) + elsif node.body.is_a?(StatementsNode) + compiler = copy_compiler(in_def: false) + s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) }) + else + s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false))) + end + + attach_comments(result, node) + result end # @@foo @@ -384,9 +394,6 @@ module Prism # @@foo = 1 # ^^^^^^^^^ - # - # @@foo, @@bar = 1 - # ^^^^^ ^^^^^ def visit_class_variable_write_node(node) s(node, class_variable_write_type, node.name, visit_write_value(node.value)) end @@ -524,7 +531,9 @@ module Prism s(node, :defs, visit(node.receiver), name) end + attach_comments(result, node) result.line(node.name_loc.start_line) + if node.parameters.nil? 
result << s(node, :args).line(node.name_loc.start_line) else @@ -639,9 +648,6 @@ module Prism # $foo = 1 # ^^^^^^^^ - # - # $foo, $bar = 1 - # ^^^^ ^^^^ def visit_global_variable_write_node(node) s(node, :gasgn, node.name, visit_write_value(node.value)) end @@ -787,9 +793,6 @@ module Prism # @foo = 1 # ^^^^^^^^ - # - # @foo, @bar = 1 - # ^^^^ ^^^^ def visit_instance_variable_write_node(node) s(node, :iasgn, node.name, visit_write_value(node.value)) end @@ -976,8 +979,8 @@ module Prism def visit_lambda_node(node) parameters = case node.parameters - when nil, NumberedParametersNode - s(node, :args) + when nil, ItParametersNode, NumberedParametersNode + 0 else visit(node.parameters) end @@ -1001,9 +1004,6 @@ module Prism # foo = 1 # ^^^^^^^ - # - # foo, bar = 1 - # ^^^ ^^^ def visit_local_variable_write_node(node) s(node, :lasgn, node.name, visit_write_value(node.value)) end @@ -1059,8 +1059,8 @@ module Prism # A node that is missing from the syntax tree. This is only used in the # case of a syntax error. The parser gem doesn't have such a concept, so # we invent our own here. - def visit_missing_node(node) - raise "Cannot visit missing node directly" + def visit_error_recovery_node(node) + raise "Cannot visit error recovery node directly" end # module Foo; end @@ -1073,14 +1073,18 @@ module Prism visit(node.constant_path) end - if node.body.nil? - s(node, :module, name) - elsif node.body.is_a?(StatementsNode) - compiler = copy_compiler(in_def: false) - s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) }) - else - s(node, :module, name, node.body.accept(copy_compiler(in_def: false))) - end + result = + if node.body.nil? 
+ s(node, :module, name) + elsif node.body.is_a?(StatementsNode) + compiler = copy_compiler(in_def: false) + s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) }) + else + s(node, :module, name, node.body.accept(copy_compiler(in_def: false))) + end + + attach_comments(result, node) + result end # foo, bar = baz @@ -1136,6 +1140,12 @@ module Prism s(node, :nil) end + # def foo(&nil); end + # ^^^^ + def visit_no_block_parameter_node(node) + :"&nil" + end + # def foo(**nil); end # ^^^^^ def visit_no_keywords_parameter_node(node) @@ -1188,7 +1198,7 @@ module Prism # ^^^^^^^^^ def visit_parameters_node(node) children = - node.compact_child_nodes.map do |element| + node.each_child_node.map do |element| if element.is_a?(MultiTargetNode) visit_destructured_parameter(element) else @@ -1537,6 +1547,17 @@ module Prism private + # Attach prism comments to the given sexp. + def attach_comments(sexp, node) + return unless node.comments + return if node.comments.empty? + + extra = node.location.start_line - node.comments.last.location.start_line + comments = node.comments.map(&:slice) + comments.concat([nil] * [0, extra].max) + sexp.comments = comments.join("\n") + end + # Create a new compiler with the given options. def copy_compiler(in_def: self.in_def, in_pattern: self.in_pattern) Compiler.new(file, in_def: in_def, in_pattern: in_pattern) @@ -1615,6 +1636,14 @@ module Prism translate(Prism.parse_file(filepath, partial_script: true), filepath) end + # Parse the give file and translate it into the + # seattlerb/ruby_parser gem's Sexp format. This method is + # provided for API compatibility to RubyParser and takes an + # optional +timeout+ argument. + def process(ruby, file = "(string)", timeout = nil) + Timeout.timeout(timeout) { parse(ruby, file) } + end + class << self # Parse the given source and translate it into the seattlerb/ruby_parser # gem's Sexp format. 
@@ -1639,6 +1668,7 @@ module Prism raise ::RubyParser::SyntaxError, "#{filepath}:#{error.location.start_line} :: #{error.message}" end + result.attach_comments! result.value.accept(Compiler.new(filepath)) end end |
