diff options
Diffstat (limited to 'lib/prism')
38 files changed, 5887 insertions, 2115 deletions
diff --git a/lib/prism/debug.rb b/lib/prism/debug.rb deleted file mode 100644 index 74f824faa7..0000000000 --- a/lib/prism/debug.rb +++ /dev/null @@ -1,249 +0,0 @@ -# frozen_string_literal: true - -module Prism - # This module is used for testing and debugging and is not meant to be used by - # consumers of this library. - module Debug - # A wrapper around a RubyVM::InstructionSequence that provides a more - # convenient interface for accessing parts of the iseq. - class ISeq # :nodoc: - attr_reader :parts - - def initialize(parts) - @parts = parts - end - - def type - parts[0] - end - - def local_table - parts[10] - end - - def instructions - parts[13] - end - - def each_child - instructions.each do |instruction| - # Only look at arrays. Other instructions are line numbers or - # tracepoint events. - next unless instruction.is_a?(Array) - - instruction.each do |opnd| - # Only look at arrays. Other operands are literals. - next unless opnd.is_a?(Array) - - # Only look at instruction sequences. Other operands are literals. - next unless opnd[0] == "YARVInstructionSequence/SimpleDataFormat" - - yield ISeq.new(opnd) - end - end - end - end - - private_constant :ISeq - - # :call-seq: - # Debug::cruby_locals(source) -> Array - # - # For the given source, compiles with CRuby and returns a list of all of the - # sets of local variables that were encountered. - def self.cruby_locals(source) - verbose, $VERBOSE = $VERBOSE, nil - - begin - locals = [] #: Array[Array[Symbol | Integer]] - stack = [ISeq.new(RubyVM::InstructionSequence.compile(source).to_a)] - - while (iseq = stack.pop) - names = [*iseq.local_table] - names.map!.with_index do |name, index| - # When an anonymous local variable is present in the iseq's local - # table, it is represented as the stack offset from the top. - # However, when these are dumped to binary and read back in, they - # are replaced with the symbol :#arg_rest. To consistently handle - # this, we replace them here with their index. 
- if name == :"#arg_rest" - names.length - index + 1 - else - name - end - end - - locals << names - iseq.each_child { |child| stack << child } - end - - locals - ensure - $VERBOSE = verbose - end - end - - # Used to hold the place of a local that will be in the local table but - # cannot be accessed directly from the source code. For example, the - # iteration variable in a for loop or the positional parameter on a method - # definition that is destructured. - AnonymousLocal = Object.new - private_constant :AnonymousLocal - - # :call-seq: - # Debug::prism_locals(source) -> Array - # - # For the given source, parses with prism and returns a list of all of the - # sets of local variables that were encountered. - def self.prism_locals(source) - locals = [] #: Array[Array[Symbol | Integer]] - stack = [Prism.parse(source).value] #: Array[Prism::node] - - while (node = stack.pop) - case node - when BlockNode, DefNode, LambdaNode - names = node.locals - params = - if node.is_a?(DefNode) - node.parameters - elsif node.parameters.is_a?(NumberedParametersNode) - nil - else - node.parameters&.parameters - end - - # prism places parameters in the same order that they appear in the - # source. CRuby places them in the order that they need to appear - # according to their own internal calling convention. We mimic that - # order here so that we can compare properly. - if params - sorted = [ - *params.requireds.map do |required| - if required.is_a?(RequiredParameterNode) - required.name - else - AnonymousLocal - end - end, - *params.optionals.map(&:name), - *((params.rest.name || :*) if params.rest && !params.rest.is_a?(ImplicitRestNode)), - *params.posts.map do |post| - if post.is_a?(RequiredParameterNode) - post.name - else - AnonymousLocal - end - end, - *params.keywords.grep(RequiredKeywordParameterNode).map(&:name), - *params.keywords.grep(OptionalKeywordParameterNode).map(&:name), - ] - - sorted << AnonymousLocal if params.keywords.any? 
- - if params.keyword_rest.is_a?(ForwardingParameterNode) - sorted.push(:*, :**, :&, :"...") - elsif params.keyword_rest.is_a?(KeywordRestParameterNode) - sorted << (params.keyword_rest.name || :**) - end - - # Recurse down the parameter tree to find any destructured - # parameters and add them after the other parameters. - param_stack = params.requireds.concat(params.posts).grep(MultiTargetNode).reverse - while (param = param_stack.pop) - case param - when MultiTargetNode - param_stack.concat(param.rights.reverse) - param_stack << param.rest if param.rest&.expression && !sorted.include?(param.rest.expression.name) - param_stack.concat(param.lefts.reverse) - when RequiredParameterNode - sorted << param.name - when SplatNode - sorted << param.expression.name - end - end - - if params.block - sorted << (params.block.name || :&) - end - - names = sorted.concat(names - sorted) - end - - names.map!.with_index do |name, index| - if name == AnonymousLocal - names.length - index + 1 - else - name - end - end - - locals << names - when ClassNode, ModuleNode, ProgramNode, SingletonClassNode - locals << node.locals - when ForNode - locals << [2] - when PostExecutionNode - locals.push([], []) - when InterpolatedRegularExpressionNode - locals << [] if node.once? - end - - stack.concat(node.compact_child_nodes) - end - - locals - end - - # :call-seq: - # Debug::newlines(source) -> Array - # - # For the given source string, return the byte offsets of every newline in - # the source. - def self.newlines(source) - Prism.parse(source).source.offsets - end - - # A wrapping around prism's internal encoding data structures. This is used - # for reflection and debugging purposes. - class Encoding - # The name of the encoding, that can be passed to Encoding.find. - attr_reader :name - - # Initialize a new encoding with the given name and whether or not it is - # a multibyte encoding. 
- def initialize(name, multibyte) - @name = name - @multibyte = multibyte - end - - # Whether or not the encoding is a multibyte encoding. - def multibyte? - @multibyte - end - - # Returns the number of bytes of the first character in the source string, - # if it is valid for the encoding. Otherwise, returns 0. - def width(source) - Encoding._width(name, source) - end - - # Returns true if the first character in the source string is a valid - # alphanumeric character for the encoding. - def alnum?(source) - Encoding._alnum?(name, source) - end - - # Returns true if the first character in the source string is a valid - # alphabetic character for the encoding. - def alpha?(source) - Encoding._alpha?(name, source) - end - - # Returns true if the first character in the source string is a valid - # uppercase character for the encoding. - def upper?(source) - Encoding._upper?(name, source) - end - end - end -end diff --git a/lib/prism/desugar_compiler.rb b/lib/prism/desugar_compiler.rb index 9b62c00df3..c64d03f64a 100644 --- a/lib/prism/desugar_compiler.rb +++ b/lib/prism/desugar_compiler.rb @@ -1,122 +1,186 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism class DesugarAndWriteNode # :nodoc: - attr_reader :node, :source, :read_class, :write_class, :arguments + include DSL - def initialize(node, source, read_class, write_class, *arguments) + attr_reader :node #: ClassVariableAndWriteNode | ConstantAndWriteNode | GlobalVariableAndWriteNode | InstanceVariableAndWriteNode | LocalVariableAndWriteNode + attr_reader :default_source #: Source + attr_reader :read_class, :write_class #: Symbol + attr_reader :arguments #: Hash[Symbol, untyped] + + #: ((ClassVariableAndWriteNode | ConstantAndWriteNode | GlobalVariableAndWriteNode | InstanceVariableAndWriteNode | LocalVariableAndWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void + def initialize(node, default_source, read_class, 
write_class, **arguments) @node = node - @source = source + @default_source = default_source @read_class = read_class @write_class = write_class @arguments = arguments end # Desugar `x &&= y` to `x && x = y` + #-- + #: () -> node def compile - AndNode.new( - source, - read_class.new(source, *arguments, node.name_loc), - write_class.new(source, *arguments, node.name_loc, node.value, node.operator_loc, node.location), - node.operator_loc, - node.location + and_node( + location: node.location, + left: public_send(read_class, location: node.name_loc, **arguments), + right: public_send( + write_class, + location: node.location, + **arguments, + name_loc: node.name_loc, + value: node.value, + operator_loc: node.operator_loc + ), + operator_loc: node.operator_loc ) end end class DesugarOrWriteDefinedNode # :nodoc: - attr_reader :node, :source, :read_class, :write_class, :arguments + include DSL + + attr_reader :node #: ClassVariableOrWriteNode | ConstantOrWriteNode | GlobalVariableOrWriteNode + attr_reader :default_source #: Source + attr_reader :read_class, :write_class #: Symbol + attr_reader :arguments #: Hash[Symbol, untyped] - def initialize(node, source, read_class, write_class, *arguments) + #: ((ClassVariableOrWriteNode | ConstantOrWriteNode | GlobalVariableOrWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void + def initialize(node, default_source, read_class, write_class, **arguments) @node = node - @source = source + @default_source = default_source @read_class = read_class @write_class = write_class @arguments = arguments end # Desugar `x ||= y` to `defined?(x) ? 
x : x = y` + #-- + #: () -> node def compile - IfNode.new( - source, - node.operator_loc, - DefinedNode.new(source, nil, read_class.new(source, *arguments, node.name_loc), nil, node.operator_loc, node.name_loc), - node.operator_loc, - StatementsNode.new(source, [read_class.new(source, *arguments, node.name_loc)], node.location), - ElseNode.new( - source, - node.operator_loc, - StatementsNode.new( - source, - [write_class.new(source, *arguments, node.name_loc, node.value, node.operator_loc, node.location)], - node.location + if_node( + location: node.location, + if_keyword_loc: node.operator_loc, + predicate: defined_node( + location: node.name_loc, + value: public_send(read_class, location: node.name_loc, **arguments), + keyword_loc: node.operator_loc + ), + then_keyword_loc: node.operator_loc, + statements: statements_node( + location: node.location, + body: [public_send(read_class, location: node.name_loc, **arguments)] + ), + subsequent: else_node( + location: node.location, + else_keyword_loc: node.operator_loc, + statements: statements_node( + location: node.location, + body: [ + public_send( + write_class, + location: node.location, + **arguments, + name_loc: node.name_loc, + value: node.value, + operator_loc: node.operator_loc + ) + ] ), - node.operator_loc, - node.location + end_keyword_loc: node.operator_loc ), - node.operator_loc, - node.location + end_keyword_loc: node.operator_loc ) end end class DesugarOperatorWriteNode # :nodoc: - attr_reader :node, :source, :read_class, :write_class, :arguments + include DSL - def initialize(node, source, read_class, write_class, *arguments) + attr_reader :node #: ClassVariableOperatorWriteNode | ConstantOperatorWriteNode | GlobalVariableOperatorWriteNode | InstanceVariableOperatorWriteNode | LocalVariableOperatorWriteNode + attr_reader :default_source #: Source + attr_reader :read_class, :write_class #: Symbol + attr_reader :arguments #: Hash[Symbol, untyped] + + #: ((ClassVariableOperatorWriteNode | 
ConstantOperatorWriteNode | GlobalVariableOperatorWriteNode | InstanceVariableOperatorWriteNode | LocalVariableOperatorWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void + def initialize(node, default_source, read_class, write_class, **arguments) @node = node - @source = source + @default_source = default_source @read_class = read_class @write_class = write_class @arguments = arguments end # Desugar `x += y` to `x = x + y` + #-- + #: () -> node def compile - operator_loc = node.operator_loc.chop - - write_class.new( - source, - *arguments, - node.name_loc, - CallNode.new( - source, - 0, - read_class.new(source, *arguments, node.name_loc), - nil, - operator_loc.slice.to_sym, - operator_loc, - nil, - ArgumentsNode.new(source, 0, [node.value], node.value.location), - nil, - nil, - node.location + binary_operator_loc = node.binary_operator_loc.chop + + public_send( + write_class, + location: node.location, + **arguments, + name_loc: node.name_loc, + value: call_node( + location: node.location, + receiver: public_send( + read_class, + location: node.name_loc, + **arguments + ), + name: binary_operator_loc.slice.to_sym, + message_loc: binary_operator_loc, + arguments: arguments_node( + location: node.value.location, + arguments: [node.value] + ) ), - node.operator_loc.copy(start_offset: node.operator_loc.end_offset - 1, length: 1), - node.location + operator_loc: node.binary_operator_loc.copy( + start_offset: node.binary_operator_loc.end_offset - 1, + length: 1 + ) ) end end class DesugarOrWriteNode # :nodoc: - attr_reader :node, :source, :read_class, :write_class, :arguments + include DSL - def initialize(node, source, read_class, write_class, *arguments) + attr_reader :node #: InstanceVariableOrWriteNode | LocalVariableOrWriteNode + attr_reader :default_source #: Source + attr_reader :read_class, :write_class #: Symbol + attr_reader :arguments #: Hash[Symbol, untyped] + + #: ((InstanceVariableOrWriteNode | 
LocalVariableOrWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void + def initialize(node, default_source, read_class, write_class, **arguments) @node = node - @source = source + @default_source = default_source @read_class = read_class @write_class = write_class @arguments = arguments end # Desugar `x ||= y` to `x || x = y` + #-- + #: () -> node def compile - OrNode.new( - source, - read_class.new(source, *arguments, node.name_loc), - write_class.new(source, *arguments, node.name_loc, node.value, node.operator_loc, node.location), - node.operator_loc, - node.location + or_node( + location: node.location, + left: public_send(read_class, location: node.name_loc, **arguments), + right: public_send( + write_class, + location: node.location, + **arguments, + name_loc: node.name_loc, + value: node.value, + operator_loc: node.operator_loc + ), + operator_loc: node.operator_loc ) end end @@ -124,229 +188,274 @@ module Prism private_constant :DesugarAndWriteNode, :DesugarOrWriteNode, :DesugarOrWriteDefinedNode, :DesugarOperatorWriteNode class ClassVariableAndWriteNode + #: () -> node def desugar # :nodoc: - DesugarAndWriteNode.new(self, source, ClassVariableReadNode, ClassVariableWriteNode, name).compile + DesugarAndWriteNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile end end class ClassVariableOrWriteNode + #: () -> node def desugar # :nodoc: - DesugarOrWriteDefinedNode.new(self, source, ClassVariableReadNode, ClassVariableWriteNode, name).compile + DesugarOrWriteDefinedNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile end end class ClassVariableOperatorWriteNode + #: () -> node def desugar # :nodoc: - DesugarOperatorWriteNode.new(self, source, ClassVariableReadNode, ClassVariableWriteNode, name).compile + DesugarOperatorWriteNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile 
end end class ConstantAndWriteNode + #: () -> node def desugar # :nodoc: - DesugarAndWriteNode.new(self, source, ConstantReadNode, ConstantWriteNode, name).compile + DesugarAndWriteNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile end end class ConstantOrWriteNode + #: () -> node def desugar # :nodoc: - DesugarOrWriteDefinedNode.new(self, source, ConstantReadNode, ConstantWriteNode, name).compile + DesugarOrWriteDefinedNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile end end class ConstantOperatorWriteNode + #: () -> node def desugar # :nodoc: - DesugarOperatorWriteNode.new(self, source, ConstantReadNode, ConstantWriteNode, name).compile + DesugarOperatorWriteNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile end end class GlobalVariableAndWriteNode + #: () -> node def desugar # :nodoc: - DesugarAndWriteNode.new(self, source, GlobalVariableReadNode, GlobalVariableWriteNode, name).compile + DesugarAndWriteNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile end end class GlobalVariableOrWriteNode + #: () -> node def desugar # :nodoc: - DesugarOrWriteDefinedNode.new(self, source, GlobalVariableReadNode, GlobalVariableWriteNode, name).compile + DesugarOrWriteDefinedNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile end end class GlobalVariableOperatorWriteNode + #: () -> node def desugar # :nodoc: - DesugarOperatorWriteNode.new(self, source, GlobalVariableReadNode, GlobalVariableWriteNode, name).compile + DesugarOperatorWriteNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile end end class InstanceVariableAndWriteNode + #: () -> node def desugar # :nodoc: - DesugarAndWriteNode.new(self, source, InstanceVariableReadNode, InstanceVariableWriteNode, name).compile + DesugarAndWriteNode.new(self, source, 
:instance_variable_read_node, :instance_variable_write_node, name: name).compile end end class InstanceVariableOrWriteNode + #: () -> node def desugar # :nodoc: - DesugarOrWriteNode.new(self, source, InstanceVariableReadNode, InstanceVariableWriteNode, name).compile + DesugarOrWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile end end class InstanceVariableOperatorWriteNode + #: () -> node def desugar # :nodoc: - DesugarOperatorWriteNode.new(self, source, InstanceVariableReadNode, InstanceVariableWriteNode, name).compile + DesugarOperatorWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile end end class LocalVariableAndWriteNode + #: () -> node def desugar # :nodoc: - DesugarAndWriteNode.new(self, source, LocalVariableReadNode, LocalVariableWriteNode, name, depth).compile + DesugarAndWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile end end class LocalVariableOrWriteNode + #: () -> node def desugar # :nodoc: - DesugarOrWriteNode.new(self, source, LocalVariableReadNode, LocalVariableWriteNode, name, depth).compile + DesugarOrWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile end end class LocalVariableOperatorWriteNode + #: () -> node def desugar # :nodoc: - DesugarOperatorWriteNode.new(self, source, LocalVariableReadNode, LocalVariableWriteNode, name, depth).compile + DesugarOperatorWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile end end # DesugarCompiler is a compiler that desugars Ruby code into a more primitive # form. This is useful for consumers that want to deal with fewer node types. 
class DesugarCompiler < MutationCompiler - # @@foo &&= bar + # `@@foo &&= bar` # # becomes # - # @@foo && @@foo = bar + # `@@foo && @@foo = bar` + #-- + #: (ClassVariableAndWriteNode node) -> node def visit_class_variable_and_write_node(node) node.desugar end - # @@foo ||= bar + # `@@foo ||= bar` # # becomes # - # defined?(@@foo) ? @@foo : @@foo = bar + # `defined?(@@foo) ? @@foo : @@foo = bar` + #-- + #: (ClassVariableOrWriteNode node) -> node def visit_class_variable_or_write_node(node) node.desugar end - # @@foo += bar + # `@@foo += bar` # # becomes # - # @@foo = @@foo + bar + # `@@foo = @@foo + bar` + #-- + #: (ClassVariableOperatorWriteNode node) -> node def visit_class_variable_operator_write_node(node) node.desugar end - # Foo &&= bar + # `Foo &&= bar` # # becomes # - # Foo && Foo = bar + # `Foo && Foo = bar` + #-- + #: (ConstantAndWriteNode node) -> node def visit_constant_and_write_node(node) node.desugar end - # Foo ||= bar + # `Foo ||= bar` # # becomes # - # defined?(Foo) ? Foo : Foo = bar + # `defined?(Foo) ? Foo : Foo = bar` + #-- + #: (ConstantOrWriteNode node) -> node def visit_constant_or_write_node(node) node.desugar end - # Foo += bar + # `Foo += bar` # # becomes # - # Foo = Foo + bar + # `Foo = Foo + bar` + #-- + #: (ConstantOperatorWriteNode node) -> node def visit_constant_operator_write_node(node) node.desugar end - # $foo &&= bar + # `$foo &&= bar` # # becomes # - # $foo && $foo = bar + # `$foo && $foo = bar` + #-- + #: (GlobalVariableAndWriteNode node) -> node def visit_global_variable_and_write_node(node) node.desugar end - # $foo ||= bar + # `$foo ||= bar` # # becomes # - # defined?($foo) ? $foo : $foo = bar + # `defined?($foo) ? 
$foo : $foo = bar` + #-- + #: (GlobalVariableOrWriteNode node) -> node def visit_global_variable_or_write_node(node) node.desugar end - # $foo += bar + # `$foo += bar` # # becomes # - # $foo = $foo + bar + # `$foo = $foo + bar` + #-- + #: (GlobalVariableOperatorWriteNode node) -> node def visit_global_variable_operator_write_node(node) node.desugar end - # @foo &&= bar + # `@foo &&= bar` # # becomes # - # @foo && @foo = bar + # `@foo && @foo = bar` + #-- + #: (InstanceVariableAndWriteNode node) -> node def visit_instance_variable_and_write_node(node) node.desugar end - # @foo ||= bar + # `@foo ||= bar` # # becomes # - # @foo || @foo = bar + # `@foo || @foo = bar` + #-- + #: (InstanceVariableOrWriteNode node) -> node def visit_instance_variable_or_write_node(node) node.desugar end - # @foo += bar + # `@foo += bar` # # becomes # - # @foo = @foo + bar + # `@foo = @foo + bar` + #-- + #: (InstanceVariableOperatorWriteNode node) -> node def visit_instance_variable_operator_write_node(node) node.desugar end - # foo &&= bar + # `foo &&= bar` # # becomes # - # foo && foo = bar + # `foo && foo = bar` + #-- + #: (LocalVariableAndWriteNode node) -> node def visit_local_variable_and_write_node(node) node.desugar end - # foo ||= bar + # `foo ||= bar` # # becomes # - # foo || foo = bar + # `foo || foo = bar` + #-- + #: (LocalVariableOrWriteNode node) -> node def visit_local_variable_or_write_node(node) node.desugar end - # foo += bar + # `foo += bar` # # becomes # - # foo = foo + bar + # `foo = foo + bar` + #-- + #: (LocalVariableOperatorWriteNode node) -> node def visit_local_variable_operator_write_node(node) node.desugar end diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 0a064a5c94..6b9bde51ea 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true +# :markup: markdown # typed: ignore # This file is responsible for mirroring the API provided by the C extension by @@ -7,13 +8,26 @@ require "rbconfig" require "ffi" -module Prism 
+# We want to eagerly load this file if there are Ractors so that it does not get +# autoloaded from within a non-main Ractor. +require "prism/serialize" if defined?(Ractor) + +module Prism # :nodoc: module LibRubyParser # :nodoc: extend FFI::Library # Define the library that we will be pulling functions from. Note that this # must align with the build shared library from make/rake. - ffi_lib File.expand_path("../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__) + libprism_in_build = File.expand_path("../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__) + libprism_in_libdir = "#{RbConfig::CONFIG["libdir"]}/prism/libprism.#{RbConfig::CONFIG["SOEXT"]}" + + if File.exist?(libprism_in_build) + INCLUDE_DIR = File.expand_path("../../include", __dir__) + ffi_lib libprism_in_build + else + INCLUDE_DIR = "#{RbConfig::CONFIG["libdir"]}/prism/include" + ffi_lib libprism_in_libdir + end # Convert a native C type declaration into a symbol that FFI understands. # For example: @@ -38,13 +52,16 @@ module Prism # given functions. For each one, define a function with the same name and # signature as the C function. def self.load_exported_functions_from(header, *functions, callbacks) - File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line| + File.foreach("#{INCLUDE_DIR}/#{header}") do |line| # We only want to attempt to load exported functions. next unless line.start_with?("PRISM_EXPORTED_FUNCTION ") # We only want to load the functions that we are interested in. next unless functions.any? { |function| line.include?(function) } + # Strip trailing attributes (PRISM_NODISCARD, PRISM_NONNULL(...), etc.) + line = line.sub(/\)(\s+PRISM_\w+(?:\([^)]*\))?)+\s*;/, ");") + # Parse the function declaration. unless /^PRISM_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line raise "Could not parse #{line}" @@ -71,24 +88,44 @@ module Prism raise "Could not find functions #{functions.inspect}" unless functions.empty? 
end - callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer + callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer + callback :pm_source_stream_feof_t, [:pointer], :int + pm_source_init_result_values = %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR] + enum :pm_source_init_result_t, pm_source_init_result_values + enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE] + + # Ractor-safe lookup table for pm_source_init_result_t, since FFI's + # enum_type accesses module instance variables that are not shareable. + SOURCE_INIT_RESULT = pm_source_init_result_values.freeze load_exported_functions_from( - "prism.h", + "prism/version.h", "pm_version", + [] + ) + + load_exported_functions_from( + "prism/serialize.h", "pm_serialize_parse", "pm_serialize_parse_stream", "pm_serialize_parse_comments", "pm_serialize_lex", "pm_serialize_parse_lex", - "pm_parse_success_p", - [:pm_parse_stream_fgets_t] + "pm_serialize_parse_success_p", + [] ) load_exported_functions_from( - "prism/util/pm_buffer.h", - "pm_buffer_sizeof", - "pm_buffer_init", + "prism/string_query.h", + "pm_string_query_local", + "pm_string_query_constant", + "pm_string_query_method_name", + [] + ) + + load_exported_functions_from( + "prism/buffer.h", + "pm_buffer_new", "pm_buffer_value", "pm_buffer_length", "pm_buffer_free", @@ -96,20 +133,19 @@ module Prism ) load_exported_functions_from( - "prism/util/pm_string.h", - "pm_string_mapped_init", - "pm_string_free", - "pm_string_source", - "pm_string_length", - "pm_string_sizeof", - [] + "prism/source.h", + "pm_source_file_new", + "pm_source_mapped_new", + "pm_source_stream_new", + "pm_source_free", + "pm_source_source", + "pm_source_length", + [:pm_source_stream_fgets_t, :pm_source_stream_feof_t] ) # This object represents a pm_buffer_t. 
We only use it as an opaque pointer, # so it doesn't need to know the fields of pm_buffer_t. class PrismBuffer # :nodoc: - SIZEOF = LibRubyParser.pm_buffer_sizeof - attr_reader :pointer def initialize(pointer) @@ -131,19 +167,22 @@ module Prism # Initialize a new buffer and yield it to the block. The buffer will be # automatically freed when the block returns. def self.with - FFI::MemoryPointer.new(SIZEOF) do |pointer| - raise unless LibRubyParser.pm_buffer_init(pointer) - return yield new(pointer) + buffer = LibRubyParser.pm_buffer_new + raise unless buffer + + begin + yield new(buffer) ensure - LibRubyParser.pm_buffer_free(pointer) + LibRubyParser.pm_buffer_free(buffer) end end end - # This object represents a pm_string_t. We only use it as an opaque pointer, - # so it doesn't have to be an FFI::Struct. - class PrismString # :nodoc: - SIZEOF = LibRubyParser.pm_string_sizeof + # This object represents source code to be parsed. For strings it wraps a + # pointer directly; for files it uses a pm_source_t under the hood. + class PrismSource # :nodoc: + PLATFORM_EXPECTS_UTF8 = + RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i) attr_reader :pointer, :length @@ -158,7 +197,7 @@ module Prism @pointer.read_string(@length) end - # Yields a pm_string_t pointer to the given block. + # Yields a PrismSource backed by the given string to the block. def self.with_string(string) raise TypeError unless string.is_a?(String) @@ -172,20 +211,38 @@ module Prism end end - # Yields a pm_string_t pointer to the given block. + # Yields a PrismSource to the given block, backed by a pm_source_t. 
def self.with_file(filepath) raise TypeError unless filepath.is_a?(String) - FFI::MemoryPointer.new(SIZEOF) do |pm_string| - if LibRubyParser.pm_string_mapped_init(pm_string, filepath) - pointer = LibRubyParser.pm_string_source(pm_string) - length = LibRubyParser.pm_string_length(pm_string) + # On Windows and Mac, it's expected that filepaths will be encoded in + # UTF-8. If they are not, we need to convert them to UTF-8 before + # passing them into pm_source_mapped_new. + if PLATFORM_EXPECTS_UTF8 && (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8 + filepath = filepath.encode(Encoding::UTF_8) + end + + FFI::MemoryPointer.new(:int) do |result_ptr| + pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr) + + case SOURCE_INIT_RESULT[result_ptr.read_int] + when :PM_SOURCE_INIT_SUCCESS + pointer = LibRubyParser.pm_source_source(pm_source) + length = LibRubyParser.pm_source_length(pm_source) return yield new(pointer, length, false) - else + when :PM_SOURCE_INIT_ERROR_GENERIC raise SystemCallError.new(filepath, FFI.errno) + when :PM_SOURCE_INIT_ERROR_DIRECTORY + raise Errno::EISDIR.new(filepath) + when :PM_SOURCE_INIT_ERROR_NON_REGULAR + # Fall back to reading the file through Ruby IO for non-regular + # files (pipes, character devices, etc.) + return with_string(File.read(filepath)) { |string| yield string } + else + raise "Unknown error initializing pm_source_t: #{result_ptr.read_int}" end ensure - LibRubyParser.pm_string_free(pm_string) + LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? end end end @@ -196,34 +253,34 @@ module Prism private_constant :LibRubyParser # The version constant is set by reading the result of calling pm_version. - VERSION = LibRubyParser.pm_version.read_string + VERSION = LibRubyParser.pm_version.read_string.freeze class << self # Mirror the Prism.dump API by using the serialization API. 
- def dump(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| dump_common(string, options) } + def dump(source, **options) + LibRubyParser::PrismSource.with_string(source) { |string| dump_common(string, options) } end # Mirror the Prism.dump_file API by using the serialization API. def dump_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| dump_common(string, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| dump_common(string, options) } end # Mirror the Prism.lex API by using the serialization API. def lex(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| lex_common(string, code, options) } + LibRubyParser::PrismSource.with_string(code) { |string| lex_common(string, code, options) } end # Mirror the Prism.lex_file API by using the serialization API. def lex_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| lex_common(string, string.read, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| lex_common(string, string.read, options) } end # Mirror the Prism.parse API by using the serialization API. def parse(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_common(string, code, options) } + LibRubyParser::PrismSource.with_string(code) { |string| parse_common(string, code, options) } end # Mirror the Prism.parse_file API by using the serialization API. This uses @@ -231,7 +288,7 @@ module Prism # when it is available. def parse_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_common(string, string.read, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| parse_common(string, string.read, options) } end # Mirror the Prism.parse_stream API by using the serialization API. 
@@ -247,19 +304,21 @@ module Prism end } - # In the pm_serialize_parse_stream function it accepts a pointer to the - # IO object as a void* and then passes it through to the callback as the - # third argument, but it never touches it itself. As such, since we have - # access to the IO object already through the closure of the lambda, we - # can pass a null pointer here and not worry. - LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options)) - Prism.load(source, buffer.read) + eof_callback = -> (_) { stream.eof? } + + pm_source = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback) + begin + LibRubyParser.pm_serialize_parse_stream(buffer.pointer, pm_source, dump_options(options)) + Prism.load(source, buffer.read, options.fetch(:freeze, false)) + ensure + LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? + end end end # Mirror the Prism.parse_comments API by using the serialization API. def parse_comments(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) } + LibRubyParser::PrismSource.with_string(code) { |string| parse_comments_common(string, code, options) } end # Mirror the Prism.parse_file_comments API by using the serialization @@ -267,29 +326,60 @@ module Prism # to use mmap when it is available. def parse_file_comments(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_comments_common(string, string.read, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| parse_comments_common(string, string.read, options) } end # Mirror the Prism.parse_lex API by using the serialization API. 
def parse_lex(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_lex_common(string, code, options) } + LibRubyParser::PrismSource.with_string(code) { |string| parse_lex_common(string, code, options) } end # Mirror the Prism.parse_lex_file API by using the serialization API. def parse_lex_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_lex_common(string, string.read, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| parse_lex_common(string, string.read, options) } end # Mirror the Prism.parse_success? API by using the serialization API. def parse_success?(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_file_success_common(string, options) } + LibRubyParser::PrismSource.with_string(code) { |string| parse_file_success_common(string, options) } + end + + # Mirror the Prism.parse_failure? API by using the serialization API. + def parse_failure?(code, **options) + !parse_success?(code, **options) end # Mirror the Prism.parse_file_success? API by using the serialization API. def parse_file_success?(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_file_success_common(string, options) } + LibRubyParser::PrismSource.with_file(filepath) { |string| parse_file_success_common(string, options) } + end + + # Mirror the Prism.parse_file_failure? API by using the serialization API. + def parse_file_failure?(filepath, **options) + !parse_file_success?(filepath, **options) + end + + # Mirror the Prism.profile API by using the serialization API. 
+ def profile(source, **options) + LibRubyParser::PrismSource.with_string(source) do |string| + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + nil + end + end + end + + # Mirror the Prism.profile_file API by using the serialization API. + def profile_file(filepath, **options) + LibRubyParser::PrismSource.with_file(filepath) do |string| + LibRubyParser::PrismBuffer.with do |buffer| + options[:filepath] = filepath + LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + nil + end + end end private @@ -297,55 +387,42 @@ module Prism def dump_common(string, options) # :nodoc: LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) - buffer.read + + dumped = buffer.read + dumped.freeze if options.fetch(:freeze, false) + + dumped end end def lex_common(string, code, options) # :nodoc: - serialized = LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) - buffer.read + Serialize.load_lex(code, buffer.read, options.fetch(:freeze, false)) end - - Serialize.load_tokens(Source.new(code), serialized) end def parse_common(string, code, options) # :nodoc: serialized = dump_common(string, options) - Prism.load(code, serialized) + Serialize.load_parse(code, serialized, options.fetch(:freeze, false)) end def parse_comments_common(string, code, options) # :nodoc: LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options)) - - source = Source.new(code) - loader = Serialize::Loader.new(source, buffer.read) - - loader.load_header - loader.load_encoding - loader.load_start_line - loader.load_comments + 
Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false)) end end def parse_lex_common(string, code, options) # :nodoc: LibRubyParser::PrismBuffer.with do |buffer| LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) - - source = Source.new(code) - loader = Serialize::Loader.new(source, buffer.read) - - tokens = loader.load_tokens - node, comments, magic_comments, data_loc, errors, warnings = loader.load_nodes - tokens.each { |token,| token.value.force_encoding(loader.encoding) } - - ParseResult.new([node, tokens], comments, magic_comments, data_loc, errors, warnings, source) + Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false)) end end def parse_file_success_common(string, options) # :nodoc: - LibRubyParser.pm_parse_success_p(string.pointer, string.length, dump_options(options)) + LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options)) end # Return the value that should be dumped for the command_line option. @@ -366,6 +443,41 @@ module Prism end end + # Return the value that should be dumped for the version option. + def dump_options_version(version) + case version + when "current" + version_string_to_number(RUBY_VERSION) || raise(CurrentVersionError, RUBY_VERSION) + when "latest", nil + 0 # Handled in pm_parser_init + when "nearest" + dump = version_string_to_number(RUBY_VERSION) + return dump if dump + if RUBY_VERSION < "3.3" + version_string_to_number("3.3") + else + 0 # Handled in pm_parser_init + end + else + version_string_to_number(version) || raise(ArgumentError, "invalid version: #{version}") + end + end + + # Converts a version string like "4.0.0" or "4.0" into a number. + # Returns nil if the version is unknown. 
+ def version_string_to_number(version) + case version + when /\A3\.3(\.\d+)?\z/ + 1 + when /\A3\.4(\.\d+)?\z/ + 2 + when /\A3\.5(\.\d+)?\z/, /\A4\.0(\.\d+)?\z/ + 3 + when /\A4\.1(\.\d+)?\z/ + 4 + end + end + # Convert the given options into a serialized options string. def dump_options(options) template = +"" @@ -384,7 +496,7 @@ module Prism template << "L" if (encoding = options[:encoding]) - name = encoding.name + name = encoding.is_a?(Encoding) ? encoding.name : encoding values.push(name.bytesize, name.b) template << "A*" else @@ -398,17 +510,54 @@ module Prism values << dump_options_command_line(options) template << "C" - values << { nil => 0, "3.3.0" => 1, "3.4.0" => 0, "latest" => 0 }.fetch(options[:version]) + values << dump_options_version(options[:version]) + + template << "C" + values << (options[:encoding] == false ? 1 : 0) + + template << "C" + values << (options.fetch(:main_script, false) ? 1 : 0) + + template << "C" + values << (options.fetch(:partial_script, false) ? 1 : 0) + + template << "C" + values << (options.fetch(:freeze, false) ? 1 : 0) template << "L" if (scopes = options[:scopes]) values << scopes.length scopes.each do |scope| + locals = nil + forwarding = 0 + + case scope + when Array + locals = scope + when Scope + locals = scope.locals + + scope.forwarding.each do |forward| + case forward + when :* then forwarding |= 0x1 + when :** then forwarding |= 0x2 + when :& then forwarding |= 0x4 + when :"..." 
then forwarding |= 0x8 + else raise ArgumentError, "invalid forwarding value: #{forward}" + end + end + else + raise TypeError, "wrong argument type #{scope.class.inspect} (expected Array or Prism::Scope)" + end + template << "L" - values << scope.length + values << locals.length + + template << "C" + values << forwarding - scope.each do |local| + locals.each do |local| name = local.name template << "L" values << name.bytesize @@ -424,4 +573,39 @@ module Prism values.pack(template) end end + + # Here we are going to patch StringQuery to put in the class-level methods so + # that it can maintain a consistent interface + class StringQuery # :nodoc: + class << self + # Mirrors the C extension's StringQuery::local? method. + def local?(string) + query(LibRubyParser.pm_string_query_local(string, string.bytesize, string.encoding.name)) + end + + # Mirrors the C extension's StringQuery::constant? method. + def constant?(string) + query(LibRubyParser.pm_string_query_constant(string, string.bytesize, string.encoding.name)) + end + + # Mirrors the C extension's StringQuery::method_name? method. + def method_name?(string) + query(LibRubyParser.pm_string_query_method_name(string, string.bytesize, string.encoding.name)) + end + + private + + # Parse the enum result and return an appropriate boolean. + def query(result) + case result + when :PM_STRING_QUERY_ERROR + raise ArgumentError, "Invalid or non ascii-compatible encoding" + when :PM_STRING_QUERY_FALSE + false + when :PM_STRING_QUERY_TRUE + true + end + end + end + end end diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 70cb065201..7aacec037d 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -1,15 +1,68 @@ # frozen_string_literal: true - -require "delegate" -require "ripper" +# :markup: markdown +#-- +# rbs_inline: enabled module Prism + # @rbs! 
+ # module Translation + # class Ripper + # EXPR_NONE: Integer + # EXPR_BEG: Integer + # EXPR_MID: Integer + # EXPR_END: Integer + # EXPR_CLASS: Integer + # EXPR_VALUE: Integer + # EXPR_ARG: Integer + # EXPR_CMDARG: Integer + # EXPR_ENDARG: Integer + # EXPR_ENDFN: Integer + # + # class Lexer < Ripper + # class State + # def self.[]: (Integer value) -> State + # end + # end + # + # class LineAndColumnCache + # def initialize: (Source source) -> void + # + # def line_and_column: (Integer byte_offset) -> [Integer, Integer] + # end + # end + # end + # This class is responsible for lexing the source using prism and then # converting those tokens to be compatible with Ripper. In the vast majority # of cases, this is a one-to-one mapping of the token type. Everything else # generally lines up. However, there are a few cases that require special # handling. class LexCompat # :nodoc: + # @rbs! + # # A token produced by the Ripper lexer that Prism is replicating. + # type lex_compat_token = [[Integer, Integer], Symbol, String, untyped] + + # A result class specialized for holding tokens produced by the lexer. + class Result < Prism::Result + # The list of tokens that were produced by the lexer. + attr_reader :value #: Array[lex_compat_token] + + # Create a new lex compat result object with the given values. + #-- + #: (Array[lex_compat_token] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) + @value = value + super(comments, magic_comments, data_loc, errors, warnings, continuable, source) + end + + # Implement the hash pattern matching interface for Result. + #-- + #: (Array[Symbol]? 
keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: + super.merge!(value: value) + end + end + # This is a mapping of prism token types to Ripper token types. This is a # many-to-one mapping because we split up our token types, whereas Ripper # tends to group them. @@ -87,6 +140,7 @@ module Prism KEYWORD_DEF: :on_kw, KEYWORD_DEFINED: :on_kw, KEYWORD_DO: :on_kw, + KEYWORD_DO_BLOCK: :on_kw, KEYWORD_DO_LOOP: :on_kw, KEYWORD_ELSE: :on_kw, KEYWORD_ELSIF: :on_kw, @@ -181,93 +235,6 @@ module Prism "__END__": :on___end__ }.freeze - # When we produce tokens, we produce the same arrays that Ripper does. - # However, we add a couple of convenience methods onto them to make them a - # little easier to work with. We delegate all other methods to the array. - class Token < SimpleDelegator - # @dynamic initialize, each, [] - - # The location of the token in the source. - def location - self[0] - end - - # The type of the token. - def event - self[1] - end - - # The slice of the source that this token represents. - def value - self[2] - end - - # The state of the lexer when this token was produced. - def state - self[3] - end - end - - # Ripper doesn't include the rest of the token in the event, so we need to - # trim it down to just the content on the first line when comparing. - class EndContentToken < Token - def ==(other) # :nodoc: - [self[0], self[1], self[2][0..self[2].index("\n")], self[3]] == other - end - end - - # Tokens where state should be ignored - # used for :on_comment, :on_heredoc_end, :on_embexpr_end - class IgnoreStateToken < Token - def ==(other) # :nodoc: - self[0...-1] == other[0...-1] - end - end - - # Ident tokens for the most part are exactly the same, except sometimes we - # know an ident is a local when ripper doesn't (when they are introduced - # through named captures in regular expressions). In that case we don't - # compare the state. 
- class IdentToken < Token - def ==(other) # :nodoc: - (self[0...-1] == other[0...-1]) && ( - (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) || - (other[3] & Ripper::EXPR_ARG_ANY != 0) - ) - end - end - - # Ignored newlines can occasionally have a LABEL state attached to them, so - # we compare the state differently here. - class IgnoredNewlineToken < Token - def ==(other) # :nodoc: - return false unless self[0...-1] == other[0...-1] - - if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED - other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0 - else - self[3] == other[3] - end - end - end - - # If we have an identifier that follows a method name like: - # - # def foo bar - # - # then Ripper will mark bar as END|LABEL if there is a local in a parent - # scope named bar because it hasn't pushed the local table yet. We do this - # more accurately, so we need to allow comparing against both END and - # END|LABEL. - class ParamToken < Token - def ==(other) # :nodoc: - (self[0...-1] == other[0...-1]) && ( - (other[3] == Ripper::EXPR_END) || - (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL) - ) - end - end - # A heredoc in this case is a list of tokens that belong to the body of the # heredoc that should be appended onto the list of tokens when the heredoc # closes. @@ -277,16 +244,19 @@ module Prism # order back into the token stream and set the state of the last token to # the state that the heredoc was opened in. class PlainHeredoc # :nodoc: - attr_reader :tokens + attr_reader :tokens #: Array[lex_compat_token] + #: () -> void def initialize @tokens = [] end + #: (lex_compat_token token) -> void def <<(token) tokens << token end + #: () -> Array[lex_compat_token] def to_a tokens end @@ -296,22 +266,26 @@ module Prism # that need to be split on "\\\n" to mimic Ripper's behavior. We also need # to keep track of the state that the heredoc was opened in. 
class DashHeredoc # :nodoc: - attr_reader :split, :tokens + attr_reader :split #: bool + attr_reader :tokens #: Array[lex_compat_token] + #: (bool split) -> void def initialize(split) @split = split @tokens = [] end + #: (lex_compat_token token) -> void def <<(token) tokens << token end + #: () -> Array[lex_compat_token] def to_a embexpr_balance = 0 - tokens.each_with_object([]) do |token, results| #$ Array[Token] - case token.event + tokens.each_with_object([]) do |token, results| #$ Array[lex_compat_token] + case token[1] when :on_embexpr_beg embexpr_balance += 1 results << token @@ -326,9 +300,9 @@ module Prism if split # Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind # to keep the delimiter in the result. - token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index| + token[2].split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index| column = 0 if index > 0 - results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) + results << [[lineno, column], :on_tstring_content, value, token[3]] lineno += value.count("\n") end else @@ -357,8 +331,13 @@ module Prism class DedentingHeredoc # :nodoc: TAB_WIDTH = 8 - attr_reader :tokens, :dedent_next, :dedent, :embexpr_balance + attr_reader :tokens #: Array[lex_compat_token] + attr_reader :dedent_next #: bool + attr_reader :dedent #: Integer? + attr_reader :embexpr_balance #: Integer + # @rbs @ended_on_newline: bool + #: () -> void def initialize @tokens = [] @dedent_next = true @@ -370,8 +349,10 @@ module Prism # As tokens are coming in, we track the minimum amount of common leading # whitespace on plain string content tokens. This allows us to later # remove that amount of whitespace from the beginning of each line. 
+ # + #: (lex_compat_token token) -> void def <<(token) - case token.event + case token[1] when :on_embexpr_beg, :on_heredoc_beg @embexpr_balance += 1 @dedent = 0 if @dedent_next && @ended_on_newline @@ -379,10 +360,10 @@ module Prism @embexpr_balance -= 1 when :on_tstring_content if embexpr_balance == 0 - line = token.value + line = token[2] if dedent_next && !(line.strip.empty? && line.end_with?("\n")) - leading = line[/\A(\s*)\n?/, 1] + leading = line[/\A(\s*)\n?/, 1] #: String next_dedent = 0 leading.each_char do |char| @@ -402,20 +383,21 @@ module Prism end end - @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0 + @dedent_next = token[1] == :on_tstring_content && embexpr_balance == 0 @ended_on_newline = false tokens << token end + #: () -> Array[lex_compat_token] def to_a # If every line in the heredoc is blank, we still need to split up the # string content token into multiple tokens. if dedent.nil? - results = [] #: Array[Token] + results = [] #: Array[lex_compat_token] embexpr_balance = 0 tokens.each do |token| - case token.event + case token[1] when :on_embexpr_beg, :on_heredoc_beg embexpr_balance += 1 results << token @@ -427,9 +409,9 @@ module Prism lineno = token[0][0] column = token[0][1] - token.value.split(/(?<=\n)/).each_with_index do |value, index| + token[2].split(/(?<=\n)/).each_with_index do |value, index| column = 0 if index > 0 - results << Token.new([[lineno, column], :on_tstring_content, value, token.state]) + results << [[lineno, column], :on_tstring_content, value, token[3]] lineno += 1 end else @@ -446,7 +428,7 @@ module Prism # If the minimum common whitespace is 0, then we need to concatenate # string nodes together that are immediately adjacent. 
if dedent == 0 - results = [] #: Array[Token] + results = [] #: Array[lex_compat_token] embexpr_balance = 0 index = 0 @@ -457,15 +439,15 @@ module Prism results << token index += 1 - case token.event + case token[1] when :on_embexpr_beg, :on_heredoc_beg embexpr_balance += 1 when :on_embexpr_end, :on_heredoc_end embexpr_balance -= 1 when :on_tstring_content if embexpr_balance == 0 - while index < max_index && tokens[index].event == :on_tstring_content - token.value << tokens[index].value + while index < max_index && tokens[index][1] == :on_tstring_content && !token[2].match?(/\\\r?\n\z/) + token[2] << tokens[index][2] index += 1 end end @@ -479,7 +461,7 @@ module Prism # insert on_ignored_sp tokens for the amount of dedent that we need to # perform. We also need to remove the dedent from the beginning of # each line of plain string content tokens. - results = [] #: Array[Token] + results = [] #: Array[lex_compat_token] dedent_next = true embexpr_balance = 0 @@ -488,7 +470,7 @@ module Prism # whitespace calculation we performed above. This is because # checking if the subsequent token needs to be dedented is common to # both the dedent calculation and the ignored_sp insertion. - case token.event + case token[1] when :on_embexpr_beg embexpr_balance += 1 results << token @@ -500,7 +482,7 @@ module Prism # Here we're going to split the string on newlines, but maintain # the newlines in the resulting array. We'll do that with a look # behind assertion. - splits = token.value.split(/(?<=\n)/) + splits = token[2].split(/(?<=\n)/) index = 0 while index < splits.length @@ -518,7 +500,8 @@ module Prism # line or this line doesn't start with whitespace, then we # should concatenate the rest of the string to match ripper. if dedent == 0 && (!dedent_next || !line.start_with?(/\s/)) - line = splits[index..].join + unjoined = splits[index..] 
#: Array[String] + line = unjoined.join index = splits.length end @@ -557,12 +540,12 @@ module Prism ignored = deleted_chars.join line.delete_prefix!(ignored) - results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]]) + results << [[lineno, 0], :on_ignored_sp, ignored, token[3]] column = ignored.length end end - results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty? + results << [[lineno, column], token[1], line, token[3]] unless line.empty? index += 1 end else @@ -573,7 +556,7 @@ module Prism end dedent_next = - ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) && + ((token[1] == :on_tstring_content) || (token[1] == :on_heredoc_end)) && embexpr_balance == 0 end @@ -583,12 +566,14 @@ module Prism # Here we will split between the two types of heredocs and return the # object that will store their tokens. + #-- + #: (lex_compat_token opening) -> (PlainHeredoc | DashHeredoc | DedentingHeredoc) def self.build(opening) - case opening.value[2] + case opening[2][2] when "~" DedentingHeredoc.new when "-" - DashHeredoc.new(opening.value[3] != "'") + DashHeredoc.new(opening[2][3] != "'") else PlainHeredoc.new end @@ -597,33 +582,43 @@ module Prism private_constant :Heredoc - attr_reader :source, :options + # In previous versions of Ruby, Ripper wouldn't flush the bom before the + # first token, so we had to have a hack in place to account for that. 
+ BOM_FLUSHED = RUBY_VERSION >= "3.3.0" + private_constant :BOM_FLUSHED + + attr_reader :options #: Hash[Symbol, untyped] + # @rbs @source: String + #: (String source, **untyped options) -> void def initialize(source, **options) @source = source @options = options end + #: () -> Result def result - tokens = [] #: Array[LexCompat::Token] + tokens = [] #: Array[lex_compat_token] state = :default heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]] - result = Prism.lex(source, **options) + result = Prism.lex(@source, **options) + source = result.source result_value = result.value - previous_state = nil #: Ripper::Lexer::State? + previous_state = nil #: Translation::Ripper::Lexer::State? last_heredoc_end = nil #: Integer? + eof_token = nil #: Token? - # In previous versions of Ruby, Ripper wouldn't flush the bom before the - # first token, so we had to have a hack in place to account for that. This - # checks for that behavior. - bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0 - bom = source.byteslice(0..2) == "\xEF\xBB\xBF" + bom = source.slice(0, 3) == "\xEF\xBB\xBF" - result_value.each_with_index do |(token, lex_state), index| - lineno = token.location.start_line - column = token.location.start_column + result_value.each_with_index do |(prism_token, prism_state), index| + lineno = prism_token.location.start_line + column = prism_token.location.start_column + + event = RIPPER.fetch(prism_token.type) + value = prism_token.value + lex_state = Translation::Ripper::Lexer::State[prism_state] # If there's a UTF-8 byte-order mark as the start of the file, then for # certain tokens ripper sets the first token back by 3 bytes. 
It also @@ -633,70 +628,53 @@ module Prism if bom && lineno == 1 column -= 3 - if index == 0 && column == 0 && !bom_flushed + if index == 0 && column == 0 && !BOM_FLUSHED flushed = - case token.type + case prism_token.type when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE, :GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I, :PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I, :PERCENT_UPPER_W, :STRING_BEGIN true when :REGEXP_BEGIN, :SYMBOL_BEGIN - token.value.start_with?("%") + value.start_with?("%") else false end unless flushed column -= 3 - value = token.value value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding)) end end end - event = RIPPER.fetch(token.type) - value = token.value - lex_state = Ripper::Lexer::State.new(lex_state) - - token = + lex_compat_token = case event when :on___end__ - EndContentToken.new([[lineno, column], event, value, lex_state]) + # Ripper doesn't include the rest of the token in the event, so we need to + # trim it down to just the content on the first line. + value = value[0..value.index("\n")] #: String + [[lineno, column], event, value, lex_state] when :on_comment - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_heredoc_end # Heredoc end tokens can be emitted in an odd order, so we don't # want to bother comparing the state on them. - last_heredoc_end = token.location.end_offset - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) - when :on_ident - if lex_state == Ripper::EXPR_END - # If we have an identifier that follows a method name like: - # - # def foo bar - # - # then Ripper will mark bar as END|LABEL if there is a local in a - # parent scope named bar because it hasn't pushed the local table - # yet. We do this more accurately, so we need to allow comparing - # against both END and END|LABEL. 
- ParamToken.new([[lineno, column], event, value, lex_state]) - elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL - # In the event that we're comparing identifiers, we're going to - # allow a little divergence. Ripper doesn't account for local - # variables introduced through named captures in regexes, and we - # do, which accounts for this difference. - IdentToken.new([[lineno, column], event, value, lex_state]) - else - Token.new([[lineno, column], event, value, lex_state]) - end + last_heredoc_end = prism_token.location.end_offset + [[lineno, column], event, value, lex_state] when :on_embexpr_end - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) - when :on_ignored_nl - # Ignored newlines can occasionally have a LABEL state attached to - # them which doesn't actually impact anything. We don't mirror that - # state so we ignored it. - IgnoredNewlineToken.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] + when :on_words_sep + # Ripper emits one token each per line. + value.each_line.with_index do |line, index| + if index > 0 + lineno += 1 + column = 0 + end + tokens << [[lineno, column], event, line, lex_state] + end + tokens.pop #: lex_compat_token when :on_regexp_end # On regex end, Ripper scans and then sets end state, so the ripper # lexed output is begin, when it should be end. 
prism sets lex state @@ -721,13 +699,14 @@ module Prism counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0 end - Ripper::Lexer::State.new(result_value[current_index][1]) + Translation::Ripper::Lexer::State[result_value[current_index][1]] else previous_state end - Token.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] when :on_eof + eof_token = prism_token previous_token = result_value[index - 1][0] # If we're at the end of the file and the previous token was a @@ -742,7 +721,7 @@ module Prism # Use the greater offset of the two to determine the start of # the trailing whitespace. start_offset = [previous_token.location.end_offset, last_heredoc_end].compact.max - end_offset = token.location.start_offset + end_offset = prism_token.location.start_offset if start_offset < end_offset if bom @@ -750,14 +729,14 @@ module Prism end_offset += 3 end - tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state]) + tokens << [[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state] end end - Token.new([[lineno, column], event, value, lex_state]) + [[lineno, column], event, value, lex_state] else - Token.new([[lineno, column], event, value, lex_state]) - end + [[lineno, column], event, value, lex_state] + end #: lex_compat_token previous_state = lex_state @@ -774,19 +753,19 @@ module Prism when :default # The default state is when there are no heredocs at all. In this # state we can append the token to the list of tokens and move on. - tokens << token + tokens << lex_compat_token # If we get the declaration of a heredoc, then we open a new heredoc # and move into the heredoc_opened state. 
if event == :on_heredoc_beg state = :heredoc_opened - heredoc_stack.last << Heredoc.build(token) + heredoc_stack.last << Heredoc.build(lex_compat_token) end when :heredoc_opened # The heredoc_opened state is when we've seen the declaration of a # heredoc and are now lexing the body of the heredoc. In this state we # push tokens onto the most recently created heredoc. - heredoc_stack.last.last << token + heredoc_stack.last.last << lex_compat_token case event when :on_heredoc_beg @@ -794,7 +773,7 @@ module Prism # heredoc, this means we have nested heredocs. In this case we'll # push a new heredoc onto the stack and stay in the heredoc_opened # state since we're now lexing the body of the new heredoc. - heredoc_stack << [Heredoc.build(token)] + heredoc_stack << [Heredoc.build(lex_compat_token)] when :on_heredoc_end # If we receive the end of a heredoc, then we're done lexing the # body of the heredoc. In this case we now have a completed heredoc @@ -803,10 +782,10 @@ module Prism state = :heredoc_closed end when :heredoc_closed - if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n")) + if %i[on_nl on_ignored_nl on_comment].include?(event) || ((event == :on_tstring_content) && value.end_with?("\n")) if heredoc_stack.size > 1 - flushing = heredoc_stack.pop - heredoc_stack.last.last << token + flushing = heredoc_stack.pop #: Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc] + heredoc_stack.last.last << lex_compat_token flushing.each do |heredoc| heredoc.to_a.each do |flushed_token| @@ -818,12 +797,12 @@ module Prism next end elsif event == :on_heredoc_beg - tokens << token + tokens << lex_compat_token state = :heredoc_opened - heredoc_stack.last << Heredoc.build(token) + heredoc_stack.last << Heredoc.build(lex_compat_token) next elsif heredoc_stack.size > 1 - heredoc_stack[-2].last << token + heredoc_stack[-2].last << lex_compat_token next end @@ -834,77 +813,94 @@ module Prism 
heredoc_stack.last.clear state = :default - tokens << token + tokens << lex_compat_token end end - # Drop the EOF token from the list - tokens = tokens[0...-1] - - # We sort by location to compare against Ripper's output - tokens.sort_by!(&:location) - - ParseResult.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.new(source)) - end - end + # Drop the EOF token from the list. The EOF token may not be + # present if the source was syntax invalid + if tokens.dig(-1, 1) == :on_eof + tokens = tokens[0...-1] #: Array[lex_compat_token] + end - private_constant :LexCompat + # We sort by location because Ripper.lex sorts. + tokens.sort_by! do |token| + line, column = token[0] + source.byte_offset(line, column) + end - # This is a class that wraps the Ripper lexer to produce almost exactly the - # same tokens. - class LexRipper # :nodoc: - attr_reader :source + tokens = post_process_tokens(tokens, source, result.data_loc, bom, eof_token) - def initialize(source) - @source = source + Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, result.continuable?, source) end - def result - previous = [] #: [[Integer, Integer], Symbol, String, untyped] | [] - results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]] - - lex(source).each do |token| - case token[1] - when :on_sp - # skip - when :on_tstring_content - if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@")) - previous[2] << token[2] - else - results << token - previous = token - end - when :on_words_sep - if previous[1] == :on_words_sep - previous[2] << token[2] + private + + #: (Array[lex_compat_token] tokens, Source source, Location? data_loc, bool bom, Token? 
eof_token) -> Array[lex_compat_token] + def post_process_tokens(tokens, source, data_loc, bom, eof_token) + new_tokens = [] #: Array[lex_compat_token] + + prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG] + prev_token_end = bom ? 3 : 0 + + cache = Translation::Ripper::LineAndColumnCache.new(source) + + tokens.each do |token| + # Skip missing heredoc ends. + next if token[1] == :on_heredoc_end && token[2] == "" + + # Add :on_sp tokens. + line, column = token[0] + start_offset = source.byte_offset(line, column) + + # Ripper reports columns on line 1 without counting the BOM, so we + # adjust to get the real offset + start_offset += 3 if line == 1 && bom + + if start_offset > prev_token_end + sp_value = source.slice(prev_token_end, start_offset - prev_token_end) + sp_line, sp_column = cache.line_and_column(prev_token_end) + # Ripper reports columns on line 1 without counting the BOM + sp_column -= 3 if sp_line == 1 && bom + continuation_index = sp_value.byteindex("\\") + + # ripper emits up to three :on_sp tokens when line continuations are used + if continuation_index + next_whitespace_index = continuation_index + 1 + next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r" + next_whitespace_index += 1 + first_whitespace = sp_value[0...continuation_index] #: String + continuation = sp_value[continuation_index...next_whitespace_index] #: String + second_whitespace = sp_value[next_whitespace_index..] || "" + + new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty? + new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state] + new_tokens << [[sp_line + 1, 0], :on_sp, second_whitespace, prev_token_state] unless second_whitespace.empty? 
else - results << token - previous = token + new_tokens << [[sp_line, sp_column], :on_sp, sp_value, prev_token_state] end - else - results << token - previous = token end - end - - results - end - - private - if Ripper.method(:lex).parameters.assoc(:keyrest) - def lex(source) - Ripper.lex(source, raise_errors: true) + new_tokens << token + prev_token_state = token[3] + prev_token_end = start_offset + token[2].bytesize end - else - def lex(source) - ripper = Ripper::Lexer.new(source) - ripper.lex.tap do |result| - raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any? + + if !data_loc && eof_token # no trailing :on_sp with __END__ as it is always preceded by :on_nl + end_offset = eof_token.location.end_offset + if prev_token_end < end_offset + new_tokens << [ + [source.line(prev_token_end), source.column(prev_token_end)], + :on_sp, + source.slice(prev_token_end, end_offset - prev_token_end), + prev_token_state + ] end end + + new_tokens end end - private_constant :LexRipper + private_constant :LexCompat end diff --git a/lib/prism/node_ext.rb b/lib/prism/node_ext.rb index 8674544065..8a6624e76d 100644 --- a/lib/prism/node_ext.rb +++ b/lib/prism/node_ext.rb @@ -1,13 +1,37 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled +#-- # Here we are reopening the prism module to provide methods on nodes that aren't # templated and are meant as convenience methods. +#++ module Prism + class Node + #: (*String replacements) -> void + def deprecated(*replacements) # :nodoc: + location = caller_locations(1, 1)&.[](0)&.label + suggest = replacements.map { |replacement| "#{self.class}##{replacement}" } + + warn(<<~MSG, uplevel: 1, category: :deprecated) + [deprecation]: #{self.class}##{location} is deprecated and will be \ + removed in the next major version. Use #{suggest.join("/")} instead. 
+ #{(caller(1, 3) || []).join("\n")} + MSG + end + end + module RegularExpressionOptions # :nodoc: # Returns a numeric value that represents the flags that were used to create # the regular expression. - def options - o = flags & (RegularExpressionFlags::IGNORE_CASE | RegularExpressionFlags::EXTENDED | RegularExpressionFlags::MULTI_LINE) + #-- + #: (Integer flags) -> Integer + def self.options(flags) + o = 0 + o |= Regexp::IGNORECASE if flags.anybits?(RegularExpressionFlags::IGNORE_CASE) + o |= Regexp::EXTENDED if flags.anybits?(RegularExpressionFlags::EXTENDED) + o |= Regexp::MULTILINE if flags.anybits?(RegularExpressionFlags::MULTI_LINE) o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8) o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT) o @@ -15,67 +39,121 @@ module Prism end class InterpolatedMatchLastLineNode < Node - include RegularExpressionOptions + # Returns a numeric value that represents the flags that were used to create + # the regular expression. + #-- + #: () -> Integer + def options + RegularExpressionOptions.options(flags) + end end class InterpolatedRegularExpressionNode < Node - include RegularExpressionOptions + # Returns a numeric value that represents the flags that were used to create + # the regular expression. + #-- + #: () -> Integer + def options + RegularExpressionOptions.options(flags) + end end class MatchLastLineNode < Node - include RegularExpressionOptions + # Returns a numeric value that represents the flags that were used to create + # the regular expression. + #-- + #: () -> Integer + def options + RegularExpressionOptions.options(flags) + end end class RegularExpressionNode < Node - include RegularExpressionOptions + # Returns a numeric value that represents the flags that were used to create + # the regular expression. 
+ #-- + #: () -> Integer + def options + RegularExpressionOptions.options(flags) + end end private_constant :RegularExpressionOptions module HeredocQuery # :nodoc: # Returns true if this node was represented as a heredoc in the source code. - def heredoc? + #-- + #: (String? opening) -> bool? + def self.heredoc?(opening) + # @type self: InterpolatedStringNode | InterpolatedXStringNode | StringNode | XStringNode opening&.start_with?("<<") end end class InterpolatedStringNode < Node - include HeredocQuery + # Returns true if this node was represented as a heredoc in the source code. + #-- + #: () -> bool? + def heredoc? + HeredocQuery.heredoc?(opening) + end end class InterpolatedXStringNode < Node - include HeredocQuery + # Returns true if this node was represented as a heredoc in the source code. + #-- + #: () -> bool? + def heredoc? + HeredocQuery.heredoc?(opening) + end end class StringNode < Node - include HeredocQuery + # Returns true if this node was represented as a heredoc in the source code. + #-- + #: () -> bool? + def heredoc? + HeredocQuery.heredoc?(opening) + end # Occasionally it's helpful to treat a string as if it were interpolated so # that there's a consistent interface for working with strings. + #-- + #: () -> InterpolatedStringNode def to_interpolated InterpolatedStringNode.new( source, + -1, + location, frozen? ? InterpolatedStringNodeFlags::FROZEN : 0, opening_loc, - [copy(opening_loc: nil, closing_loc: nil, location: content_loc)], - closing_loc, - location + [copy(location: content_loc, opening_loc: nil, closing_loc: nil)], + closing_loc ) end end class XStringNode < Node - include HeredocQuery + # Returns true if this node was represented as a heredoc in the source code. + #-- + #: () -> bool? + def heredoc? + HeredocQuery.heredoc?(opening) + end # Occasionally it's helpful to treat a string as if it were interpolated so # that there's a consistent interface for working with strings. 
+ #-- + #: () -> InterpolatedXStringNode def to_interpolated InterpolatedXStringNode.new( source, + -1, + location, + flags, opening_loc, - [StringNode.new(source, 0, nil, content_loc, nil, unescaped, content_loc)], - closing_loc, - location + [StringNode.new(source, node_id, content_loc, 0, nil, content_loc, nil, unescaped)], + closing_loc ) end end @@ -84,6 +162,8 @@ module Prism class ImaginaryNode < Node # Returns the value of the node as a Ruby Complex. + #-- + #: () -> Complex def value Complex(0, numeric.value) end @@ -91,19 +171,25 @@ module Prism class RationalNode < Node # Returns the value of the node as a Ruby Rational. + #-- + #: () -> Rational def value - Rational(numeric.is_a?(IntegerNode) ? numeric.value : slice.chomp("r")) + Rational(numerator, denominator) end end class ConstantReadNode < Node # Returns the list of parts for the full name of this constant. # For example: [:Foo] + #-- + #: () -> Array[Symbol] def full_name_parts [name] end # Returns the full name of this constant. For example: "Foo" + #-- + #: () -> String def full_name name.to_s end @@ -112,11 +198,15 @@ module Prism class ConstantWriteNode < Node # Returns the list of parts for the full name of this constant. # For example: [:Foo] + #-- + #: () -> Array[Symbol] def full_name_parts [name] end # Returns the full name of this constant. For example: "Foo" + #-- + #: () -> String def full_name name.to_s end @@ -131,23 +221,26 @@ module Prism # local variable class DynamicPartsInConstantPathError < StandardError; end - # An error class raised when missing nodes are found while computing a + # An error class raised when error recovery nodes are found while computing a # constant path's full name. For example: # Foo:: -> raises because the constant path is missing the last part - class MissingNodesInConstantPathError < StandardError; end + class ErrorRecoveryNodesInConstantPathError < StandardError; end # Returns the list of parts for the full name of this constant path. 
# For example: [:Foo, :Bar] + #-- + #: () -> Array[Symbol] def full_name_parts parts = [] #: Array[Symbol] current = self #: node? while current.is_a?(ConstantPathNode) - child = current.child - if child.is_a?(MissingNode) - raise MissingNodesInConstantPathError, "Constant path contains missing nodes. Cannot compute full name" + name = current.name + if name.nil? + raise ErrorRecoveryNodesInConstantPathError, "Constant path contains error recovery nodes. Cannot compute full name" end - parts.unshift(child.name) + + parts.unshift(name) current = current.parent end @@ -159,6 +252,8 @@ module Prism end # Returns the full name of this constant path. For example: "Foo::Bar" + #-- + #: () -> String def full_name full_name_parts.join("::") end @@ -167,9 +262,11 @@ module Prism class ConstantPathTargetNode < Node # Returns the list of parts for the full name of this constant path. # For example: [:Foo, :Bar] + #-- + #: () -> Array[Symbol] def full_name_parts parts = - case parent + case (parent = self.parent) when ConstantPathNode, ConstantReadNode parent.full_name_parts when nil @@ -179,14 +276,16 @@ module Prism raise ConstantPathNode::DynamicPartsInConstantPathError, "Constant target path contains dynamic parts. Cannot compute full name" end - if child.is_a?(MissingNode) - raise ConstantPathNode::MissingNodesInConstantPathError, "Constant target path contains missing nodes. Cannot compute full name" + if (name = self.name).nil? + raise ConstantPathNode::ErrorRecoveryNodesInConstantPathError, "Constant target path contains error recovery nodes. Cannot compute full name" end - parts.push(child.name) + parts.push(name) end # Returns the full name of this constant path. For example: "Foo::Bar" + #-- + #: () -> String def full_name full_name_parts.join("::") end @@ -195,11 +294,15 @@ module Prism class ConstantTargetNode < Node # Returns the list of parts for the full name of this constant. 
# For example: [:Foo] + #-- + #: () -> Array[Symbol] def full_name_parts [name] end # Returns the full name of this constant. For example: "Foo" + #-- + #: () -> String def full_name name.to_s end @@ -207,6 +310,8 @@ module Prism class ParametersNode < Node # Mirrors the Method#parameters method. + #-- + #: () -> Array[[Symbol, Symbol] | [Symbol]] def signature names = [] #: Array[[Symbol, Symbol] | [Symbol]] @@ -216,15 +321,15 @@ module Prism optionals.each { |param| names << [:opt, param.name] } - if rest && rest.is_a?(RestParameterNode) + if (rest = self.rest).is_a?(RestParameterNode) names << [:rest, rest.name || :*] end posts.each do |param| - if param.is_a?(MultiTargetNode) + case param + when MultiTargetNode names << [:req] - elsif param.is_a?(NoKeywordsParameterNode) - # Invalid syntax, e.g. "def f(**nil, ...)" moves the NoKeywordsParameterNode to posts + when ErrorRecoveryNode raise "Invalid syntax" else names << [:req, param.name] @@ -244,7 +349,7 @@ module Prism keyopt.each { |param| names << [:key, param.name] } - case keyword_rest + case (keyword_rest = self.keyword_rest) when ForwardingParameterNode names.concat([[:rest, :*], [:keyrest, :**], [:block, :&]]) when KeywordRestParameterNode @@ -253,8 +358,31 @@ module Prism names << [:nokey] end - names << [:block, block.name || :&] if block + case (block = self.block) + when BlockParameterNode + names << [:block, block.name || :&] + when NoBlockParameterNode + names << [:noblock] + end + names end end + + class CallNode < Node + # When a call node has the attribute_write flag set, it means that the call + # is using the attribute write syntax. This is either a method call to []= + # or a method call to a method that ends with =. Either way, the = sign is + # present in the source. + # + # Prism returns the message_loc _without_ the = sign attached, because there + # can be any amount of space between the message and the = sign. 
However, + # sometimes you want the location of the full message including the inner + # space and the = sign. This method provides that. + #-- + #: () -> Location? + def full_message_loc + attribute_write? ? message_loc&.adjoin("=") : message_loc + end + end end diff --git a/lib/prism/node_find.rb b/lib/prism/node_find.rb new file mode 100644 index 0000000000..697ee430e8 --- /dev/null +++ b/lib/prism/node_find.rb @@ -0,0 +1,185 @@ +# frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled + +module Prism + # Finds the Prism AST node corresponding to a given Method, UnboundMethod, + # Proc, or Thread::Backtrace::Location. On CRuby, uses node_id from the + # instruction sequence for an exact match. On other implementations, falls + # back to best-effort matching by source location line number. + # + # This module is autoloaded so that programs that don't use Prism.find don't + # pay for its definition. + module NodeFind # :nodoc: + # Find the node for the given callable or backtrace location. + #-- + #: (Method | UnboundMethod | Proc | Thread::Backtrace::Location callable, bool rubyvm) -> Node? + def self.find(callable, rubyvm) + case callable + when Proc + if rubyvm + RubyVMCallableFind.new.find(callable) + elsif callable.lambda? + LineLambdaFind.new.find(callable) + else + LineProcFind.new.find(callable) + end + when Method, UnboundMethod + if rubyvm + RubyVMCallableFind.new.find(callable) + else + LineMethodFind.new.find(callable) + end + when Thread::Backtrace::Location + if rubyvm + RubyVMBacktraceLocationFind.new.find(callable) + else + LineBacktraceLocationFind.new.find(callable) + end + else + raise ArgumentError, "Expected a Method, UnboundMethod, Proc, or Thread::Backtrace::Location, got #{callable.class}" + end + end + + # Base class that handles parsing a file. + class Find + private + + # Parse the given file path, returning a ParseResult or nil. + #-- + #: (String? file) -> ParseResult? 
+ def parse_file(file) + return unless file && File.readable?(file) + result = Prism.parse_file(file) + result if result.success? + end + end + + # Finds the AST node for a Method, UnboundMethod, or Proc using the node_id + # from the instruction sequence. + class RubyVMCallableFind < Find + # Find the node for the given callable using the ISeq node_id. + #-- + #: (Method | UnboundMethod | Proc callable) -> Node? + def find(callable) + return unless (source_location = callable.source_location) + return unless (result = parse_file(source_location[0])) + return unless (iseq = RubyVM::InstructionSequence.of(callable)) + + header = iseq.to_a[4] + return unless header[:parser] == :prism + + result.value.find { |node| node.node_id == header[:node_id] } + end + end + + # Finds the AST node for a Thread::Backtrace::Location using the node_id + # from the backtrace location. + class RubyVMBacktraceLocationFind < Find + # Find the node for the given backtrace location using node_id. + #-- + #: (Thread::Backtrace::Location location) -> Node? + def find(location) + file = location.absolute_path || location.path + return unless (result = parse_file(file)) + return unless RubyVM::AbstractSyntaxTree.respond_to?(:node_id_for_backtrace_location) + + node_id = RubyVM::AbstractSyntaxTree.node_id_for_backtrace_location(location) + + result.value.find { |node| node.node_id == node_id } + end + end + + # Finds the AST node for a Method or UnboundMethod using best-effort line + # matching. Used on non-CRuby implementations. + class LineMethodFind < Find + # Find the node for the given method by matching on name and line. + #-- + #: (Method | UnboundMethod callable) -> Node? 
+ def find(callable) + return unless (source_location = callable.source_location) + return unless (result = parse_file(source_location[0])) + + name = callable.name + start_line = source_location[1] + + result.value.find do |node| + case node + when DefNode + node.name == name && node.location.start_line == start_line + when CallNode + node.block.is_a?(BlockNode) && node.location.start_line == start_line + else + false + end + end + end + end + + # Finds the AST node for a lambda using best-effort line matching. Used + # on non-CRuby implementations. + class LineLambdaFind < Find + # Find the node for the given lambda by matching on line. + #-- + #: (Proc callable) -> Node? + def find(callable) + return unless (source_location = callable.source_location) + return unless (result = parse_file(source_location[0])) + + start_line = source_location[1] + + result.value.find do |node| + case node + when LambdaNode + node.location.start_line == start_line + when CallNode + node.block.is_a?(BlockNode) && node.location.start_line == start_line + else + false + end + end + end + end + + # Finds the AST node for a non-lambda Proc using best-effort line + # matching. Used on non-CRuby implementations. + class LineProcFind < Find + # Find the node for the given proc by matching on line. + #-- + #: (Proc callable) -> Node? + def find(callable) + return unless (source_location = callable.source_location) + return unless (result = parse_file(source_location[0])) + + start_line = source_location[1] + + result.value.find do |node| + case node + when ForNode + node.location.start_line == start_line + when CallNode + node.block.is_a?(BlockNode) && node.location.start_line == start_line + else + false + end + end + end + end + + # Finds the AST node for a Thread::Backtrace::Location using best-effort + # line matching. Used on non-CRuby implementations. + class LineBacktraceLocationFind < Find + # Find the node for the given backtrace location by matching on line. 
+ #-- + #: (Thread::Backtrace::Location location) -> Node? + def find(location) + file = location.absolute_path || location.path + return unless (result = parse_file(file)) + + start_line = location.lineno + result.value.find { |node| node.location.start_line == start_line } + end + end + end +end diff --git a/lib/prism/node_inspector.rb b/lib/prism/node_inspector.rb deleted file mode 100644 index d77af33c3a..0000000000 --- a/lib/prism/node_inspector.rb +++ /dev/null @@ -1,68 +0,0 @@ -# frozen_string_literal: true - -module Prism - # This object is responsible for generating the output for the inspect method - # implementations of child nodes. - class NodeInspector # :nodoc: - attr_reader :prefix, :output - - def initialize(prefix = "") - @prefix = prefix - @output = +"" - end - - # Appends a line to the output with the current prefix. - def <<(line) - output << "#{prefix}#{line}" - end - - # This generates a string that is used as the header of the inspect output - # for any given node. - def header(node) - output = +"@ #{node.class.name.split("::").last} (" - output << "location: (#{node.location.start_line},#{node.location.start_column})-(#{node.location.end_line},#{node.location.end_column})" - output << ", newline: true" if node.newline? - output << ")\n" - output - end - - # Generates a string that represents a list of nodes. It handles properly - # using the box drawing characters to make the output look nice. - def list(prefix, nodes) - output = +"(length: #{nodes.length})\n" - last_index = nodes.length - 1 - - nodes.each_with_index do |node, index| - pointer, preadd = (index == last_index) ? ["└── ", " "] : ["├── ", "│ "] - node_prefix = "#{prefix}#{preadd}" - output << node.inspect(NodeInspector.new(node_prefix)).sub(node_prefix, "#{prefix}#{pointer}") - end - - output - end - - # Generates a string that represents a location field on a node. 
- def location(value) - if value - "(#{value.start_line},#{value.start_column})-(#{value.end_line},#{value.end_column}) = #{value.slice.inspect}" - else - "∅" - end - end - - # Generates a string that represents a child node. - def child_node(node, append) - node.inspect(child_inspector(append)).delete_prefix(prefix) - end - - # Returns a new inspector that can be used to inspect a child node. - def child_inspector(append) - NodeInspector.new("#{prefix}#{append}") - end - - # Returns the output as a string. - def to_str - output - end - end -end diff --git a/lib/prism/pack.rb b/lib/prism/pack.rb deleted file mode 100644 index c0de8ab8b7..0000000000 --- a/lib/prism/pack.rb +++ /dev/null @@ -1,228 +0,0 @@ -# frozen_string_literal: true -# typed: ignore - -module Prism - # A parser for the pack template language. - module Pack - %i[ - SPACE - COMMENT - INTEGER - UTF8 - BER - FLOAT - STRING_SPACE_PADDED - STRING_NULL_PADDED - STRING_NULL_TERMINATED - STRING_MSB - STRING_LSB - STRING_HEX_HIGH - STRING_HEX_LOW - STRING_UU - STRING_MIME - STRING_BASE64 - STRING_FIXED - STRING_POINTER - MOVE - BACK - NULL - - UNSIGNED - SIGNED - SIGNED_NA - - AGNOSTIC_ENDIAN - LITTLE_ENDIAN - BIG_ENDIAN - NATIVE_ENDIAN - ENDIAN_NA - - SIZE_SHORT - SIZE_INT - SIZE_LONG - SIZE_LONG_LONG - SIZE_8 - SIZE_16 - SIZE_32 - SIZE_64 - SIZE_P - SIZE_NA - - LENGTH_FIXED - LENGTH_MAX - LENGTH_RELATIVE - LENGTH_NA - ].each do |const| - const_set(const, const) - end - - # A directive in the pack template language. - class Directive - # A symbol representing the version of Ruby. - attr_reader :version - - # A symbol representing whether or not we are packing or unpacking. - attr_reader :variant - - # A byteslice of the source string that this directive represents. - attr_reader :source - - # The type of the directive. - attr_reader :type - - # The type of signedness of the directive. - attr_reader :signed - - # The type of endianness of the directive. - attr_reader :endian - - # The size of the directive. 
- attr_reader :size - - # The length type of this directive (used for integers). - attr_reader :length_type - - # The length of this directive (used for integers). - attr_reader :length - - # Initialize a new directive with the given values. - def initialize(version, variant, source, type, signed, endian, size, length_type, length) - @version = version - @variant = variant - @source = source - @type = type - @signed = signed - @endian = endian - @size = size - @length_type = length_type - @length = length - end - - # The descriptions of the various types of endianness. - ENDIAN_DESCRIPTIONS = { - AGNOSTIC_ENDIAN: "agnostic", - LITTLE_ENDIAN: "little-endian (VAX)", - BIG_ENDIAN: "big-endian (network)", - NATIVE_ENDIAN: "native-endian", - ENDIAN_NA: "n/a" - } - - # The descriptions of the various types of signedness. - SIGNED_DESCRIPTIONS = { - UNSIGNED: "unsigned", - SIGNED: "signed", - SIGNED_NA: "n/a" - } - - # The descriptions of the various types of sizes. - SIZE_DESCRIPTIONS = { - SIZE_SHORT: "short", - SIZE_INT: "int-width", - SIZE_LONG: "long", - SIZE_LONG_LONG: "long long", - SIZE_8: "8-bit", - SIZE_16: "16-bit", - SIZE_32: "32-bit", - SIZE_64: "64-bit", - SIZE_P: "pointer-width" - } - - # Provide a human-readable description of the directive. 
- def describe - case type - when SPACE - "whitespace" - when COMMENT - "comment" - when INTEGER - if size == SIZE_8 - base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer" - else - base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer" - end - case length_type - when LENGTH_FIXED - if length > 1 - base + ", x#{length}" - else - base - end - when LENGTH_MAX - base + ", as many as possible" - else - raise - end - when UTF8 - "UTF-8 character" - when BER - "BER-compressed integer" - when FLOAT - "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float" - when STRING_SPACE_PADDED - "arbitrary binary string (space padded)" - when STRING_NULL_PADDED - "arbitrary binary string (null padded, count is width)" - when STRING_NULL_TERMINATED - "arbitrary binary string (null padded, count is width), except that null is added with *" - when STRING_MSB - "bit string (MSB first)" - when STRING_LSB - "bit string (LSB first)" - when STRING_HEX_HIGH - "hex string (high nibble first)" - when STRING_HEX_LOW - "hex string (low nibble first)" - when STRING_UU - "UU-encoded string" - when STRING_MIME - "quoted printable, MIME encoding" - when STRING_BASE64 - "base64 encoded string" - when STRING_FIXED - "pointer to a structure (fixed-length string)" - when STRING_POINTER - "pointer to a null-terminated string" - when MOVE - "move to absolute position" - when BACK - "back up a byte" - when NULL - "null byte" - else - raise - end - end - end - - # The result of parsing a pack template. - class Format - # A list of the directives in the template. - attr_reader :directives - - # The encoding of the template. - attr_reader :encoding - - # Create a new Format with the given directives and encoding. - def initialize(directives, encoding) - @directives = directives - @encoding = encoding - end - - # Provide a human-readable description of the format. 
- def describe - source_width = directives.map { |d| d.source.inspect.length }.max - directive_lines = directives.map do |directive| - if directive.type == SPACE - source = directive.source.inspect - else - source = directive.source - end - # @type var source_width: Integer - " #{source.ljust(source_width)} #{directive.describe}" - end - - (["Directives:"] + directive_lines + ["Encoding:", " #{encoding}"]).join("\n") - end - end - end -end diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index 39e15f6027..93d3c006b7 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -1,61 +1,175 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism + # @rbs! + # # An internal interface for a cache that can be used to compute code + # # units from byte offsets. + # interface _CodeUnitsCache + # def []: (Integer byte_offset) -> Integer + # end + # This represents a source of Ruby code that has been parsed. It is used in # conjunction with locations to allow them to resolve line numbers and source # ranges. class Source + # Create a new source object with the given source code. This method should + # be used instead of `new` and it will return either a `Source` or a + # specialized and more performant `ASCIISource` if no multibyte characters + # are present in the source code. + # + # Note that if you are calling this method manually, you will need to supply + # the start_line and offsets parameters. start_line is the line number that + # the source starts on, which is typically 1 but can be different if this + # source is a subset of a larger source or if this is an eval. offsets is an + # array of byte offsets for the start of each line in the source code, which + # can be calculated by iterating through the source code and recording the + # byte offset whenever a newline character is encountered. The first + # element is always 0 to mark the first line. 
+ #-- + #: (String source, Integer start_line, Array[Integer] offsets) -> Source + def self.for(source, start_line, offsets) + if source.ascii_only? + ASCIISource.new(source, start_line, offsets) + elsif source.encoding == Encoding::BINARY + source.force_encoding(Encoding::UTF_8) + + if source.valid_encoding? + new(source, start_line, offsets) + else + # This is an extremely niche use case where the file is marked as + # binary, contains multi-byte characters, and those characters are not + # valid UTF-8. In this case we'll mark it as binary and fall back to + # treating everything as a single-byte character. This _may_ cause + # problems when asking for code units, but it appears to be the + # cleanest solution at the moment. + source.force_encoding(Encoding::BINARY) + ASCIISource.new(source, start_line, offsets) + end + else + new(source, start_line, offsets) + end + end + # The source code that this source object represents. - attr_reader :source + attr_reader :source #: String # The line number where this source starts. - attr_reader :start_line + attr_reader :start_line #: Integer + + # The list of newline byte offsets in the source code. When initialized from + # the C extension, this may be a packed binary string of uint32_t values + # that is lazily unpacked on first access. + #-- + #: () -> Array[Integer] + def offsets + offsets = @offsets + return offsets if offsets.is_a?(Array) + @offsets = offsets.unpack("L*") + end + + # Create a new source object with the given source code. The offsets + # parameter can be either an Array of Integer byte offsets or a packed + # binary string of uint32_t values (from the C extension). + #-- + #: (String source, Integer start_line, Array[Integer] | String offsets) -> void + def initialize(source, start_line, offsets) + @source = source + @start_line = start_line + @offsets = offsets + end - # The list of newline byte offsets in the source code. 
- attr_reader :offsets + # Replace the value of start_line with the given value. + #-- + #: (Integer start_line) -> void + def replace_start_line(start_line) + @start_line = start_line + end - # Create a new source object with the given source code. - def initialize(source, start_line = 1, offsets = []) - @source = source - @start_line = start_line # set after parsing is done - @offsets = offsets # set after parsing is done + # Replace the value of offsets with the given value. + #-- + #: (Array[Integer] offsets) -> void + def replace_offsets(offsets) + @offsets = offsets end # Returns the encoding of the source code, which is set by parameters to the # parser or by the encoding magic comment. + #-- + #: () -> Encoding def encoding source.encoding end + # Returns the lines of the source code as an array of strings. + #-- + #: () -> Array[String] + def lines + source.lines + end + # Perform a byteslice on the source code using the given byte offset and # byte length. + #-- + #: (Integer byte_offset, Integer length) -> String def slice(byte_offset, length) source.byteslice(byte_offset, length) or raise end + # Converts the line number and column in bytes to a byte offset. + #-- + #: (Integer line, Integer column) -> Integer + def byte_offset(line, column) + normal = line - @start_line + raise IndexError if normal < 0 + offsets.fetch(normal) + column + rescue IndexError + raise ArgumentError, "line #{line} is out of range" + end + # Binary search through the offsets to find the line number for the given # byte offset. + #-- + #: (Integer byte_offset) -> Integer def line(byte_offset) start_line + find_line(byte_offset) end # Return the byte offset of the start of the line corresponding to the given # byte offset. + #-- + #: (Integer byte_offset) -> Integer def line_start(byte_offset) offsets[find_line(byte_offset)] end - # Return the column number for the given byte offset. + # Returns the byte offset of the end of the line corresponding to the given + # byte offset. 
+ #-- + #: (Integer byte_offset) -> Integer + def line_end(byte_offset) + offsets[find_line(byte_offset) + 1] || source.bytesize + end + + # Return the column in bytes for the given byte offset. + #-- + #: (Integer byte_offset) -> Integer def column(byte_offset) byte_offset - line_start(byte_offset) end # Return the character offset for the given byte offset. + #-- + #: (Integer byte_offset) -> Integer def character_offset(byte_offset) (source.byteslice(0, byte_offset) or raise).length end - # Return the column number in characters for the given byte offset. + # Return the column in characters for the given byte offset. + #-- + #: (Integer byte_offset) -> Integer def character_column(byte_offset) character_offset(byte_offset) - character_offset(line_start(byte_offset)) end @@ -66,37 +180,215 @@ module Prism # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the # concept of code units that differs from the number of characters in other # encodings, it is not captured here. + # + # We purposefully replace invalid and undefined characters with replacement + # characters in this conversion. This happens for two reasons. First, it's + # possible that the given byte offset will not occur on a character + # boundary. Second, it's possible that the source code will contain a + # character that has no equivalent in the given encoding. + #-- + #: (Integer byte_offset, Encoding encoding) -> Integer def code_units_offset(byte_offset, encoding) - byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding) - (encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? 
(byteslice.bytesize / 2) : byteslice.length + return byte_offset if encoding == Encoding::UTF_8 + + byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace) + + if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE + byteslice.bytesize / 2 + else + byteslice.length + end + end + + # Generate a cache that targets a specific encoding for calculating code + # unit offsets. + #-- + #: (Encoding encoding) -> CodeUnitsCache + def code_units_cache(encoding) + CodeUnitsCache.new(source, encoding) end - # Returns the column number in code units for the given encoding for the + # Returns the column in code units for the given encoding for the # given byte offset. + #-- + #: (Integer byte_offset, Encoding encoding) -> Integer def code_units_column(byte_offset, encoding) code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding) end - private + # Freeze this object and the objects it contains. + #-- + #: () -> void + def deep_freeze + source.freeze + offsets.freeze + freeze + end - # Binary search through the offsets to find the line number for the given + # Binary search through the offsets to find the index for the given # byte offset. - def find_line(byte_offset) - left = 0 - right = offsets.length - 1 + #-- + #: (Integer byte_offset) -> Integer + def find_line(byte_offset) # :nodoc: + index = offsets.bsearch_index { |offset| offset > byte_offset } || offsets.length + index - 1 + end + end + + # A cache that can be used to quickly compute code unit offsets from byte + # offsets. It purposefully provides only a single #[] method to access the + # cache in order to minimize surface area. + # + # Note that there are some known issues here that may or may not be addressed + # in the future: + # + # * The first is that there are issues when the cache computes values that are + # not on character boundaries. 
This can result in subsequent computations + # being off by one or more code units. + # * The second is that this cache is currently unbounded. In theory we could + # introduce some kind of LRU cache to limit the number of entries, but this + # has not yet been implemented. + # + class CodeUnitsCache + # Counter used for UTF-8, where one code unit equals one byte. + class UTF8Counter # :nodoc: + #: (Integer byte_offset, Integer byte_length) -> Integer + def count(byte_offset, byte_length) + byte_length + end + end + + class UTF16Counter # :nodoc: + # @rbs @source: String + # @rbs @encoding: Encoding + + #: (String source, Encoding encoding) -> void + def initialize(source, encoding) + @source = source + @encoding = encoding + end + + #: (Integer byte_offset, Integer byte_length) -> Integer + def count(byte_offset, byte_length) + (@source.byteslice(byte_offset, byte_length) or raise).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2 + end + end - while left <= right - mid = left + (right - left) / 2 - return mid if (offset = offsets[mid]) == byte_offset + # Counter used for UTF-32, where one code unit equals one code point and + # matches String#length. Also used as a best-effort fallback for any other + # encoding that does not have a dedicated counter. 
+ class UTF32Counter # :nodoc: + # @rbs @source: String + # @rbs @encoding: Encoding - if offset < byte_offset - left = mid + 1 + #: (String source, Encoding encoding) -> void + def initialize(source, encoding) + @source = source + @encoding = encoding + end + + #: (Integer byte_offset, Integer byte_length) -> Integer + def count(byte_offset, byte_length) + (@source.byteslice(byte_offset, byte_length) or raise).encode(@encoding, invalid: :replace, undef: :replace).length + end + end + + private_constant :UTF8Counter, :UTF16Counter, :UTF32Counter + + # @rbs @source: String + # @rbs @counter: UTF8Counter | UTF16Counter | UTF32Counter + # @rbs @cache: Hash[Integer, Integer] + # @rbs @offsets: Array[Integer] + + # Initialize a new cache with the given source and encoding. + #-- + #: (String source, Encoding encoding) -> void + def initialize(source, encoding) + @source = source + @counter = + case encoding + when Encoding::UTF_8 + UTF8Counter.new + when Encoding::UTF_16LE, Encoding::UTF_16BE + UTF16Counter.new(source, encoding) else - right = mid - 1 + UTF32Counter.new(source, encoding) end - end - left - 1 + @cache = {} #: Hash[Integer, Integer] + @offsets = [] #: Array[Integer] + end + + # Retrieve the code units offset from the given byte offset. + #-- + #: (Integer byte_offset) -> Integer + def [](byte_offset) + @cache[byte_offset] ||= + if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil? + @offsets << byte_offset + @counter.count(0, byte_offset) + elsif index == 0 + @offsets.unshift(byte_offset) + @counter.count(0, byte_offset) + else + @offsets.insert(index, byte_offset) + offset = @offsets[index - 1] + @cache[offset] + @counter.count(offset, byte_offset - offset) + end + end + end + + # Specialized version of Prism::Source for source code that includes ASCII + # characters only. This class is used to apply performance optimizations that + # cannot be applied to sources that include multibyte characters. 
+ # + # In the extremely rare case that a source includes multi-byte characters but + # is marked as binary because of a magic encoding comment and it cannot be + # eagerly converted to UTF-8, this class will be used as well. This is because + # at that point we will treat everything as single-byte characters. + class ASCIISource < Source + # Return the character offset for the given byte offset. + #-- + #: (Integer byte_offset) -> Integer + def character_offset(byte_offset) + byte_offset + end + + # Return the column in characters for the given byte offset. + #-- + #: (Integer byte_offset) -> Integer + def character_column(byte_offset) + byte_offset - line_start(byte_offset) + end + + # Returns the offset from the start of the file for the given byte offset + # counting in code units for the given encoding. + # + # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the + # concept of code units that differs from the number of characters in other + # encodings, it is not captured here. + #-- + #: (Integer byte_offset, Encoding encoding) -> Integer + def code_units_offset(byte_offset, encoding) + byte_offset + end + + # Returns a cache that is the identity function in order to maintain the + # same interface. We can do this because code units are always equivalent to + # byte offsets for ASCII-only sources. + #-- + #: (Encoding encoding) -> _CodeUnitsCache + def code_units_cache(encoding) + ->(byte_offset) { byte_offset } + end + + # Specialized version of `code_units_column` that does not depend on + # `code_units_offset`, which is a more expensive operation. This is + # essentially the same as `Prism::Source#column`. + #-- + #: (Integer byte_offset, Encoding encoding) -> Integer + def code_units_column(byte_offset, encoding) + byte_offset - line_start(byte_offset) end end @@ -104,18 +396,23 @@ module Prism class Location # A Source object that is used to determine more information from the given # offset and length. 
- attr_reader :source + attr_reader :source #: Source protected :source # The byte offset from the beginning of the source where this location # starts. - attr_reader :start_offset + attr_reader :start_offset #: Integer # The length of this location in bytes. - attr_reader :length + attr_reader :length #: Integer + + # @rbs @leading_comments: Array[Comment]? + # @rbs @trailing_comments: Array[Comment]? # Create a new location object with the given source, start byte offset, and # byte length. + #-- + #: (Source source, Integer start_offset, Integer length) -> void def initialize(source, start_offset, length) @source = source @start_offset = start_offset @@ -130,142 +427,244 @@ module Prism # These are the comments that are associated with this location that exist # before the start of this location. + #-- + #: () -> Array[Comment] def leading_comments @leading_comments ||= [] end # Attach a comment to the leading comments of this location. + #-- + #: (Comment comment) -> void def leading_comment(comment) leading_comments << comment end # These are the comments that are associated with this location that exist # after the end of this location. + #-- + #: () -> Array[Comment] def trailing_comments @trailing_comments ||= [] end # Attach a comment to the trailing comments of this location. + #-- + #: (Comment comment) -> void def trailing_comment(comment) trailing_comments << comment end # Returns all comments that are associated with this location (both leading # and trailing comments). + #-- + #: () -> Array[Comment] def comments - [*@leading_comments, *@trailing_comments] + [*@leading_comments, *@trailing_comments] #: Array[Comment] end # Create a new location object with the given options. 
+ #-- + #: (?source: Source, ?start_offset: Integer, ?length: Integer) -> Location def copy(source: self.source, start_offset: self.start_offset, length: self.length) Location.new(source, start_offset, length) end # Returns a new location that is the result of chopping off the last byte. + #-- + #: () -> Location def chop copy(length: length == 0 ? length : length - 1) end # Returns a string representation of this location. - def inspect + #-- + #: () -> String + def inspect # :nodoc: "#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>" end + # Returns all of the lines of the source code associated with this location. + #-- + #: () -> Array[String] + def source_lines + source.lines + end + # The source code that this location represents. + #-- + #: () -> String def slice source.slice(start_offset, length) end + # The source code that this location represents starting from the beginning + # of the line that this location starts on to the end of the line that this + # location ends on. + #-- + #: () -> String + def slice_lines + line_start = source.line_start(start_offset) + line_end = source.line_end(end_offset) + source.slice(line_start, line_end - line_start) + end + # The character offset from the beginning of the source where this location # starts. + #-- + #: () -> Integer def start_character_offset source.character_offset(start_offset) end # The offset from the start of the file in code units of the given encoding. + #-- + #: (Encoding encoding) -> Integer def start_code_units_offset(encoding = Encoding::UTF_16LE) source.code_units_offset(start_offset, encoding) end + # The start offset from the start of the file in code units using the given + # cache to fetch or calculate the value. + #-- + #: (_CodeUnitsCache cache) -> Integer + def cached_start_code_units_offset(cache) + cache[start_offset] + end + # The byte offset from the beginning of the source where this location ends. 
+ #-- + #: () -> Integer def end_offset start_offset + length end # The character offset from the beginning of the source where this location # ends. + #-- + #: () -> Integer def end_character_offset source.character_offset(end_offset) end # The offset from the start of the file in code units of the given encoding. + #-- + #: (Encoding encoding) -> Integer def end_code_units_offset(encoding = Encoding::UTF_16LE) source.code_units_offset(end_offset, encoding) end + # The end offset from the start of the file in code units using the given + # cache to fetch or calculate the value. + #-- + #: (_CodeUnitsCache cache) -> Integer + def cached_end_code_units_offset(cache) + cache[end_offset] + end + # The line number where this location starts. + #-- + #: () -> Integer def start_line source.line(start_offset) end # The content of the line where this location starts before this location. + #-- + #: () -> String def start_line_slice offset = source.line_start(start_offset) source.slice(offset, start_offset - offset) end # The line number where this location ends. + #-- + #: () -> Integer def end_line source.line(end_offset) end - # The column number in bytes where this location starts from the start of + # The column in bytes where this location starts from the start of # the line. + #-- + #: () -> Integer def start_column source.column(start_offset) end - # The column number in characters where this location ends from the start of + # The column in characters where this location starts from the start of # the line. + #-- + #: () -> Integer def start_character_column source.character_column(start_offset) end - # The column number in code units of the given encoding where this location + # The column in code units of the given encoding where this location # starts from the start of the line. 
+ #-- + #: (?Encoding encoding) -> Integer def start_code_units_column(encoding = Encoding::UTF_16LE) source.code_units_column(start_offset, encoding) end - # The column number in bytes where this location ends from the start of the + # The start column in code units using the given cache to fetch or calculate + # the value. + #-- + #: (_CodeUnitsCache cache) -> Integer + def cached_start_code_units_column(cache) + cache[start_offset] - cache[source.line_start(start_offset)] + end + + # The column in bytes where this location ends from the start of the # line. + #-- + #: () -> Integer def end_column source.column(end_offset) end - # The column number in characters where this location ends from the start of + # The column in characters where this location ends from the start of # the line. + #-- + #: () -> Integer def end_character_column source.character_column(end_offset) end - # The column number in code units of the given encoding where this location + # The column in code units of the given encoding where this location # ends from the start of the line. + #-- + #: (?Encoding encoding) -> Integer def end_code_units_column(encoding = Encoding::UTF_16LE) source.code_units_column(end_offset, encoding) end + # The end column in code units using the given cache to fetch or calculate + # the value. + #-- + #: (_CodeUnitsCache cache) -> Integer + def cached_end_code_units_column(cache) + cache[end_offset] - cache[source.line_start(end_offset)] + end + # Implement the hash pattern matching interface for Location. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { start_offset: start_offset, end_offset: end_offset } end # Implement the pretty print interface for Location. - def pretty_print(q) + #-- + #: (PP q) -> void + def pretty_print(q) # :nodoc: q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column})") end # Returns true if the given other location is equal to this location. 
+ #-- + #: (untyped other) -> bool def ==(other) Location === other && other.start_offset == start_offset && @@ -275,34 +674,65 @@ module Prism # Returns a new location that stretches from this location to the given # other location. Raises an error if this location is not before the other # location or if they don't share the same source. + #-- + #: (Location other) -> Location def join(other) raise "Incompatible sources" if source != other.source raise "Incompatible locations" if start_offset > other.start_offset Location.new(source, start_offset, other.end_offset - start_offset) end + + # Join this location with the first occurrence of the string in the source + # that occurs after this location on the same line, and return the new + # location. This will raise an error if the string does not exist. + #-- + #: (String string) -> Location + def adjoin(string) + line_suffix = source.slice(end_offset, source.line_end(end_offset) - end_offset) + + line_suffix_index = line_suffix.byteindex(string) + raise "Could not find #{string}" if line_suffix_index.nil? + + Location.new(source, start_offset, length + line_suffix_index + string.bytesize) + end end # This represents a comment that was encountered during parsing. It is the # base class for all comment types. class Comment - # The location of this comment in the source. - attr_reader :location + # The Location of this comment in the source. + attr_reader :location #: Location # Create a new comment object with the given location. + #-- + #: (Location location) -> void def initialize(location) @location = location end # Implement the hash pattern matching interface for Comment. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { location: location } end # Returns the content of the comment by slicing it from the source code. 
+ #-- + #: () -> String def slice location.slice end + + # Returns true if this comment happens on the same line as other code and + # false if the comment is by itself. This can only be true for inline + # comments and should be false for block comments. + #-- + #: () -> bool + def trailing? + raise NotImplementedError, "trailing? is not implemented for #{self.class}" + end end # InlineComment objects are the most common. They correspond to comments in @@ -310,12 +740,16 @@ module Prism class InlineComment < Comment # Returns true if this comment happens on the same line as other code and # false if the comment is by itself. + #-- + #: () -> bool def trailing? !location.start_line_slice.strip.empty? end # Returns a string representation of this comment. - def inspect + #-- + #: () -> String + def inspect # :nodoc: "#<Prism::InlineComment @location=#{location.inspect}>" end end @@ -323,13 +757,17 @@ module Prism # EmbDocComment objects correspond to comments that are surrounded by =begin # and =end. class EmbDocComment < Comment - # This can only be true for inline comments. + # Returns false. This can only be true for inline comments. + #-- + #: () -> bool def trailing? false end # Returns a string representation of this comment. - def inspect + #-- + #: () -> String + def inspect # :nodoc: "#<Prism::EmbDocComment @location=#{location.inspect}>" end end @@ -337,34 +775,44 @@ module Prism # This represents a magic comment that was encountered during parsing. class MagicComment # A Location object representing the location of the key in the source. - attr_reader :key_loc + attr_reader :key_loc #: Location # A Location object representing the location of the value in the source. - attr_reader :value_loc + attr_reader :value_loc #: Location # Create a new magic comment object with the given key and value locations. 
+ #-- + #: (Location key_loc, Location value_loc) -> void def initialize(key_loc, value_loc) @key_loc = key_loc @value_loc = value_loc end # Returns the key of the magic comment by slicing it from the source code. + #-- + #: () -> String def key key_loc.slice end # Returns the value of the magic comment by slicing it from the source code. + #-- + #: () -> String def value value_loc.slice end # Implement the hash pattern matching interface for MagicComment. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { key_loc: key_loc, value_loc: value_loc } end # Returns a string representation of this magic comment. - def inspect + #-- + #: () -> String + def inspect # :nodoc: "#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>" end end @@ -373,18 +821,20 @@ module Prism class ParseError # The type of error. This is an _internal_ symbol that is used for # communicating with translation layers. It is not meant to be public API. - attr_reader :type + attr_reader :type #: Symbol # The message associated with this error. - attr_reader :message + attr_reader :message #: String # A Location object representing the location of this error in the source. - attr_reader :location + attr_reader :location #: Location # The level of this error. - attr_reader :level + attr_reader :level #: Symbol # Create a new error object with the given message and location. + #-- + #: (Symbol type, String message, Location location, Symbol level) -> void def initialize(type, message, location, level) @type = type @message = message @@ -393,12 +843,16 @@ module Prism end # Implement the hash pattern matching interface for ParseError. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { type: type, message: message, location: location, level: level } end # Returns a string representation of this error. 
- def inspect + #-- + #: () -> String + def inspect # :nodoc: "#<Prism::ParseError @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>" end end @@ -407,18 +861,20 @@ module Prism class ParseWarning # The type of warning. This is an _internal_ symbol that is used for # communicating with translation layers. It is not meant to be public API. - attr_reader :type + attr_reader :type #: Symbol # The message associated with this warning. - attr_reader :message + attr_reader :message #: String # A Location object representing the location of this warning in the source. - attr_reader :location + attr_reader :location #: Location # The level of this warning. - attr_reader :level + attr_reader :level #: Symbol # Create a new warning object with the given message and location. + #-- + #: (Symbol type, String message, Location location, Symbol level) -> void def initialize(type, message, location, level) @type = type @message = message @@ -427,92 +883,233 @@ module Prism end # Implement the hash pattern matching interface for ParseWarning. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { type: type, message: message, location: location, level: level } end # Returns a string representation of this warning. - def inspect + #-- + #: () -> String + def inspect # :nodoc: "#<Prism::ParseWarning @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>" end end - # This represents the result of a call to ::parse or ::parse_file. It contains - # the AST, any comments that were encounters, and any errors that were - # encountered. - class ParseResult - # The value that was generated by parsing. Normally this holds the AST, but - # it can sometimes how a list of tokens or other results passed back from - # the parser. 
- attr_reader :value - + # This represents the result of a call to Prism.parse or Prism.parse_file. + # It contains the requested structure, any comments that were encountered, + # and any errors that were encountered. + class Result # The list of comments that were encountered during parsing. - attr_reader :comments + attr_reader :comments #: Array[Comment] # The list of magic comments that were encountered during parsing. - attr_reader :magic_comments + attr_reader :magic_comments #: Array[MagicComment] # An optional location that represents the location of the __END__ marker # and the rest of the content of the file. This content is loaded into the # DATA constant when the file being parsed is the main file being executed. - attr_reader :data_loc + attr_reader :data_loc #: Location? # The list of errors that were generated during parsing. - attr_reader :errors + attr_reader :errors #: Array[ParseError] # The list of warnings that were generated during parsing. - attr_reader :warnings + attr_reader :warnings #: Array[ParseWarning] # A Source instance that represents the source code that was parsed. - attr_reader :source + attr_reader :source #: Source - # Create a new parse result object with the given values. - def initialize(value, comments, magic_comments, data_loc, errors, warnings, source) - @value = value + # Create a new result object with the given values. + #-- + #: (Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(comments, magic_comments, data_loc, errors, warnings, continuable, source) @comments = comments @magic_comments = magic_comments @data_loc = data_loc @errors = errors @warnings = warnings + @continuable = continuable @source = source end - # Implement the hash pattern matching interface for ParseResult. 
- def deconstruct_keys(keys) - { value: value, comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings } + # Implement the hash pattern matching interface for Result. + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: + { comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings } end # Returns the encoding of the source code that was parsed. + #-- + #: () -> Encoding def encoding source.encoding end # Returns true if there were no errors during parsing and false if there # were. + #-- + #: () -> bool def success? errors.empty? end # Returns true if there were errors during parsing and false if there were # not. + #-- + #: () -> bool def failure? !success? end + + # Returns true if the parsed source is an incomplete expression that could + # become valid with additional input. This is useful for REPL contexts (such + # as IRB) where the user may be entering a multi-line expression one line at + # a time and the implementation needs to determine whether to wait for more + # input or to evaluate what has been entered so far. + # + # Concretely, this returns true when every error present is caused by the + # parser reaching the end of the input before a construct was closed (e.g. + # an unclosed string, array, block, or keyword), and returns false when any + # error is caused by a token that makes the input structurally invalid + # regardless of what might follow (e.g. a stray `end`, `]`, or `)` with no + # matching opener). + # + # Examples: + # + # Prism.parse("1 + [").continuable? #=> true (unclosed array) + # Prism.parse("1 + ]").continuable? #=> false (stray ]) + # Prism.parse("tap do").continuable? #=> true (unclosed block) + # Prism.parse("end.tap do").continuable? #=> false (stray end) + # + #-- + #: () -> bool + def continuable? + @continuable + end + + # Create a code units cache for the given encoding. 
+ #-- + #: (Encoding encoding) -> _CodeUnitsCache + def code_units_cache(encoding) + source.code_units_cache(encoding) + end + end + + # This is a result specific to the `parse` and `parse_file` methods. + class ParseResult < Result + autoload :Comments, "prism/parse_result/comments" + autoload :Errors, "prism/parse_result/errors" + autoload :Newlines, "prism/parse_result/newlines" + + private_constant :Comments + private_constant :Errors + private_constant :Newlines + + # The syntax tree that was parsed from the source code. + attr_reader :value #: ProgramNode + + # Create a new parse result object with the given values. + #-- + #: (ProgramNode value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) + @value = value + super(comments, magic_comments, data_loc, errors, warnings, continuable, source) + end + + # Implement the hash pattern matching interface for ParseResult. + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: + super.merge!(value: value) + end + + # Attach the list of comments to their respective locations in the tree. + #-- + #: () -> void + def attach_comments! + Comments.new(self).attach! # steep:ignore + end + + # Walk the tree and mark nodes that are on a new line, loosely emulating + # the behavior of CRuby's `:line` tracepoint event. + #-- + #: () -> void + def mark_newlines! + value.accept(Newlines.new(source.offsets.size)) # steep:ignore + end + + # Returns a string representation of the syntax tree with the errors + # displayed inline. + #-- + #: () -> String + def errors_format + Errors.new(self).format + end + end + + # This is a result specific to the `lex` and `lex_file` methods. + class LexResult < Result + # The list of tokens that were parsed from the source code. 
+ attr_reader :value #: Array[[Token, Integer]] + + # Create a new lex result object with the given values. + #-- + #: (Array[[Token, Integer]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) + @value = value + super(comments, magic_comments, data_loc, errors, warnings, continuable, source) + end + + # Implement the hash pattern matching interface for LexResult. + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: + super.merge!(value: value) + end + end + + # This is a result specific to the `parse_lex` and `parse_lex_file` methods. + class ParseLexResult < Result + # A tuple of the syntax tree and the list of tokens that were parsed from + # the source code. + attr_reader :value #: [ProgramNode, Array[[Token, Integer]]] + + # Create a new parse lex result object with the given values. + #-- + #: ([ProgramNode, Array[[Token, Integer]]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void + def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source) + @value = value + super(comments, magic_comments, data_loc, errors, warnings, continuable, source) + end + + # Implement the hash pattern matching interface for ParseLexResult. + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: + super.merge!(value: value) + end end # This represents a token from the Ruby source. class Token # The Source object that represents the source this token came from. - attr_reader :source + attr_reader :source #: Source private :source # The type of token that this token is. 
- attr_reader :type + attr_reader :type #: Symbol # A byteslice of the source that this token represents. - attr_reader :value + attr_reader :value #: String + + # @rbs @location: Location | Integer # Create a new token object with the given type, value, and location. + #-- + #: (Source source, Symbol type, String value, Location | Integer location) -> void def initialize(source, type, value, location) @source = source @type = type @@ -521,11 +1118,15 @@ module Prism end # Implement the hash pattern matching interface for Token. - def deconstruct_keys(keys) + #-- + #: (Array[Symbol]? keys) -> Hash[Symbol, untyped] + def deconstruct_keys(keys) # :nodoc: { type: type, value: value, location: location } end # A Location object representing the location of this token in the source. + #-- + #: () -> Location def location location = @location return location if location.is_a?(Location) @@ -533,7 +1134,9 @@ module Prism end # Implement the pretty print interface for Token. - def pretty_print(q) + #-- + #: (PP q) -> void + def pretty_print(q) # :nodoc: q.group do q.text(type.to_s) self.location.pretty_print(q) @@ -548,10 +1151,61 @@ module Prism end # Returns true if the given other token is equal to this token. + #-- + #: (untyped other) -> bool def ==(other) Token === other && other.type == type && other.value == value end + + # Returns a string representation of this token. + #-- + #: () -> String + def inspect # :nodoc: + location + super + end + + # Freeze this object and the objects it contains. + #-- + #: () -> void + def deep_freeze + value.freeze + location.freeze + freeze + end + end + + # This object is passed to the various Prism.* methods that accept the + # `scopes` option as an element of the list. It defines both the local + # variables visible at that scope as well as the forwarding parameters + # available at that scope. + class Scope + # The list of local variables that are defined in this scope. This should be + # defined as an array of symbols. 
+ attr_reader :locals #: Array[Symbol] + + # The list of local variables that are forwarded to the next scope. This + # should by defined as an array of symbols containing the specific values of + # :*, :**, :&, or :"...". + attr_reader :forwarding #: Array[Symbol] + + # Create a new scope object with the given locals and forwarding. + #-- + #: (Array[Symbol] locals, Array[Symbol] forwarding) -> void + def initialize(locals, forwarding) + @locals = locals + @forwarding = forwarding + end + end + + # Create a new scope with the given locals and forwarding options that is + # suitable for passing into one of the Prism.* methods that accepts the + # `scopes` option. + #-- + #: (?locals: Array[Symbol], ?forwarding: Array[Symbol]) -> Scope + def self.scope(locals: [], forwarding: []) + Scope.new(locals, forwarding) end end diff --git a/lib/prism/parse_result/comments.rb b/lib/prism/parse_result/comments.rb index f8f74d2503..df80792d39 100644 --- a/lib/prism/parse_result/comments.rb +++ b/lib/prism/parse_result/comments.rb @@ -1,7 +1,10 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism - class ParseResult + class ParseResult < Result # When we've parsed the source, we have both the syntax tree and the list of # comments that we found in the source. This class is responsible for # walking the tree and finding the nearest location to attach each comment. @@ -17,32 +20,49 @@ module Prism # the comment. Otherwise it will favor attaching to the nearest location # that is after the comment. class Comments + # @rbs! + # # An internal interface for a target that comments can be attached + # # to. This is either going to be a NodeTarget or a CommentTarget. 
+ # interface _CommentTarget + # def start_offset: () -> Integer + # def end_offset: () -> Integer + # def encloses?: (Comment) -> bool + # def leading_comment: (Comment) -> void + # def trailing_comment: (Comment) -> void + # end + # A target for attaching comments that is based on a specific node's # location. class NodeTarget # :nodoc: - attr_reader :node + attr_reader :node #: node + #: (node node) -> void def initialize(node) @node = node end + #: () -> Integer def start_offset node.start_offset end + #: () -> Integer def end_offset node.end_offset end + #: (Comment comment) -> bool def encloses?(comment) start_offset <= comment.location.start_offset && comment.location.end_offset <= end_offset end + #: (Comment comment) -> void def leading_comment(comment) node.location.leading_comment(comment) end + #: (Comment comment) -> void def trailing_comment(comment) node.location.trailing_comment(comment) end @@ -51,44 +71,54 @@ module Prism # A target for attaching comments that is based on a location field on a # node. For example, the `end` token of a ClassNode. class LocationTarget # :nodoc: - attr_reader :location + attr_reader :location #: Location + #: (Location location) -> void def initialize(location) @location = location end + #: () -> Integer def start_offset location.start_offset end + #: () -> Integer def end_offset location.end_offset end + #: (Comment comment) -> bool def encloses?(comment) false end + #: (Comment comment) -> void def leading_comment(comment) location.leading_comment(comment) end + #: (Comment comment) -> void def trailing_comment(comment) location.trailing_comment(comment) end end # The parse result that we are attaching comments to. - attr_reader :parse_result + attr_reader :parse_result #: ParseResult # Create a new Comments object that will attach comments to the given # parse result. 
+ #-- + #: (ParseResult parse_result) -> void def initialize(parse_result) @parse_result = parse_result end # Attach the comments to their respective locations in the tree by # mutating the parse result. + #-- + #: () -> void def attach! parse_result.comments.each do |comment| preceding, enclosing, following = nearest_targets(parse_result.value, comment) @@ -116,11 +146,13 @@ module Prism # Responsible for finding the nearest targets to the given comment within # the context of the given encapsulating node. + #-- + #: (node node, Comment comment) -> [_CommentTarget?, _CommentTarget?, _CommentTarget?] def nearest_targets(node, comment) comment_start = comment.location.start_offset comment_end = comment.location.end_offset - targets = [] #: Array[_Target] + targets = [] #: Array[_CommentTarget] node.comment_targets.map do |value| case value when StatementsNode @@ -133,8 +165,8 @@ module Prism end targets.sort_by!(&:start_offset) - preceding = nil #: _Target? - following = nil #: _Target? + preceding = nil #: _CommentTarget? + following = nil #: _CommentTarget? left = 0 right = targets.length @@ -183,12 +215,5 @@ module Prism [preceding, NodeTarget.new(node), following] end end - - private_constant :Comments - - # Attach the list of comments to their respective locations in the tree. - def attach_comments! - Comments.new(self).attach! # steep:ignore - end end end diff --git a/lib/prism/parse_result/errors.rb b/lib/prism/parse_result/errors.rb new file mode 100644 index 0000000000..388309d23d --- /dev/null +++ b/lib/prism/parse_result/errors.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled + +require "stringio" + +module Prism + class ParseResult < Result + # An object to represent the set of errors on a parse result. This object + # can be used to format the errors in a human-readable way. + class Errors + # The parse result that contains the errors. 
+ attr_reader :parse_result #: ParseResult + + # Initialize a new set of errors from the given parse result. + #-- + #: (ParseResult parse_result) -> void + def initialize(parse_result) + @parse_result = parse_result + end + + # Formats the errors in a human-readable way and return them as a string. + #-- + #: () -> String + def format + error_lines = {} #: Hash[Integer, Array[ParseError]] + parse_result.errors.each do |error| + location = error.location + (location.start_line..location.end_line).each do |line| + error_lines[line] ||= [] + error_lines[line] << error + end + end + + source_lines = parse_result.source.source.lines + source_lines << "" if error_lines.key?(source_lines.size + 1) + + io = StringIO.new + source_lines.each.with_index(1) do |line, line_number| + io.puts(line) + + (error_lines.delete(line_number) || []).each do |error| + location = error.location + + case line_number + when location.start_line + io.print(" " * location.start_column + "^") + + if location.start_line == location.end_line + if location.start_column != location.end_column + io.print("~" * (location.end_column - location.start_column - 1)) + end + + io.puts(" " + error.message) + else + io.puts("~" * (line.bytesize - location.start_column)) + end + when location.end_line + io.puts("~" * location.end_column + " " + error.message) + else + io.puts("~" * line.bytesize) + end + end + end + + io.puts + io.string + end + end + end +end diff --git a/lib/prism/parse_result/newlines.rb b/lib/prism/parse_result/newlines.rb index 03acb0b862..450c790226 100644 --- a/lib/prism/parse_result/newlines.rb +++ b/lib/prism/parse_result/newlines.rb @@ -1,7 +1,10 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism - class ParseResult + class ParseResult < Result # The :line tracepoint event gets fired whenever the Ruby VM encounters an # expression on a new line. 
The types of expressions that can trigger this # event are: @@ -17,50 +20,185 @@ module Prism # Note that the logic in this file should be kept in sync with the Java # MarkNewlinesVisitor, since that visitor is responsible for marking the # newlines for JRuby/TruffleRuby. + # + # This file is autoloaded only when `mark_newlines!` is called, so the + # re-opening of the various nodes in this file will only be performed in + # that case. We do that to avoid storing the extra `@newline` instance + # variable on every node if we don't need it. class Newlines < Visitor + # The map of lines indices to whether or not they have been marked as + # emitting a newline event. + # @rbs @lines: Array[bool] + # Create a new Newlines visitor with the given newline offsets. - def initialize(newline_marked) - @newline_marked = newline_marked + #-- + #: (Integer lines) -> void + def initialize(lines) + @lines = Array.new(1 + lines, false) end - # Permit block/lambda nodes to mark newlines within themselves. + # Permit block nodes to mark newlines within themselves. + #-- + #: (BlockNode node) -> void def visit_block_node(node) - old_newline_marked = @newline_marked - @newline_marked = Array.new(old_newline_marked.size, false) + old_lines = @lines + @lines = Array.new(old_lines.size, false) begin super(node) ensure - @newline_marked = old_newline_marked + @lines = old_lines end end - alias_method :visit_lambda_node, :visit_block_node + # Permit lambda nodes to mark newlines within themselves. + #-- + #: (LambdaNode node) -> void + def visit_lambda_node(node) + old_lines = @lines + @lines = Array.new(old_lines.size, false) - # Mark if/unless nodes as newlines. + begin + super(node) + ensure + @lines = old_lines + end + end + + # Mark if nodes as newlines. + #-- + #: (IfNode node) -> void def visit_if_node(node) - node.set_newline_flag(@newline_marked) + node.newline_flag!(@lines) super(node) end - alias_method :visit_unless_node, :visit_if_node + # Mark unless nodes as newlines. 
+ #-- + #: (UnlessNode node) -> void + def visit_unless_node(node) + node.newline_flag!(@lines) + super(node) + end # Permit statements lists to mark newlines within themselves. + #-- + #: (StatementsNode node) -> void def visit_statements_node(node) node.body.each do |child| - child.set_newline_flag(@newline_marked) + child.newline_flag!(@lines) end super(node) end end + end + + class Node + # Tracks whether or not this node should emit a newline event when the + # instructions that it represents are executed. + # @rbs @newline_flag: bool + + #: () -> bool + def newline_flag? # :nodoc: + !!defined?(@newline_flag) + end + + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + line = location.start_line + unless lines[line] + lines[line] = true + @newline_flag = true + end + end + end + + class BeginNode < Node + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + # Never mark BeginNode with a newline flag, mark children instead. + end + end + + class ParenthesesNode < Node + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + # Never mark ParenthesesNode with a newline flag, mark children instead. 
+ end + end + + class IfNode < Node + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + predicate.newline_flag!(lines) + end + end - private_constant :Newlines + class UnlessNode < Node + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + predicate.newline_flag!(lines) + end + end + + class UntilNode < Node + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + predicate.newline_flag!(lines) + end + end + + class WhileNode < Node + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + predicate.newline_flag!(lines) + end + end + + class RescueModifierNode < Node + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + expression.newline_flag!(lines) + end + end + + class InterpolatedMatchLastLineNode < Node + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + first = parts.first + first.newline_flag!(lines) if first + end + end + + class InterpolatedRegularExpressionNode < Node + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + first = parts.first + first.newline_flag!(lines) if first + end + end + + class InterpolatedStringNode < Node + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + first = parts.first + first.newline_flag!(lines) if first + end + end + + class InterpolatedSymbolNode < Node + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + first = parts.first + first.newline_flag!(lines) if first + end + end - # Walk the tree and mark nodes that are on a new line. - def mark_newlines! 
- value = self.value - raise "This method should only be called on a parse result that contains a node" unless Node === value - value.accept(Newlines.new(Array.new(1 + source.offsets.size, false))) # steep:ignore + class InterpolatedXStringNode < Node + #: (Array[bool] lines) -> void + def newline_flag!(lines) # :nodoc: + first = parts.first + first.newline_flag!(lines) if first end end end diff --git a/lib/prism/pattern.rb b/lib/prism/pattern.rb index e12cfd597f..be0493df05 100644 --- a/lib/prism/pattern.rb +++ b/lib/prism/pattern.rb @@ -1,4 +1,7 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism # A pattern is an object that wraps a Ruby pattern matching expression. The @@ -40,7 +43,9 @@ module Prism class CompilationError < StandardError # Create a new CompilationError with the given representation of the node # that caused the error. - def initialize(repr) + #-- + #: (String repr) -> void + def initialize(repr) # :nodoc: super(<<~ERROR) prism was unable to compile the pattern you provided into a usable expression. It failed on to understand the node represented by: @@ -56,10 +61,13 @@ module Prism end # The query that this pattern was initialized with. - attr_reader :query + attr_reader :query #: String + # @rbs @compiled: Proc? # Create a new pattern with the given query. The query should be a string # containing a Ruby pattern matching expression. + #-- + #: (String query) -> void def initialize(query) @query = query @compiled = nil @@ -67,6 +75,8 @@ module Prism # Compile the query into a callable object that can be used to match against # nodes. + #-- + #: () -> Proc def compile result = Prism.parse("case nil\nin #{query}\nend") @@ -83,7 +93,10 @@ module Prism # pattern. If a block is given, it will be called with each node that # matches the pattern. If no block is given, an enumerator will be returned # that will yield each node that matches the pattern. 
- def scan(root) + #-- + #: (node root) -> Enumerator[node, void] + #: (node root) { (node) -> void } -> void + def scan(root, &blk) return to_enum(:scan, root) unless block_given? @compiled ||= compile @@ -99,23 +112,33 @@ module Prism # Shortcut for combining two procs into one that returns true if both return # true. - def combine_and(left, right) + #-- + #: (Proc left, Proc right) -> Proc + def combine_and(left, right) # :nodoc: ->(other) { left.call(other) && right.call(other) } end # Shortcut for combining two procs into one that returns true if either # returns true. - def combine_or(left, right) + #-- + #: (Proc left, Proc right) -> Proc + def combine_or(left, right) # :nodoc: ->(other) { left.call(other) || right.call(other) } end - # Raise an error because the given node is not supported. - def compile_error(node) + # Raise an error because the given node is not supported. Note purposefully + # not typing this method since it is a no return method that Steep does not + # understand. + #-- + #: (node node) -> bot + def compile_error(node) # :nodoc: raise CompilationError, node.inspect end # in [foo, bar, baz] - def compile_array_pattern_node(node) + #-- + #: (ArrayPatternNode node) -> Proc + def compile_array_pattern_node(node) # :nodoc: compile_error(node) if !node.rest.nil? || node.posts.any? constant = node.constant @@ -140,16 +163,23 @@ module Prism end # in foo | bar - def compile_alternation_pattern_node(node) + #-- + #: (AlternationPatternNode node) -> Proc + def compile_alternation_pattern_node(node) # :nodoc: combine_or(compile_node(node.left), compile_node(node.right)) end # in Prism::ConstantReadNode - def compile_constant_path_node(node) + #-- + #: (ConstantPathNode node) -> Proc + def compile_constant_path_node(node) # :nodoc: parent = node.parent if parent.is_a?(ConstantReadNode) && parent.slice == "Prism" - compile_node(node.child) + name = node.name + raise CompilationError, node.inspect if name.nil? 
+ + compile_constant_name(node, name) else compile_error(node) end @@ -157,15 +187,22 @@ module Prism # in ConstantReadNode # in String - def compile_constant_read_node(node) - value = node.slice + #-- + #: (ConstantReadNode node) -> Proc + def compile_constant_read_node(node) # :nodoc: + compile_constant_name(node, node.name) + end - if Prism.const_defined?(value, false) - clazz = Prism.const_get(value) + # Compile a name associated with a constant. + #-- + #: ((ConstantPathNode | ConstantReadNode) node, Symbol name) -> Proc + def compile_constant_name(node, name) # :nodoc: + if Prism.const_defined?(name, false) + clazz = Prism.const_get(name) ->(other) { clazz === other } - elsif Object.const_defined?(value, false) - clazz = Object.const_get(value) + elsif Object.const_defined?(name, false) + clazz = Object.const_get(name) ->(other) { clazz === other } else @@ -175,9 +212,14 @@ module Prism # in InstanceVariableReadNode[name: Symbol] # in { name: Symbol } - def compile_hash_pattern_node(node) + #-- + #: (HashPatternNode node) -> Proc + def compile_hash_pattern_node(node) # :nodoc: compile_error(node) if node.rest - compiled_constant = compile_node(node.constant) if node.constant + + if (constant = node.constant) + compiled_constant = compile_node(constant) + end preprocessed = node.elements.to_h do |element| @@ -205,12 +247,16 @@ module Prism end # in nil - def compile_nil_node(node) + #-- + #: (NilNode node) -> Proc + def compile_nil_node(node) # :nodoc: ->(attribute) { attribute.nil? 
} end # in /foo/ - def compile_regular_expression_node(node) + #-- + #: (RegularExpressionNode node) -> Proc + def compile_regular_expression_node(node) # :nodoc: regexp = Regexp.new(node.unescaped, node.closing[1..]) ->(attribute) { regexp === attribute } @@ -218,7 +264,9 @@ module Prism # in "" # in "foo" - def compile_string_node(node) + #-- + #: (StringNode node) -> Proc + def compile_string_node(node) # :nodoc: string = node.unescaped ->(attribute) { string === attribute } @@ -226,7 +274,9 @@ module Prism # in :+ # in :foo - def compile_symbol_node(node) + #-- + #: (SymbolNode node) -> Proc + def compile_symbol_node(node) # :nodoc: symbol = node.unescaped.to_sym ->(attribute) { symbol === attribute } @@ -234,7 +284,9 @@ module Prism # Compile any kind of node. Dispatch out to the individual compilation # methods based on the type of node. - def compile_node(node) + #-- + #: (node node) -> Proc + def compile_node(node) # :nodoc: case node when AlternationPatternNode compile_alternation_pattern_node(node) diff --git a/lib/prism/polyfill/append_as_bytes.rb b/lib/prism/polyfill/append_as_bytes.rb new file mode 100644 index 0000000000..24218bd171 --- /dev/null +++ b/lib/prism/polyfill/append_as_bytes.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +# Polyfill for String#append_as_bytes, which didn't exist until Ruby 3.4. +if !("".respond_to?(:append_as_bytes)) + String.include( + Module.new { + def append_as_bytes(*args) + args.each do |arg| + arg = Integer === arg ? [arg].pack("C") : arg.b + self.<<(arg) # steep:ignore + end + end + } + ) +end diff --git a/lib/prism/polyfill/byteindex.rb b/lib/prism/polyfill/byteindex.rb new file mode 100644 index 0000000000..98c6089f14 --- /dev/null +++ b/lib/prism/polyfill/byteindex.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +# Polyfill for String#byteindex, which didn't exist until Ruby 3.2. 
+if !("".respond_to?(:byteindex)) + String.include( + Module.new { + def byteindex(needle, offset = 0) + charindex = index(needle, offset) + slice(0...charindex).bytesize if charindex + end + } + ) +end diff --git a/lib/prism/polyfill/scan_byte.rb b/lib/prism/polyfill/scan_byte.rb new file mode 100644 index 0000000000..9276e509fc --- /dev/null +++ b/lib/prism/polyfill/scan_byte.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +require "strscan" + +# Polyfill for StringScanner#scan_byte, which didn't exist until Ruby 3.4. +if !(StringScanner.method_defined?(:scan_byte)) + StringScanner.include( + Module.new { + def scan_byte # :nodoc: + get_byte&.b&.ord + end + } + ) +end diff --git a/lib/prism/polyfill/string.rb b/lib/prism/polyfill/string.rb deleted file mode 100644 index 582266d956..0000000000 --- a/lib/prism/polyfill/string.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -# Polyfill for String#unpack1 with the offset parameter. -if String.instance_method(:unpack1).parameters.none? { |_, name| name == :offset } - String.prepend( - Module.new { - def unpack1(format, offset: 0) # :nodoc: - offset == 0 ? super(format) : self[offset..].unpack1(format) # steep:ignore - end - } - ) -end diff --git a/lib/prism/polyfill/unpack1.rb b/lib/prism/polyfill/unpack1.rb new file mode 100644 index 0000000000..3fa9b5a0c5 --- /dev/null +++ b/lib/prism/polyfill/unpack1.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +# Polyfill for String#unpack1 with the offset parameter. Not all Ruby engines +# have Method#parameters implemented, so we check the arity instead if +# necessary. +if (unpack1 = String.instance_method(:unpack1)).respond_to?(:parameters) ? unpack1.parameters.none? { |_, name| name == :offset } : (unpack1.arity == 1) + String.prepend( + Module.new { + def unpack1(format, offset: 0) # :nodoc: + offset == 0 ? 
super(format) : self[offset..].unpack1(format) # steep:ignore + end + } + ) +end diff --git a/lib/prism/polyfill/warn.rb b/lib/prism/polyfill/warn.rb new file mode 100644 index 0000000000..76a4264623 --- /dev/null +++ b/lib/prism/polyfill/warn.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +# Polyfill for Kernel#warn with the category parameter. Not all Ruby engines +# have Method#parameters implemented, so we check the arity instead if +# necessary. +if (method = Kernel.instance_method(:warn)).respond_to?(:parameters) ? method.parameters.none? { |_, name| name == :category } : (method.arity == -1) + Kernel.prepend( + Module.new { + def warn(*msgs, uplevel: nil, category: nil) # :nodoc: + case uplevel + when nil + super(*msgs) + when Integer + super(*msgs, uplevel: uplevel + 1) + else + super(*msgs, uplevel: uplevel.to_int + 1) + end + end + } + ) + + Object.prepend( + Module.new { + def warn(*msgs, uplevel: nil, category: nil) # :nodoc: + case uplevel + when nil + super(*msgs) + when Integer + super(*msgs, uplevel: uplevel + 1) + else + super(*msgs, uplevel: uplevel.to_int + 1) + end + end + } + ) +end diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec index 6cf28460c2..aac056b3f8 100644 --- a/lib/prism/prism.gemspec +++ b/lib/prism/prism.gemspec @@ -2,7 +2,7 @@ Gem::Specification.new do |spec| spec.name = "prism" - spec.version = "0.25.0" + spec.version = "1.9.0" spec.authors = ["Shopify"] spec.email = ["ruby@shopify.com"] @@ -35,121 +35,194 @@ Gem::Specification.new do |spec| "docs/parser_translation.md", "docs/parsing_rules.md", "docs/releasing.md", + "docs/relocation.md", "docs/ripper_translation.md", "docs/ruby_api.md", "docs/ruby_parser_translation.md", "docs/serialization.md", "docs/testing.md", "ext/prism/api_node.c", - "ext/prism/api_pack.c", + "ext/prism/extconf.rb", "ext/prism/extension.c", "ext/prism/extension.h", "include/prism.h", + "include/prism/compiler/accel.h", + "include/prism/compiler/align.h", + 
"include/prism/compiler/exported.h", + "include/prism/compiler/fallthrough.h", + "include/prism/compiler/filesystem.h", + "include/prism/compiler/flex_array.h", + "include/prism/compiler/force_inline.h", + "include/prism/compiler/format.h", + "include/prism/compiler/inline.h", + "include/prism/compiler/nodiscard.h", + "include/prism/compiler/nonnull.h", + "include/prism/compiler/unused.h", + "include/prism/internal/allocator.h", + "include/prism/internal/allocator_debug.h", + "include/prism/internal/arena.h", + "include/prism/internal/bit.h", + "include/prism/internal/buffer.h", + "include/prism/internal/char.h", + "include/prism/internal/comments.h", + "include/prism/internal/constant_pool.h", + "include/prism/internal/diagnostic.h", + "include/prism/internal/encoding.h", + "include/prism/internal/integer.h", + "include/prism/internal/isinf.h", + "include/prism/internal/line_offset_list.h", + "include/prism/internal/list.h", + "include/prism/internal/magic_comments.h", + "include/prism/internal/memchr.h", + "include/prism/internal/node.h", + "include/prism/internal/options.h", + "include/prism/internal/parser.h", + "include/prism/internal/regexp.h", + "include/prism/internal/serialize.h", + "include/prism/internal/source.h", + "include/prism/internal/static_literals.h", + "include/prism/internal/strncasecmp.h", + "include/prism/internal/stringy.h", + "include/prism/internal/strpbrk.h", + "include/prism/internal/tokens.h", + "include/prism/arena.h", "include/prism/ast.h", - "include/prism/defines.h", + "include/prism/buffer.h", + "include/prism/comments.h", + "include/prism/constant_pool.h", "include/prism/diagnostic.h", - "include/prism/encoding.h", + "include/prism/excludes.h", + "include/prism/integer.h", + "include/prism/json.h", + "include/prism/line_offset_list.h", + "include/prism/magic_comments.h", "include/prism/node.h", "include/prism/options.h", - "include/prism/pack.h", "include/prism/parser.h", "include/prism/prettyprint.h", - "include/prism/regexp.h", 
- "include/prism/static_literals.h", - "include/prism/util/pm_buffer.h", - "include/prism/util/pm_char.h", - "include/prism/util/pm_constant_pool.h", - "include/prism/util/pm_integer.h", - "include/prism/util/pm_list.h", - "include/prism/util/pm_memchr.h", - "include/prism/util/pm_newline_list.h", - "include/prism/util/pm_state_stack.h", - "include/prism/util/pm_strncasecmp.h", - "include/prism/util/pm_string.h", - "include/prism/util/pm_string_list.h", - "include/prism/util/pm_strpbrk.h", + "include/prism/serialize.h", + "include/prism/source.h", + "include/prism/stream.h", + "include/prism/string_query.h", + "include/prism/stringy.h", "include/prism/version.h", "lib/prism.rb", "lib/prism/compiler.rb", - "lib/prism/debug.rb", "lib/prism/desugar_compiler.rb", "lib/prism/dispatcher.rb", "lib/prism/dot_visitor.rb", "lib/prism/dsl.rb", "lib/prism/ffi.rb", + "lib/prism/inspect_visitor.rb", "lib/prism/lex_compat.rb", "lib/prism/mutation_compiler.rb", "lib/prism/node_ext.rb", - "lib/prism/node_inspector.rb", + "lib/prism/node_find.rb", "lib/prism/node.rb", - "lib/prism/pack.rb", "lib/prism/parse_result.rb", "lib/prism/parse_result/comments.rb", + "lib/prism/parse_result/errors.rb", "lib/prism/parse_result/newlines.rb", "lib/prism/pattern.rb", - "lib/prism/polyfill/string.rb", + "lib/prism/polyfill/append_as_bytes.rb", + "lib/prism/polyfill/byteindex.rb", + "lib/prism/polyfill/scan_byte.rb", + "lib/prism/polyfill/unpack1.rb", + "lib/prism/polyfill/warn.rb", + "lib/prism/reflection.rb", + "lib/prism/relocation.rb", "lib/prism/serialize.rb", + "lib/prism/string_query.rb", "lib/prism/translation.rb", "lib/prism/translation/parser.rb", - "lib/prism/translation/parser33.rb", - "lib/prism/translation/parser34.rb", + "lib/prism/translation/parser_current.rb", + "lib/prism/translation/parser_versions.rb", + "lib/prism/translation/parser/builder.rb", "lib/prism/translation/parser/compiler.rb", "lib/prism/translation/parser/lexer.rb", - "lib/prism/translation/parser/rubocop.rb", 
"lib/prism/translation/ripper.rb", + "lib/prism/translation/ripper/filter.rb", + "lib/prism/translation/ripper/lexer.rb", "lib/prism/translation/ripper/sexp.rb", "lib/prism/translation/ripper/shim.rb", "lib/prism/translation/ruby_parser.rb", "lib/prism/visitor.rb", + "prism.gemspec", + "rbi/generated/prism.rbi", + "rbi/generated/prism/compiler.rbi", + "rbi/generated/prism/desugar_compiler.rbi", + "rbi/generated/prism/dispatcher.rbi", + "rbi/generated/prism/dot_visitor.rbi", + "rbi/generated/prism/dsl.rbi", + "rbi/generated/prism/inspect_visitor.rbi", + "rbi/generated/prism/lex_compat.rbi", + "rbi/generated/prism/mutation_compiler.rbi", + "rbi/generated/prism/node.rbi", + "rbi/generated/prism/node_ext.rbi", + "rbi/generated/prism/node_find.rbi", + "rbi/generated/prism/parse_result.rbi", + "rbi/generated/prism/pattern.rbi", + "rbi/generated/prism/reflection.rbi", + "rbi/generated/prism/relocation.rbi", + "rbi/generated/prism/serialize.rbi", + "rbi/generated/prism/string_query.rbi", + "rbi/generated/prism/translation.rbi", + "rbi/generated/prism/visitor.rbi", + "rbi/generated/prism/parse_result/comments.rbi", + "rbi/generated/prism/parse_result/errors.rbi", + "rbi/generated/prism/parse_result/newlines.rbi", + "rbi/prism/translation/parser.rbi", + "rbi/prism/translation/parser_versions.rbi", + "rbi/prism/translation/ripper.rbi", + "rbi/rubyvm/node_find.rbi", + "sig/generated/prism.rbs", + "sig/generated/prism/compiler.rbs", + "sig/generated/prism/desugar_compiler.rbs", + "sig/generated/prism/dispatcher.rbs", + "sig/generated/prism/dot_visitor.rbs", + "sig/generated/prism/dsl.rbs", + "sig/generated/prism/inspect_visitor.rbs", + "sig/generated/prism/lex_compat.rbs", + "sig/generated/prism/mutation_compiler.rbs", + "sig/generated/prism/node.rbs", + "sig/generated/prism/node_ext.rbs", + "sig/generated/prism/node_find.rbs", + "sig/generated/prism/parse_result.rbs", + "sig/generated/prism/pattern.rbs", + "sig/generated/prism/reflection.rbs", + 
"sig/generated/prism/relocation.rbs", + "sig/generated/prism/serialize.rbs", + "sig/generated/prism/string_query.rbs", + "sig/generated/prism/translation.rbs", + "sig/generated/prism/visitor.rbs", + "sig/generated/prism/parse_result/comments.rbs", + "sig/generated/prism/parse_result/errors.rbs", + "sig/generated/prism/parse_result/newlines.rbs", + "src/arena.c", + "src/buffer.c", + "src/char.c", + "src/constant_pool.c", "src/diagnostic.c", "src/encoding.c", + "src/integer.c", + "src/json.c", + "src/line_offset_list.c", + "src/list.c", + "src/memchr.c", "src/node.c", - "src/pack.c", + "src/options.c", + "src/parser.c", "src/prettyprint.c", + "src/prism.c", "src/regexp.c", "src/serialize.c", + "src/source.c", "src/static_literals.c", - "src/token_type.c", - "src/util/pm_buffer.c", - "src/util/pm_char.c", - "src/util/pm_constant_pool.c", - "src/util/pm_integer.c", - "src/util/pm_list.c", - "src/util/pm_memchr.c", - "src/util/pm_newline_list.c", - "src/util/pm_state_stack.c", - "src/util/pm_string.c", - "src/util/pm_string_list.c", - "src/util/pm_strncasecmp.c", - "src/util/pm_strpbrk.c", - "src/options.c", - "src/prism.c", - "prism.gemspec", - "sig/prism.rbs", - "sig/prism/compiler.rbs", - "sig/prism/dispatcher.rbs", - "sig/prism/dot_visitor.rbs", - "sig/prism/dsl.rbs", - "sig/prism/mutation_compiler.rbs", - "sig/prism/node.rbs", - "sig/prism/node_ext.rbs", - "sig/prism/pack.rbs", - "sig/prism/parse_result.rbs", - "sig/prism/pattern.rbs", - "sig/prism/serialize.rbs", - "sig/prism/visitor.rbs", - "rbi/prism.rbi", - "rbi/prism/compiler.rbi", - "rbi/prism/desugar_compiler.rbi", - "rbi/prism/mutation_compiler.rbi", - "rbi/prism/node_ext.rbi", - "rbi/prism/node.rbi", - "rbi/prism/parse_result.rbi", - "rbi/prism/translation/parser/compiler.rbi", - "rbi/prism/translation/ripper.rbi", - "rbi/prism/translation/ripper/ripper_compiler.rbi", - "rbi/prism/translation/ruby_parser.rbi", - "rbi/prism/visitor.rbi" + "src/string_query.c", + "src/stringy.c", + "src/strncasecmp.c", + 
"src/strpbrk.c", + "src/tokens.c" ] spec.extensions = ["ext/prism/extconf.rb"] diff --git a/lib/prism/relocation.rb b/lib/prism/relocation.rb new file mode 100644 index 0000000000..af0f792827 --- /dev/null +++ b/lib/prism/relocation.rb @@ -0,0 +1,665 @@ +# frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled + +module Prism + # Prism parses deterministically for the same input. This provides a nice + # property that is exposed through the #node_id API on nodes. Effectively this + # means that for the same input, these values will remain consistent every + # time the source is parsed. This means we can reparse the source same with a + # #node_id value and find the exact same node again. + # + # The Relocation module provides an API around this property. It allows you to + # "save" nodes and locations using a minimal amount of memory (just the + # node_id and a field identifier) and then reify them later. + module Relocation + # @rbs! + # type entry_value = untyped + # type entry_values = Hash[Symbol, entry_value] + # + # interface _Value + # def start_line: () -> Integer + # def end_line: () -> Integer + # def start_offset: () -> Integer + # def end_offset: () -> Integer + # def start_character_offset: () -> Integer + # def end_character_offset: () -> Integer + # def cached_start_code_units_offset: (_CodeUnitsCache cache) -> Integer + # def cached_end_code_units_offset: (_CodeUnitsCache cache) -> Integer + # def start_column: () -> Integer + # def end_column: () -> Integer + # def start_character_column: () -> Integer + # def end_character_column: () -> Integer + # def cached_start_code_units_column: (_CodeUnitsCache cache) -> Integer + # def cached_end_code_units_column: (_CodeUnitsCache cache) -> Integer + # def leading_comments: () -> Array[Comment] + # def trailing_comments: () -> Array[Comment] + # end + # + # interface _Field + # def fields: (_Value value) -> entry_values + # end + + # An entry in a repository that will lazily reify its 
values when they are + # first accessed. + class Entry + # Raised if a value that could potentially be on an entry is missing + # because it was either not configured on the repository or it has not yet + # been fetched. + class MissingValueError < StandardError + end + + # @rbs @repository: Repository? + # @rbs @values: Hash[Symbol, untyped]? + + # Initialize a new entry with the given repository. + #-- + #: (Repository repository) -> void + def initialize(repository) + @repository = repository + @values = nil + end + + # Fetch the filepath of the value. + #-- + #: () -> String + def filepath + fetch_value(:filepath) + end + + # Fetch the start line of the value. + #-- + #: () -> Integer + def start_line + fetch_value(:start_line) + end + + # Fetch the end line of the value. + #-- + #: () -> Integer + def end_line + fetch_value(:end_line) + end + + # Fetch the start byte offset of the value. + #-- + #: () -> Integer + def start_offset + fetch_value(:start_offset) + end + + # Fetch the end byte offset of the value. + #-- + #: () -> Integer + def end_offset + fetch_value(:end_offset) + end + + # Fetch the start character offset of the value. + #-- + #: () -> Integer + def start_character_offset + fetch_value(:start_character_offset) + end + + # Fetch the end character offset of the value. + #-- + #: () -> Integer + def end_character_offset + fetch_value(:end_character_offset) + end + + # Fetch the start code units offset of the value, for the encoding that + # was configured on the repository. + #-- + #: () -> Integer + def start_code_units_offset + fetch_value(:start_code_units_offset) + end + + # Fetch the end code units offset of the value, for the encoding that was + # configured on the repository. + #-- + #: () -> Integer + def end_code_units_offset + fetch_value(:end_code_units_offset) + end + + # Fetch the start byte column of the value. 
+ #-- + #: () -> Integer + def start_column + fetch_value(:start_column) + end + + # Fetch the end byte column of the value. + #-- + #: () -> Integer + def end_column + fetch_value(:end_column) + end + + # Fetch the start character column of the value. + #-- + #: () -> Integer + def start_character_column + fetch_value(:start_character_column) + end + + # Fetch the end character column of the value. + #-- + #: () -> Integer + def end_character_column + fetch_value(:end_character_column) + end + + # Fetch the start code units column of the value, for the encoding that + # was configured on the repository. + #-- + #: () -> Integer + def start_code_units_column + fetch_value(:start_code_units_column) + end + + # Fetch the end code units column of the value, for the encoding that was + # configured on the repository. + #-- + #: () -> Integer + def end_code_units_column + fetch_value(:end_code_units_column) + end + + # Fetch the leading comments of the value. + #-- + #: () -> Array[CommentsField::Comment] + def leading_comments + fetch_value(:leading_comments) + end + + # Fetch the trailing comments of the value. + #-- + #: () -> Array[CommentsField::Comment] + def trailing_comments + fetch_value(:trailing_comments) + end + + # Fetch the leading and trailing comments of the value. + #-- + #: () -> Array[CommentsField::Comment] + def comments + [*leading_comments, *trailing_comments] + end + + # Reify the values on this entry with the given values. This is an + # internal-only API that is called from the repository when it is time to + # reify the values. + #-- + #: (entry_values values) -> void + def reify!(values) # :nodoc: + @repository = nil + @values = values + end + + private + + # Fetch a value from the entry, raising an error if it is missing. 
+ #-- + #: (Symbol name) -> entry_value + def fetch_value(name) + values.fetch(name) do + raise MissingValueError, "No value for #{name}, make sure the " \ + "repository has been properly configured" + end + end + + # Return the values from the repository, reifying them if necessary. + #-- + #: () -> entry_values + def values + @values || (@repository&.reify!; @values) #: entry_values + end + end + + # Represents the source of a repository that will be reparsed. + class Source + # The value that will need to be reparsed. + attr_reader :value #: untyped + + # Initialize the source with the given value. + #-- + #: (untyped value) -> void + def initialize(value) + @value = value + end + + # Reparse the value and return the parse result. + #-- + #: () -> ParseResult + def result + raise NotImplementedError, "Subclasses must implement #result" + end + + # Create a code units cache for the given encoding. + #-- + #: (Encoding encoding) -> _CodeUnitsCache + def code_units_cache(encoding) + result.code_units_cache(encoding) + end + end + + # A source that is represented by a file path. + class SourceFilepath < Source + # Reparse the file and return the parse result. + #-- + #: () -> ParseResult + def result + Prism.parse_file(value) + end + end + + # A source that is represented by a string. + class SourceString < Source + # Reparse the string and return the parse result. + #-- + #: () -> ParseResult + def result + Prism.parse(value) + end + end + + # A field that represents the file path. + class FilepathField + # The file path that this field represents. + attr_reader :value #: String + + # Initialize a new field with the given file path. + #-- + #: (String value) -> void + def initialize(value) + @value = value + end + + # Fetch the file path. + #-- + #: (_Value _value) -> entry_values + def fields(_value) + { filepath: value } + end + end + + # A field representing the start and end lines. + class LinesField + # Fetches the start and end line of a value. 
+ #-- + #: (_Value value) -> entry_values + def fields(value) + { start_line: value.start_line, end_line: value.end_line } + end + end + + # A field representing the start and end byte offsets. + class OffsetsField + # Fetches the start and end byte offset of a value. + #-- + #: (_Value value) -> entry_values + def fields(value) + { start_offset: value.start_offset, end_offset: value.end_offset } + end + end + + # A field representing the start and end character offsets. + class CharacterOffsetsField + # Fetches the start and end character offset of a value. + #-- + #: (_Value value) -> entry_values + def fields(value) + { + start_character_offset: value.start_character_offset, + end_character_offset: value.end_character_offset + } + end + end + + # A field representing the start and end code unit offsets. + class CodeUnitOffsetsField + # A pointer to the repository object that is used for lazily creating a + # code units cache. + attr_reader :repository #: Repository + + # The associated encoding for the code units. + attr_reader :encoding #: Encoding + + # @rbs @cache: _CodeUnitsCache? + + # Initialize a new field with the associated repository and encoding. + #-- + #: (Repository repository, Encoding encoding) -> void + def initialize(repository, encoding) + @repository = repository + @encoding = encoding + @cache = nil + end + + # Fetches the start and end code units offset of a value for a particular + # encoding. + #-- + #: (_Value value) -> entry_values + def fields(value) + { + start_code_units_offset: value.cached_start_code_units_offset(cache), + end_code_units_offset: value.cached_end_code_units_offset(cache) + } + end + + private + + # Lazily create a code units cache for the associated encoding. + #-- + #: () -> _CodeUnitsCache + def cache + @cache ||= repository.code_units_cache(encoding) + end + end + + # A field representing the start and end byte columns. + class ColumnsField + # Fetches the start and end byte column of a value. 
+ #-- + #: (_Value value) -> entry_values + def fields(value) + { start_column: value.start_column, end_column: value.end_column } + end + end + + # A field representing the start and end character columns. + class CharacterColumnsField + # Fetches the start and end character column of a value. + #-- + #: (_Value value) -> entry_values + def fields(value) + { + start_character_column: value.start_character_column, + end_character_column: value.end_character_column + } + end + end + + # A field representing the start and end code unit columns for a specific + # encoding. + class CodeUnitColumnsField + # The repository object that is used for lazily creating a code units + # cache. + attr_reader :repository #: Repository + + # The associated encoding for the code units. + attr_reader :encoding #: Encoding + + # @rbs @cache: _CodeUnitsCache? + + # Initialize a new field with the associated repository and encoding. + #-- + #: (Repository repository, Encoding encoding) -> void + def initialize(repository, encoding) + @repository = repository + @encoding = encoding + @cache = nil + end + + # Fetches the start and end code units column of a value for a particular + # encoding. + #-- + #: (_Value value) -> entry_values + def fields(value) + { + start_code_units_column: value.cached_start_code_units_column(cache), + end_code_units_column: value.cached_end_code_units_column(cache) + } + end + + private + + # Lazily create a code units cache for the associated encoding. + #-- + #: () -> _CodeUnitsCache + def cache + @cache ||= repository.code_units_cache(encoding) + end + end + + # An abstract field used as the parent class of the two comments fields. + class CommentsField + # An object that represents a slice of a comment. + class Comment + # The slice of the comment. + attr_reader :slice #: String + + # Initialize a new comment with the given slice. 
+ # + #: (String slice) -> void + def initialize(slice) + @slice = slice + end + end + + private + + # Create comment objects from the given values. + #-- + #: (entry_value values) -> Array[Comment] + def comments(values) + values.map { |value| Comment.new(value.slice) } + end + end + + # A field representing the leading comments. + class LeadingCommentsField < CommentsField + # Fetches the leading comments of a value. + #-- + #: (_Value value) -> entry_values + def fields(value) + { leading_comments: comments(value.leading_comments) } + end + end + + # A field representing the trailing comments. + class TrailingCommentsField < CommentsField + # Fetches the trailing comments of a value. + #-- + #: (_Value value) -> entry_values + def fields(value) + { trailing_comments: comments(value.trailing_comments) } + end + end + + # A repository is a configured collection of fields and a set of entries + # that knows how to reparse a source and reify the values. + class Repository + # Raised when multiple fields of the same type are configured on the same + # repository. + class ConfigurationError < StandardError + end + + # The source associated with this repository. This will be either a + # SourceFilepath (the most common use case) or a SourceString. + attr_reader :source #: Source + + # The fields that have been configured on this repository. + attr_reader :fields #: Hash[Symbol, _Field] + + # The entries that have been saved on this repository. + attr_reader :entries #: Hash[Integer, Hash[Symbol, Entry]] + + # Initialize a new repository with the given source. + #-- + #: (Source source) -> void + def initialize(source) + @source = source + @fields = {} + @entries = Hash.new { |hash, node_id| hash[node_id] = {} } + end + + # Create a code units cache for the given encoding from the source. 
+ #-- + #: (Encoding encoding) -> _CodeUnitsCache + def code_units_cache(encoding) + source.code_units_cache(encoding) + end + + # Configure the filepath field for this repository and return self. + #-- + #: () -> self + def filepath + raise ConfigurationError, "Can only specify filepath for a filepath source" unless source.is_a?(SourceFilepath) + field(:filepath, FilepathField.new(source.value)) + end + + # Configure the lines field for this repository and return self. + #-- + #: () -> self + def lines + field(:lines, LinesField.new) + end + + # Configure the offsets field for this repository and return self. + #-- + #: () -> self + def offsets + field(:offsets, OffsetsField.new) + end + + # Configure the character offsets field for this repository and return + # self. + #-- + #: () -> self + def character_offsets + field(:character_offsets, CharacterOffsetsField.new) + end + + # Configure the code unit offsets field for this repository for a specific + # encoding and return self. + #-- + #: (Encoding encoding) -> self + def code_unit_offsets(encoding) + field(:code_unit_offsets, CodeUnitOffsetsField.new(self, encoding)) + end + + # Configure the columns field for this repository and return self. + #-- + #: () -> self + def columns + field(:columns, ColumnsField.new) + end + + # Configure the character columns field for this repository and return + # self. + #-- + #: () -> self + def character_columns + field(:character_columns, CharacterColumnsField.new) + end + + # Configure the code unit columns field for this repository for a specific + # encoding and return self. + #-- + #: (Encoding encoding) -> self + def code_unit_columns(encoding) + field(:code_unit_columns, CodeUnitColumnsField.new(self, encoding)) + end + + # Configure the leading comments field for this repository and return + # self. 
+ #-- + #: () -> self + def leading_comments + field(:leading_comments, LeadingCommentsField.new) + end + + # Configure the trailing comments field for this repository and return + # self. + #-- + #: () -> self + def trailing_comments + field(:trailing_comments, TrailingCommentsField.new) + end + + # Configure both the leading and trailing comment fields for this + # repository and return self. + #-- + #: () -> self + def comments + leading_comments.trailing_comments + end + + # This method is called from nodes and locations when they want to enter + # themselves into the repository. It is internal-only and meant to be + # called from the #save* APIs. + #-- + #: (Integer node_id, Symbol field_name) -> Entry + def enter(node_id, field_name) # :nodoc: + entry = Entry.new(self) + @entries[node_id][field_name] = entry + entry + end + + # This method is called from the entries in the repository when they need + # to reify their values. It is internal-only and meant to be called from + # the various value APIs. + #-- + #: () -> void + def reify! # :nodoc: + result = source.result + + # Attach the comments if they have been requested as part of the + # configuration of this repository. + if fields.key?(:leading_comments) || fields.key?(:trailing_comments) + result.attach_comments! + end + + queue = [result.value] #: Array[Prism::node] + while (node = queue.shift) + @entries[node.node_id].each do |field_name, entry| + value = node.public_send(field_name) + values = {} #: entry_values + + fields.each_value do |field| + values.merge!(field.fields(value)) + end + + entry.reify!(values) + end + + queue.concat(node.compact_child_nodes) + end + + @entries.clear + end + + private + + # Append the given field to the repository and return the repository so + # that these calls can be chained. 
+ #-- + #: (Symbol name, _Field) -> self + def field(name, value) + raise ConfigurationError, "Cannot specify multiple #{name} fields" if @fields.key?(name) + @fields[name] = value + self + end + end + + # Create a new repository for the given filepath. + #-- + #: (String value) -> Repository + def self.filepath(value) + Repository.new(SourceFilepath.new(value)) + end + + # Create a new repository for the given string. + #-- + #: (String value) -> Repository + def self.string(value) + Repository.new(SourceString.new(value)) + end + end +end diff --git a/lib/prism/string_query.rb b/lib/prism/string_query.rb new file mode 100644 index 0000000000..99ce57e5fe --- /dev/null +++ b/lib/prism/string_query.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled + +module Prism + # Query methods that allow categorizing strings based on their context for + # where they could be valid in a Ruby syntax tree. + class StringQuery + # @rbs! + # def self.local?: (String string) -> bool + # def self.constant?: (String string) -> bool + # def self.method_name?: (String string) -> bool + + # The string that this query is wrapping. + attr_reader :string #: String + + # Initialize a new query with the given string. + #-- + #: (String string) -> void + def initialize(string) + @string = string + end + + # Whether or not this string is a valid local variable name. + #-- + #: () -> bool + def local? + StringQuery.local?(string) + end + + # Whether or not this string is a valid constant name. + #-- + #: () -> bool + def constant? + StringQuery.constant?(string) + end + + # Whether or not this string is a valid method name. + #-- + #: () -> bool + def method_name? 
+ StringQuery.method_name?(string) + end + end +end diff --git a/lib/prism/translation.rb b/lib/prism/translation.rb index 8b75e8a3ab..5a086a7542 100644 --- a/lib/prism/translation.rb +++ b/lib/prism/translation.rb @@ -1,12 +1,19 @@ # frozen_string_literal: true +# :markup: markdown +#-- +# rbs_inline: enabled module Prism # This module is responsible for converting the prism syntax tree into other # syntax trees. module Translation # steep:ignore autoload :Parser, "prism/translation/parser" - autoload :Parser33, "prism/translation/parser33" - autoload :Parser34, "prism/translation/parser34" + autoload :ParserCurrent, "prism/translation/parser_current" + autoload :Parser33, "prism/translation/parser_versions" + autoload :Parser34, "prism/translation/parser_versions" + autoload :Parser35, "prism/translation/parser_versions" + autoload :Parser40, "prism/translation/parser_versions" + autoload :Parser41, "prism/translation/parser_versions" autoload :Ripper, "prism/translation/ripper" autoload :RubyParser, "prism/translation/ruby_parser" end diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb index 0d11b8f566..70031f133a 100644 --- a/lib/prism/translation/parser.rb +++ b/lib/prism/translation/parser.rb @@ -1,6 +1,17 @@ # frozen_string_literal: true - -require "parser" +# :markup: markdown + +begin + required_version = ">= 3.3.7.2" + gem "parser", required_version + require "parser" +rescue LoadError + warn(<<~MSG) + Error: Unable to load parser #{required_version}. \ + Add `gem "parser"` to your Gemfile or run `bundle update parser`. + MSG + exit(1) +end module Prism module Translation @@ -8,6 +19,13 @@ module Prism # whitequark/parser gem's syntax tree. It inherits from the base parser for # the parser gem, and overrides the parse* methods to parse with prism and # then translate. + # + # Note that this version of the parser always parses using the latest + # version of Ruby syntax supported by Prism. 
If you want specific version + # support, use one of the version-specific subclasses, such as + # `Prism::Translation::Parser34`. If you want to parse using the same + # version of Ruby syntax as the currently running version of Ruby, use + # `Prism::Translation::ParserCurrent`. class Parser < ::Parser::Base Diagnostic = ::Parser::Diagnostic # :nodoc: private_constant :Diagnostic @@ -15,7 +33,7 @@ module Prism # The parser gem has a list of diagnostics with a hard-coded set of error # messages. We create our own diagnostic class in order to set our own # error messages. - class PrismDiagnostic < Diagnostic + class PrismDiagnostic < Diagnostic # :nodoc: # This is the cached message coming from prism. attr_reader :message @@ -28,8 +46,45 @@ module Prism Racc_debug_parser = false # :nodoc: + # The `builder` argument is used to create the parser using our custom builder class by default. + # + # By using the `:parser` keyword argument, you can translate in a way that is compatible with + # the Parser gem using any parser. + # + # For example, in RuboCop for Ruby LSP, the following approach can be used to improve performance + # by reusing a pre-parsed `Prism::ParseLexResult`: + # + # class PrismPreparsed + # def initialize(prism_result) + # @prism_result = prism_result + # end + # + # def parse_lex(source, **options) + # @prism_result + # end + # end + # + # prism_preparsed = PrismPreparsed.new(prism_result) + # + # Prism::Translation::Parser34.new(builder, parser: prism_preparsed) + # + # In an object passed to the `:parser` keyword argument, the `parse` and `parse_lex` methods + # should be implemented as needed. + # + def initialize(builder = Prism::Translation::Parser::Builder.new, parser: Prism) + if !builder.is_a?(Prism::Translation::Parser::Builder) + warn(<<~MSG, uplevel: 1, category: :deprecated) + [deprecation]: The builder passed to `Prism::Translation::Parser.new` is not a \ + `Prism::Translation::Parser::Builder` subclass. 
This will raise in the next major version. + MSG + end + @parser = parser + + super(builder) + end + def version # :nodoc: - 34 + 41 end # The default encoding for Ruby files is UTF-8. @@ -46,7 +101,7 @@ module Prism source = source_buffer.source offset_cache = build_offset_cache(source) - result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version)), offset_cache) + result = unwrap(@parser.parse(source, **prism_options), offset_cache) build_ast(result.value, offset_cache) ensure @@ -59,7 +114,7 @@ module Prism source = source_buffer.source offset_cache = build_offset_cache(source) - result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version)), offset_cache) + result = unwrap(@parser.parse(source, **prism_options), offset_cache) [ build_ast(result.value, offset_cache), @@ -78,7 +133,7 @@ module Prism offset_cache = build_offset_cache(source) result = begin - unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version)), offset_cache) + unwrap(@parser.parse_lex(source, **prism_options), offset_cache) rescue ::Parser::SyntaxError raise if !recover end @@ -149,17 +204,17 @@ module Prism Diagnostic.new(:error, :endless_setter, {}, diagnostic_location, []) when :embdoc_term Diagnostic.new(:error, :embedded_document, {}, diagnostic_location, []) - when :incomplete_variable_class, :incomplete_variable_class_3_3_0 + when :incomplete_variable_class, :incomplete_variable_class_3_3 location = location.copy(length: location.length + 1) diagnostic_location = build_range(location, offset_cache) Diagnostic.new(:error, :cvar_name, { name: location.slice }, diagnostic_location, []) - when :incomplete_variable_instance, :incomplete_variable_instance_3_3_0 + when :incomplete_variable_instance, :incomplete_variable_instance_3_3 location = location.copy(length: location.length + 1) diagnostic_location = build_range(location, offset_cache) Diagnostic.new(:error, 
:ivar_name, { name: location.slice }, diagnostic_location, []) - when :invalid_variable_global, :invalid_variable_global_3_3_0 + when :invalid_variable_global, :invalid_variable_global_3_3 Diagnostic.new(:error, :gvar_name, { name: location.slice }, diagnostic_location, []) when :module_in_method Diagnostic.new(:error, :module_in_def, {}, diagnostic_location, []) @@ -280,18 +335,37 @@ module Prism ) end + # Options for how prism should parse/lex the source. + def prism_options + options = { + filepath: @source_buffer.name, + version: convert_for_prism(version), + partial_script: true, + } + # The parser gem always encodes to UTF-8, unless it is binary. + # https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/source/buffer.rb#L80-L107 + options[:encoding] = false if @source_buffer.source.encoding != Encoding::BINARY + + options + end + # Converts the version format handled by Parser to the format handled by Prism. def convert_for_prism(version) case version when 33 - "3.3.0" + "3.3.1" when 34 "3.4.0" + when 35, 40 + "4.0.0" + when 41 + "4.1.0" else "latest" end end + require_relative "parser/builder" require_relative "parser/compiler" require_relative "parser/lexer" diff --git a/lib/prism/translation/parser/builder.rb b/lib/prism/translation/parser/builder.rb new file mode 100644 index 0000000000..7fc3bba6b7 --- /dev/null +++ b/lib/prism/translation/parser/builder.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true +# :markup: markdown + +module Prism + module Translation + class Parser + # A builder that knows how to convert more modern Ruby syntax + # into whitequark/parser gem's syntax tree. + class Builder < ::Parser::Builders::Default + # It represents the `it` block argument, which is not yet implemented in + # the Parser gem. + def itarg + n(:itarg, [:it], nil) + end + + # The following three lines have been added to support the `it` block + # parameter syntax in the source code below. 
+ # + # if args.type == :itarg + # block_type = :itblock + # args = :it + # + # https://github.com/whitequark/parser/blob/v3.3.7.1/lib/parser/builders/default.rb#L1122-L1155 + def block(method_call, begin_t, args, body, end_t) + _receiver, _selector, *call_args = *method_call + + if method_call.type == :yield + diagnostic :error, :block_given_to_yield, nil, method_call.loc.keyword, [loc(begin_t)] + end + + last_arg = call_args.last + if last_arg && (last_arg.type == :block_pass || last_arg.type == :forwarded_args) + diagnostic :error, :block_and_blockarg, nil, last_arg.loc.expression, [loc(begin_t)] + end + + if args.type == :itarg + block_type = :itblock + args = :it + elsif args.type == :numargs + block_type = :numblock + args = args.children[0] + else + block_type = :block + end + + if [:send, :csend, :index, :super, :zsuper, :lambda].include?(method_call.type) + n(block_type, [ method_call, args, body ], + block_map(method_call.loc.expression, begin_t, end_t)) + else + # Code like "return foo 1 do end" is reduced in a weird sequence. + # Here, method_call is actually (return). + actual_send, = *method_call + block = + n(block_type, [ actual_send, args, body ], + block_map(actual_send.loc.expression, begin_t, end_t)) + + n(method_call.type, [ block ], + method_call.loc.with_expression(join_exprs(method_call, block))) + end + end + + # def foo(&nil); end + # ^^^^ + def blocknilarg(amper_t, nil_t) + n0(:blocknilarg, arg_prefix_map(amper_t, nil_t)) + end + end + end + end +end diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb index 9437589623..d11db12ae6 100644 --- a/lib/prism/translation/parser/compiler.rb +++ b/lib/prism/translation/parser/compiler.rb @@ -1,13 +1,14 @@ # frozen_string_literal: true +# :markup: markdown module Prism module Translation class Parser # A visitor that knows how to convert a prism syntax tree into the # whitequark/parser gem's syntax tree. 
- class Compiler < ::Prism::Compiler + class Compiler < ::Prism::Compiler # :nodoc: # Raised when the tree is malformed or there is a bug in the compiler. - class CompilationError < StandardError + class CompilationError < StandardError # :nodoc: end # The Parser::Base instance that is being used to build the AST. @@ -74,7 +75,29 @@ module Prism # [] # ^^ def visit_array_node(node) - builder.array(token(node.opening_loc), visit_all(node.elements), token(node.closing_loc)) + if node.opening&.start_with?("%w", "%W", "%i", "%I") + elements = node.elements.flat_map do |element| + if element.is_a?(StringNode) + if element.content.include?("\n") + string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening) + else + [builder.string_internal([element.unescaped, srange(element.content_loc)])] + end + elsif element.is_a?(InterpolatedStringNode) + builder.string_compose( + token(element.opening_loc), + string_nodes_from_interpolation(element, node.opening), + token(element.closing_loc) + ) + else + [visit(element)] + end + end + else + elements = visit_all(node.elements) + end + + builder.array(token(node.opening_loc), elements, token(node.closing_loc)) end # foo => [bar] @@ -90,7 +113,11 @@ module Prism end if node.constant - builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.array_pattern(nil, visited, nil), token(node.closing_loc)) + if visited.empty? 
+ builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.array_pattern(token(node.opening_loc), visited, token(node.closing_loc)), token(node.closing_loc)) + else + builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.array_pattern(nil, visited, nil), token(node.closing_loc)) + end else builder.array_pattern(token(node.opening_loc), visited, token(node.closing_loc)) end @@ -105,38 +132,45 @@ module Prism # { a: 1 } # ^^^^ def visit_assoc_node(node) - if in_pattern - if node.value.is_a?(ImplicitNode) - if node.key.is_a?(SymbolNode) - builder.match_hash_var([node.key.unescaped, srange(node.key.location)]) + key = node.key + + if node.value.is_a?(ImplicitNode) + if in_pattern + if key.is_a?(SymbolNode) + if key.opening.nil? + builder.match_hash_var([key.unescaped, srange(key.location)]) + else + builder.match_hash_var_from_str(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc)) + end else - builder.match_hash_var_from_str(token(node.key.opening_loc), visit_all(node.key.parts), token(node.key.closing_loc)) + builder.match_hash_var_from_str(token(key.opening_loc), visit_all(key.parts), token(key.closing_loc)) end else - builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value)) - end - elsif node.value.is_a?(ImplicitNode) - if (value = node.value.value).is_a?(LocalVariableReadNode) - builder.pair_keyword( - [node.key.unescaped, srange(node.key)], - builder.ident([value.name, srange(node.key.value_loc)]).updated(:lvar) - ) - else - builder.pair_label([node.key.unescaped, srange(node.key.location)]) + value = node.value.value + + implicit_value = if value.is_a?(CallNode) + builder.call_method(nil, nil, [value.name, srange(value.message_loc)]) + elsif value.is_a?(ConstantReadNode) + builder.const([value.name, srange(key.value_loc)]) + else + builder.ident([value.name, srange(key.value_loc)]).updated(:lvar) + end + + 
builder.pair_keyword([key.unescaped, srange(key)], implicit_value) end elsif node.operator_loc - builder.pair(visit(node.key), token(node.operator_loc), visit(node.value)) - elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil? - builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value)) + builder.pair(visit(key), token(node.operator_loc), visit(node.value)) + elsif key.is_a?(SymbolNode) && key.opening_loc.nil? + builder.pair_keyword([key.unescaped, srange(key.location)], visit(node.value)) else parts = - if node.key.is_a?(SymbolNode) - [builder.string_internal([node.key.unescaped, srange(node.key.value_loc)])] + if key.is_a?(SymbolNode) + [builder.string_internal([key.unescaped, srange(key.value_loc)])] else - visit_all(node.key.parts) + visit_all(key.parts) end - builder.pair_quoted(token(node.key.opening_loc), parts, token(node.key.closing_loc), visit(node.value)) + builder.pair_quoted(token(key.opening_loc), parts, token(key.closing_loc), visit(node.value)) end end @@ -146,7 +180,9 @@ module Prism # { **foo } # ^^^^^ def visit_assoc_splat_node(node) - if node.value.nil? && forwarding.include?(:**) + if in_pattern + builder.match_rest(token(node.operator_loc), token(node.value&.location)) + elsif node.value.nil? 
&& forwarding.include?(:**) builder.forwarded_kwrestarg(token(node.operator_loc)) else builder.kwsplat(token(node.operator_loc), visit(node.value)) @@ -167,17 +203,24 @@ module Prism if (rescue_clause = node.rescue_clause) begin find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset - find_end_offset = (rescue_clause.statements&.location&.start_offset || rescue_clause.consequent&.location&.start_offset || (find_start_offset + 1)) + find_end_offset = ( + rescue_clause.statements&.location&.start_offset || + rescue_clause.subsequent&.location&.start_offset || + node.else_clause&.location&.start_offset || + node.ensure_clause&.location&.start_offset || + node.end_keyword_loc&.start_offset || + find_start_offset + 1 + ) rescue_bodies << builder.rescue_body( token(rescue_clause.keyword_loc), rescue_clause.exceptions.any? ? builder.array(nil, visit_all(rescue_clause.exceptions), nil) : nil, token(rescue_clause.operator_loc), visit(rescue_clause.reference), - srange_find(find_start_offset, find_end_offset, [";"]), + srange_semicolon(find_start_offset, find_end_offset), visit(rescue_clause.statements) ) - end until (rescue_clause = rescue_clause.consequent).nil? + end until (rescue_clause = rescue_clause.subsequent).nil? end begin_body = @@ -254,11 +297,6 @@ module Prism if node.call_operator_loc.nil? case name - when :-@ - case (receiver = node.receiver).type - when :integer_node, :float_node, :rational_node, :imaginary_node - return visit(numeric_negate(node.message_loc, receiver)) - end when :! 
return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block) when :=~ @@ -280,7 +318,7 @@ module Prism visit_all(arguments), token(node.closing_loc), ), - srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]), + token(node.equal_loc), visit(node.arguments.arguments.last) ), block @@ -297,7 +335,7 @@ module Prism if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil? builder.assign( builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)), - srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]), + token(node.equal_loc), visit(node.arguments.arguments.last) ) else @@ -328,18 +366,48 @@ module Prism [], nil ), - [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], visit(node.value) ) end # foo.bar &&= baz # ^^^^^^^^^^^^^^^ - alias visit_call_and_write_node visit_call_operator_write_node + def visit_call_and_write_node(node) + call_operator_loc = node.call_operator_loc + + builder.op_assign( + builder.call_method( + visit(node.receiver), + call_operator_loc.nil? ? nil : [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)], + node.message_loc ? [node.read_name, srange(node.message_loc)] : nil, + nil, + [], + nil + ), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # foo.bar ||= baz # ^^^^^^^^^^^^^^^ - alias visit_call_or_write_node visit_call_operator_write_node + def visit_call_or_write_node(node) + call_operator_loc = node.call_operator_loc + + builder.op_assign( + builder.call_method( + visit(node.receiver), + call_operator_loc.nil? ? nil : [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)], + node.message_loc ? 
[node.read_name, srange(node.message_loc)] : nil, + nil, + [], + nil + ), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # foo.bar, = 1 # ^^^^^^^ @@ -366,8 +434,8 @@ module Prism token(node.case_keyword_loc), visit(node.predicate), visit_all(node.conditions), - token(node.consequent&.else_keyword_loc), - visit(node.consequent), + token(node.else_clause&.else_keyword_loc), + visit(node.else_clause), token(node.end_keyword_loc) ) end @@ -379,8 +447,8 @@ module Prism token(node.case_keyword_loc), visit(node.predicate), visit_all(node.conditions), - token(node.consequent&.else_keyword_loc), - visit(node.consequent), + token(node.else_clause&.else_keyword_loc), + visit(node.else_clause), token(node.end_keyword_loc) ) end @@ -419,18 +487,30 @@ module Prism def visit_class_variable_operator_write_node(node) builder.op_assign( builder.assignable(builder.cvar(token(node.name_loc))), - [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], visit(node.value) ) end # @@foo &&= bar # ^^^^^^^^^^^^^ - alias visit_class_variable_and_write_node visit_class_variable_operator_write_node + def visit_class_variable_and_write_node(node) + builder.op_assign( + builder.assignable(builder.cvar(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # @@foo ||= bar # ^^^^^^^^^^^^^ - alias visit_class_variable_or_write_node visit_class_variable_operator_write_node + def visit_class_variable_or_write_node(node) + builder.op_assign( + builder.assignable(builder.cvar(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # @@foo, = bar # ^^^^^ @@ -458,18 +538,30 @@ module Prism def visit_constant_operator_write_node(node) builder.op_assign( builder.assignable(builder.const([node.name, srange(node.name_loc)])), - 
[node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], visit(node.value) ) end # Foo &&= bar # ^^^^^^^^^^^^ - alias visit_constant_and_write_node visit_constant_operator_write_node + def visit_constant_and_write_node(node) + builder.op_assign( + builder.assignable(builder.const([node.name, srange(node.name_loc)])), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # Foo ||= bar # ^^^^^^^^^^^^ - alias visit_constant_or_write_node visit_constant_operator_write_node + def visit_constant_or_write_node(node) + builder.op_assign( + builder.assignable(builder.const([node.name, srange(node.name_loc)])), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # Foo, = bar # ^^^ @@ -483,13 +575,13 @@ module Prism if node.parent.nil? builder.const_global( token(node.delimiter_loc), - [node.child.name, srange(node.child.location)] + [node.name, srange(node.name_loc)] ) else builder.const_fetch( visit(node.parent), token(node.delimiter_loc), - [node.child.name, srange(node.child.location)] + [node.name, srange(node.name_loc)] ) end end @@ -512,18 +604,30 @@ module Prism def visit_constant_path_operator_write_node(node) builder.op_assign( builder.assignable(visit(node.target)), - [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], visit(node.value) ) end # Foo::Bar &&= baz # ^^^^^^^^^^^^^^^^ - alias visit_constant_path_and_write_node visit_constant_path_operator_write_node + def visit_constant_path_and_write_node(node) + builder.op_assign( + builder.assignable(visit(node.target)), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # Foo::Bar ||= baz # ^^^^^^^^^^^^^^^^ - alias visit_constant_path_or_write_node visit_constant_path_operator_write_node + def 
visit_constant_path_or_write_node(node) + builder.op_assign( + builder.assignable(visit(node.target)), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # Foo::Bar, = baz # ^^^^^^^^ @@ -584,13 +688,37 @@ module Prism # defined?(a) # ^^^^^^^^^^^ def visit_defined_node(node) - builder.keyword_cmd( - :defined?, - token(node.keyword_loc), - token(node.lparen_loc), - [visit(node.value)], - token(node.rparen_loc) - ) + # Very weird circumstances here where something like: + # + # defined? + # (1) + # + # gets parsed in Ruby as having only the `1` expression but in parser + # it gets parsed as having a begin. In this case we need to synthesize + # that begin to match parser's behavior. + if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n") + builder.keyword_cmd( + :defined?, + token(node.keyword_loc), + nil, + [ + builder.begin( + token(node.lparen_loc), + visit(node.value), + token(node.rparen_loc) + ) + ], + nil + ) + else + builder.keyword_cmd( + :defined?, + token(node.keyword_loc), + token(node.lparen_loc), + [visit(node.value)], + token(node.rparen_loc) + ) + end end # if foo then bar else baz end @@ -653,10 +781,10 @@ module Prism visit(node.index), token(node.in_keyword_loc), visit(node.collection), - if node.do_keyword_loc - token(node.do_keyword_loc) + if (do_keyword_loc = node.do_keyword_loc) + token(do_keyword_loc) else - srange_find(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset, [";"]) + srange_semicolon(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset) end, visit(node.statements), token(node.end_keyword_loc) @@ -711,18 +839,30 @@ module Prism def visit_global_variable_operator_write_node(node) builder.op_assign( builder.assignable(builder.gvar(token(node.name_loc))), - [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + 
[node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], visit(node.value) ) end # $foo &&= bar # ^^^^^^^^^^^^ - alias visit_global_variable_and_write_node visit_global_variable_operator_write_node + def visit_global_variable_and_write_node(node) + builder.op_assign( + builder.assignable(builder.gvar(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # $foo ||= bar # ^^^^^^^^^^^^ - alias visit_global_variable_or_write_node visit_global_variable_operator_write_node + def visit_global_variable_or_write_node(node) + builder.op_assign( + builder.assignable(builder.gvar(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # $foo, = bar # ^^^^ @@ -766,26 +906,26 @@ module Prism visit(node.predicate), token(node.then_keyword_loc), visit(node.statements), - token(node.consequent.else_keyword_loc), - visit(node.consequent) + token(node.subsequent.else_keyword_loc), + visit(node.subsequent) ) elsif node.if_keyword_loc.start_offset == node.location.start_offset builder.condition( token(node.if_keyword_loc), visit(node.predicate), - if node.then_keyword_loc - token(node.then_keyword_loc) + if (then_keyword_loc = node.then_keyword_loc) + token(then_keyword_loc) else - srange_find(node.predicate.location.end_offset, (node.statements&.location || node.consequent&.location || node.end_keyword_loc).start_offset, [";"]) + srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset) end, visit(node.statements), - case node.consequent + case node.subsequent when IfNode - token(node.consequent.if_keyword_loc) + token(node.subsequent.if_keyword_loc) when ElseNode - token(node.consequent.else_keyword_loc) + token(node.subsequent.else_keyword_loc) end, - visit(node.consequent), + visit(node.subsequent), if node.if_keyword != "elsif" 
token(node.end_keyword_loc) end @@ -793,7 +933,7 @@ module Prism else builder.condition_mod( visit(node.statements), - visit(node.consequent), + visit(node.subsequent), token(node.if_keyword_loc), visit(node.predicate) ) @@ -803,7 +943,7 @@ module Prism # 1i # ^^ def visit_imaginary_node(node) - visit_numeric(node, builder.complex([imaginary_value(node), srange(node.location)])) + visit_numeric(node, builder.complex([Complex(0, node.numeric.value), srange(node.location)])) end # { foo: } @@ -839,7 +979,11 @@ module Prism token(node.in_loc), pattern, guard, - srange_find(node.pattern.location.end_offset, node.statements&.location&.start_offset || node.location.end_offset, [";", "then"]), + if (then_loc = node.then_loc) + token(then_loc) + else + srange_semicolon(node.pattern.location.end_offset, node.statements&.location&.start_offset) + end, visit(node.statements) ) end @@ -857,18 +1001,46 @@ module Prism visit_all(arguments), token(node.closing_loc) ), - [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], visit(node.value) ) end # foo[bar] &&= baz # ^^^^^^^^^^^^^^^^ - alias visit_index_and_write_node visit_index_operator_write_node + def visit_index_and_write_node(node) + arguments = node.arguments&.arguments || [] + arguments << node.block if node.block + + builder.op_assign( + builder.index( + visit(node.receiver), + token(node.opening_loc), + visit_all(arguments), + token(node.closing_loc) + ), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # foo[bar] ||= baz # ^^^^^^^^^^^^^^^^ - alias visit_index_or_write_node visit_index_operator_write_node + def visit_index_or_write_node(node) + arguments = node.arguments&.arguments || [] + arguments << node.block if node.block + + builder.op_assign( + builder.index( + visit(node.receiver), + token(node.opening_loc), + visit_all(arguments), + token(node.closing_loc) + ), + 
[node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # foo[bar], = 1 # ^^^^^^^^ @@ -876,7 +1048,7 @@ module Prism builder.index_asgn( visit(node.receiver), token(node.opening_loc), - visit_all(node.arguments.arguments), + visit_all(node.arguments&.arguments || []), token(node.closing_loc), ) end @@ -902,18 +1074,30 @@ module Prism def visit_instance_variable_operator_write_node(node) builder.op_assign( builder.assignable(builder.ivar(token(node.name_loc))), - [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], visit(node.value) ) end # @foo &&= bar # ^^^^^^^^^^^^ - alias visit_instance_variable_and_write_node visit_instance_variable_operator_write_node + def visit_instance_variable_and_write_node(node) + builder.op_assign( + builder.assignable(builder.ivar(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # @foo ||= bar # ^^^^^^^^^^^^ - alias visit_instance_variable_or_write_node visit_instance_variable_operator_write_node + def visit_instance_variable_or_write_node(node) + builder.op_assign( + builder.assignable(builder.ivar(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # @foo, = bar # ^^^^ @@ -932,7 +1116,7 @@ module Prism def visit_interpolated_regular_expression_node(node) builder.regexp_compose( token(node.opening_loc), - visit_all(node.parts), + string_nodes_from_interpolation(node, node.opening), [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)], builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)]) ) @@ -946,61 +1130,12 @@ module Prism # ^^^^^^^^^^^^ def visit_interpolated_string_node(node) if node.heredoc? 
- children, closing = visit_heredoc(node) - opening = token(node.opening_loc) - - start_offset = node.opening_loc.end_offset + 1 - end_offset = node.parts.first.location.start_offset - - # In the below case, the offsets should be the same: - # - # <<~HEREDOC - # a #{b} - # HEREDOC - # - # But in this case, the end_offset would be greater than the start_offset: - # - # <<~HEREDOC - # #{b} - # HEREDOC - # - # So we need to make sure the result node's heredoc range is correct, without updating the children - result = if start_offset < end_offset - # We need to add a padding string to ensure that the heredoc has correct range for its body - padding_string_node = builder.string_internal(["", srange_offsets(start_offset, end_offset)]) - node_with_correct_location = builder.string_compose(opening, [padding_string_node, *children], closing) - # But the padding string should not be included in the final AST, so we need to update the result's children - node_with_correct_location.updated(:dstr, children) - else - builder.string_compose(opening, children, closing) - end - - return result - end - - parts = if node.parts.one? 
{ |part| part.type == :string_node } - node.parts.flat_map do |node| - if node.type == :string_node && node.unescaped.lines.count >= 2 - start_offset = node.content_loc.start_offset - - node.unescaped.lines.map do |line| - end_offset = start_offset + line.length - offsets = srange_offsets(start_offset, end_offset) - start_offset = end_offset - - builder.string_internal([line, offsets]) - end - else - visit(node) - end - end - else - visit_all(node.parts) + return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) } end builder.string_compose( token(node.opening_loc), - parts, + string_nodes_from_interpolation(node, node.opening), token(node.closing_loc) ) end @@ -1010,7 +1145,7 @@ module Prism def visit_interpolated_symbol_node(node) builder.symbol_compose( token(node.opening_loc), - visit_all(node.parts), + string_nodes_from_interpolation(node, node.opening), token(node.closing_loc) ) end @@ -1019,14 +1154,35 @@ module Prism # ^^^^^^^^^^^^ def visit_interpolated_x_string_node(node) if node.heredoc? - children, closing = visit_heredoc(node) - builder.xstring_compose(token(node.opening_loc), children, closing) + return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) } + end + + builder.xstring_compose( + token(node.opening_loc), + string_nodes_from_interpolation(node, node.opening), + token(node.closing_loc) + ) + end + + # -> { it } + # ^^ + def visit_it_local_variable_read_node(node) + builder.ident([:it, srange(node.location)]).updated(:lvar) + end + + # -> { it } + # ^^^^^^^^^ + def visit_it_parameters_node(node) + # FIXME: The builder _should_ always be a subclass of the prism builder. + # Currently RuboCop passes in its own builder that always inherits from the + # parser builder (which is lacking the `itarg` method). 
Once rubocop-ast + # opts in to use the custom prism builder a warning can be emitted when + # it is not the expected class, and eventually raise. + # https://github.com/rubocop/rubocop-ast/pull/354 + if builder.is_a?(Translation::Parser::Builder) + builder.itarg else - builder.xstring_compose( - token(node.opening_loc), - visit_all(node.parts), - token(node.closing_loc) - ) + builder.args(nil, [], nil, false) end end @@ -1052,13 +1208,14 @@ module Prism # ^^^^^ def visit_lambda_node(node) parameters = node.parameters + implicit_parameters = parameters.is_a?(NumberedParametersNode) || parameters.is_a?(ItParametersNode) builder.block( builder.call_lambda(token(node.operator_loc)), [node.opening, srange(node.opening_loc)], if parameters.nil? builder.args(nil, [], nil, false) - elsif node.parameters.is_a?(NumberedParametersNode) + elsif implicit_parameters visit(node.parameters) else builder.args( @@ -1068,7 +1225,7 @@ module Prism false ) end, - node.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? 
[] : find_forwarding(parameters&.parameters))), + visit(node.body), [node.closing, srange(node.closing_loc)] ) end @@ -1094,18 +1251,30 @@ module Prism def visit_local_variable_operator_write_node(node) builder.op_assign( builder.assignable(builder.ident(token(node.name_loc))), - [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)], visit(node.value) ) end # foo &&= bar # ^^^^^^^^^^^ - alias visit_local_variable_and_write_node visit_local_variable_operator_write_node + def visit_local_variable_and_write_node(node) + builder.op_assign( + builder.assignable(builder.ident(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # foo ||= bar # ^^^^^^^^^^^ - alias visit_local_variable_or_write_node visit_local_variable_operator_write_node + def visit_local_variable_or_write_node(node) + builder.op_assign( + builder.assignable(builder.ident(token(node.name_loc))), + [node.operator_loc.slice.chomp("="), srange(node.operator_loc)], + visit(node.value) + ) + end # foo, = bar # ^^^ @@ -1150,7 +1319,7 @@ module Prism # A node that is missing from the syntax tree. This is only used in the # case of a syntax error. The parser gem doesn't have such a concept, so # we invent our own here. - def visit_missing_node(node) + def visit_error_recovery_node(node) ::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location))) end @@ -1168,13 +1337,9 @@ module Prism # foo, bar = baz # ^^^^^^^^ def visit_multi_target_node(node) - elements = [*node.lefts] - elements << node.rest if !node.rest.nil? 
&& !node.rest.is_a?(ImplicitRestNode) - elements.concat(node.rights) - builder.multi_lhs( token(node.lparen_loc), - visit_all(elements), + visit_all(multi_target_elements(node)), token(node.rparen_loc) ) end @@ -1182,9 +1347,11 @@ module Prism # foo, bar = baz # ^^^^^^^^^^^^^^ def visit_multi_write_node(node) - elements = [*node.lefts] - elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode) - elements.concat(node.rights) + elements = multi_target_elements(node) + + if elements.length == 1 && elements.first.is_a?(MultiTargetNode) && !node.rest + elements = multi_target_elements(elements.first) + end builder.multi_assign( builder.multi_lhs( @@ -1218,6 +1385,12 @@ module Prism builder.nil(token(node.location)) end + # def foo(&nil); end + # ^^^^ + def visit_no_block_parameter_node(node) + builder.blocknilarg(token(node.operator_loc), token(node.keyword_loc)) + end + # def foo(**nil); end # ^^^^^ def visit_no_keywords_parameter_node(node) @@ -1265,12 +1438,12 @@ module Prism if node.requireds.any? node.requireds.each do |required| - if required.is_a?(RequiredParameterNode) - params << visit(required) - else - compiler = copy_compiler(in_destructure: true) - params << required.accept(compiler) - end + params << + if required.is_a?(RequiredParameterNode) + visit(required) + else + required.accept(copy_compiler(in_destructure: true)) + end end end @@ -1279,12 +1452,12 @@ module Prism if node.posts.any? 
node.posts.each do |post| - if post.is_a?(RequiredParameterNode) - params << visit(post) - else - compiler = copy_compiler(in_destructure: true) - params << post.accept(compiler) - end + params << + if post.is_a?(RequiredParameterNode) + visit(post) + else + post.accept(copy_compiler(in_destructure: true)) + end end end @@ -1310,7 +1483,8 @@ module Prism # foo => ^(bar) # ^^^^^^ def visit_pinned_expression_node(node) - expression = builder.begin(token(node.lparen_loc), visit(node.expression), token(node.rparen_loc)) + parts = node.expression.accept(copy_compiler(in_pattern: false)) # Don't treat * and similar as match_rest + expression = builder.begin(token(node.lparen_loc), parts, token(node.rparen_loc)) builder.pin(token(node.operator_loc), expression) end @@ -1370,7 +1544,7 @@ module Prism # 1r # ^^ def visit_rational_node(node) - visit_numeric(node, builder.rational([rational_value(node), srange(node.location)])) + visit_numeric(node, builder.rational([node.value, srange(node.location)])) end # redo @@ -1382,9 +1556,18 @@ module Prism # /foo/ # ^^^^^ def visit_regular_expression_node(node) + parts = + if node.content == "" + [] + elsif node.content.include?("\n") + string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening) + else + [builder.string_internal([node.unescaped, srange(node.content_loc)])] + end + builder.regexp_compose( token(node.opening_loc), - [builder.string_internal(token(node.content_loc))], + parts, [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)], builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)]) ) @@ -1530,24 +1713,18 @@ module Prism # ^^^^^ def visit_string_node(node) if node.heredoc? 
- children, closing = visit_heredoc(node.to_interpolated) - builder.string_compose(token(node.opening_loc), children, closing) + visit_heredoc(node.to_interpolated) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) } elsif node.opening == "?" builder.character([node.unescaped, srange(node.location)]) + elsif node.opening&.start_with?("%") && node.unescaped.empty? + builder.string_compose(token(node.opening_loc), [], token(node.closing_loc)) else - parts = if node.content.lines.count <= 1 || node.unescaped.lines.count <= 1 - [builder.string_internal([node.unescaped, srange(node.content_loc)])] - else - start_offset = node.content_loc.start_offset - - [node.content.lines, node.unescaped.lines].transpose.map do |content_line, unescaped_line| - end_offset = start_offset + content_line.length - offsets = srange_offsets(start_offset, end_offset) - start_offset = end_offset - - builder.string_internal([unescaped_line, offsets]) + parts = + if node.content.include?("\n") + string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening) + else + [builder.string_internal([node.unescaped, srange(node.content_loc)])] end - end builder.string_compose( token(node.opening_loc), @@ -1590,19 +1767,14 @@ module Prism builder.symbol([node.unescaped, srange(node.location)]) end else - parts = if node.value.lines.one? - [builder.string_internal([node.unescaped, srange(node.value_loc)])] - else - start_offset = node.value_loc.start_offset - - node.value.lines.map do |line| - end_offset = start_offset + line.length - offsets = srange_offsets(start_offset, end_offset) - start_offset = end_offset - - builder.string_internal([line, offsets]) + parts = + if node.value_loc.nil? 
+ [] + elsif node.value.include?("\n") + string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening) + else + [builder.string_internal([node.unescaped, srange(node.value_loc)])] end - end builder.symbol_compose( token(node.opening_loc), @@ -1634,19 +1806,19 @@ module Prism builder.condition( token(node.keyword_loc), visit(node.predicate), - if node.then_keyword_loc - token(node.then_keyword_loc) + if (then_keyword_loc = node.then_keyword_loc) + token(then_keyword_loc) else - srange_find(node.predicate.location.end_offset, (node.statements&.location || node.consequent&.location || node.end_keyword_loc).start_offset, [";"]) + srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset) end, - visit(node.consequent), - token(node.consequent&.else_keyword_loc), + visit(node.else_clause), + token(node.else_clause&.else_keyword_loc), visit(node.statements), token(node.end_keyword_loc) ) else builder.condition_mod( - visit(node.consequent), + visit(node.else_clause), visit(node.statements), token(node.keyword_loc), visit(node.predicate) @@ -1655,7 +1827,7 @@ module Prism end # until foo; bar end - # ^^^^^^^^^^^^^^^^^ + # ^^^^^^^^^^^^^^^^^^ # # bar until foo # ^^^^^^^^^^^^^ @@ -1665,7 +1837,11 @@ module Prism :until, token(node.keyword_loc), visit(node.predicate), - srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, [";", "do"]), + if (do_keyword_loc = node.do_keyword_loc) + token(do_keyword_loc) + else + srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset) + end, visit(node.statements), token(node.closing_loc) ) @@ -1685,10 +1861,10 @@ module Prism builder.when( token(node.keyword_loc), visit_all(node.conditions), - if node.then_keyword_loc - token(node.then_keyword_loc) + if (then_keyword_loc = node.then_keyword_loc) + 
token(then_keyword_loc) else - srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset || (node.conditions.last.location.end_offset + 1), [";"]) + srange_semicolon(node.conditions.last.location.end_offset, node.statements&.location&.start_offset) end, visit(node.statements) ) @@ -1705,7 +1881,11 @@ module Prism :while, token(node.keyword_loc), visit(node.predicate), - srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, [";", "do"]), + if (do_keyword_loc = node.do_keyword_loc) + token(do_keyword_loc) + else + srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset) + end, visit(node.statements), token(node.closing_loc) ) @@ -1723,29 +1903,23 @@ module Prism # ^^^^^ def visit_x_string_node(node) if node.heredoc? - children, closing = visit_heredoc(node.to_interpolated) - builder.xstring_compose(token(node.opening_loc), children, closing) - else - parts = if node.unescaped.lines.one? 
- [builder.string_internal([node.unescaped, srange(node.content_loc)])] - else - start_offset = node.content_loc.start_offset - - node.unescaped.lines.map do |line| - end_offset = start_offset + line.length - offsets = srange_offsets(start_offset, end_offset) - start_offset = end_offset + return visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) } + end - builder.string_internal([line, offsets]) - end + parts = + if node.content == "" + [] + elsif node.content.include?("\n") + string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening) + else + [builder.string_internal([node.unescaped, srange(node.content_loc)])] end - builder.xstring_compose( - token(node.opening_loc), - parts, - token(node.closing_loc) - ) - end + builder.xstring_compose( + token(node.opening_loc), + parts, + token(node.closing_loc) + ) end # yield @@ -1786,24 +1960,12 @@ module Prism forwarding end - # Because we have mutated the AST to allow for newlines in the middle of - # a rational, we need to manually handle the value here. - def imaginary_value(node) - Complex(0, node.numeric.is_a?(RationalNode) ? rational_value(node.numeric) : node.numeric.value) - end - - # Negate the value of a numeric node. This is a special case where you - # have a negative sign on one line and then a number on the next line. - # In normal Ruby, this will always be a method call. The parser gem, - # however, marks this as a numeric literal. We have to massage the tree - # here to get it into the correct form. 
- def numeric_negate(message_loc, receiver) - case receiver.type - when :integer_node, :float_node - receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location)) - when :rational_node, :imaginary_node - receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location)) - end + # Returns the set of targets for a MultiTargetNode or a MultiWriteNode. + def multi_target_elements(node) + elements = [*node.lefts] + elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode) + elements.concat(node.rights) + elements end # Blocks can have a special set of parameters that automatically expand @@ -1820,16 +1982,6 @@ module Prism parameters.block.nil? end - # Because we have mutated the AST to allow for newlines in the middle of - # a rational, we need to manually handle the value here. - def rational_value(node) - if node.numeric.is_a?(IntegerNode) - Rational(node.numeric.value) - else - Rational(node.slice.gsub(/\s/, "").chomp("r")) - end - end - # Locations in the parser gem AST are generated using this class. We # store a reference to its constant to make it slightly faster to look # up. @@ -1845,14 +1997,16 @@ module Prism Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset]) end - # Constructs a new source range by finding the given tokens between the - # given start offset and end offset. If the needle is not found, it - # returns nil. - def srange_find(start_offset, end_offset, tokens) - tokens.find do |token| - next unless (index = source_buffer.source.byteslice(start_offset...end_offset).index(token)) - offset = start_offset + index - return [token, Range.new(source_buffer, offset_cache[offset], offset_cache[offset + token.length])] + # Constructs a new source range by finding a semicolon between the given + # start offset and end offset. If the semicolon is not found, it returns + # nil. Importantly it does not search past newlines or comments. 
+ # + # Note that end_offset is allowed to be nil, in which case this will + # search until the end of the string. + def srange_semicolon(start_offset, end_offset) + if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*;/]) + final_offset = start_offset + match.bytesize + [";", Range.new(source_buffer, offset_cache[final_offset - 1], offset_cache[final_offset])] end end @@ -1865,20 +2019,22 @@ module Prism def visit_block(call, block) if block parameters = block.parameters + implicit_parameters = parameters.is_a?(NumberedParametersNode) || parameters.is_a?(ItParametersNode) builder.block( call, token(block.opening_loc), if parameters.nil? builder.args(nil, [], nil, false) - elsif parameters.is_a?(NumberedParametersNode) + elsif implicit_parameters visit(parameters) else builder.args( token(parameters.opening_loc), if procarg0?(parameters.parameters) parameter = parameters.parameters.requireds.first - [builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals)) + visited = parameter.is_a?(RequiredParameterNode) ? visit(parameter) : parameter.accept(copy_compiler(in_destructure: true)) + [builder.procarg0(visited)].concat(visit_all(parameters.locals)) else visit(parameters) end, @@ -1886,7 +2042,7 @@ module Prism false ) end, - block.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))), + visit(block.body), token(block.closing_loc) ) else @@ -1897,28 +2053,21 @@ module Prism # Visit a heredoc that can be either a string or an xstring. def visit_heredoc(node) children = Array.new + indented = false + + # If this is a dedenting heredoc, then we need to insert the opening + # content into the children as well. 
+ if node.opening.start_with?("<<~") && node.parts.length > 0 && !node.parts.first.is_a?(StringNode) + location = node.parts.first.location + location = location.copy(start_offset: location.start_offset - location.start_line_slice.bytesize) + children << builder.string_internal(token(location)) + indented = true + end + node.parts.each do |part| pushing = - if part.is_a?(StringNode) && part.unescaped.include?("\n") - unescaped = part.unescaped.lines(chomp: true) - escaped = part.content.lines(chomp: true) - - escaped_lengths = - if node.opening.end_with?("'") - escaped.map { |line| line.bytesize + 1 } - else - escaped.chunk_while { |before, after| before.match?(/(?<!\\)\\$/) }.map { |line| line.join.bytesize + line.length } - end - - start_offset = part.location.start_offset - end_offset = nil - - unescaped.zip(escaped_lengths).map do |unescaped_line, escaped_length| - end_offset = start_offset + (escaped_length || 0) - inner_part = builder.string_internal(["#{unescaped_line}\n", srange_offsets(start_offset, end_offset)]) - start_offset = end_offset - inner_part - end + if part.is_a?(StringNode) && part.content.include?("\n") + string_nodes_from_line_continuations(part.unescaped, part.content, part.location.start_offset, node.opening) else [visit(part)] end @@ -1927,7 +2076,12 @@ module Prism if child.type == :str && child.children.last == "" # nothing elsif child.type == :str && children.last && children.last.type == :str && !children.last.children.first.end_with?("\n") - children.last.children.first << child.children.first + appendee = children[-1] + + location = appendee.loc + location = location.with_expression(location.expression.join(child.loc.expression)) + + children[-1] = appendee.updated(:str, ["#{appendee.children.first}#{child.children.first}"], location: location) else children << child end @@ -1936,8 +2090,10 @@ module Prism closing = node.closing closing_t = [closing.chomp, srange_offsets(node.closing_loc.start_offset, node.closing_loc.end_offset - 
(closing[/\s+$/]&.length || 0))] + composed = yield children, closing_t - [children, closing_t] + composed = composed.updated(nil, children[1..-1]) if indented + composed end # Visit a numeric node and account for the optional sign. @@ -1961,6 +2117,102 @@ module Prism parser.pattern_variables.pop end end + + # When the content of a string node is split across multiple lines, the + # parser gem creates individual string nodes for each line the content is part of. + def string_nodes_from_interpolation(node, opening) + node.parts.flat_map do |part| + if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil? + string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening) + else + visit(part) + end + end + end + + # Create parser string nodes from a single prism node. The parser gem + # "glues" strings together when a line continuation is encountered. + def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening) + unescaped = unescaped.lines + escaped = escaped.lines + percent_array = opening&.start_with?("%w", "%W", "%i", "%I") + regex = opening == "/" || opening&.start_with?("%r") + + # Non-interpolating strings + if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i") + current_length = 0 + current_line = +"" + + escaped.filter_map.with_index do |escaped_line, index| + unescaped_line = unescaped.fetch(index, "") + current_length += escaped_line.bytesize + current_line << unescaped_line + + # Glue line continuations together. Only %w and %i arrays can contain these. + if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd? 
+ next unless index == escaped.count - 1 + end + s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)]) + start_offset += escaped_line.bytesize + current_line = +"" + current_length = 0 + s + end + else + escaped_lengths = [] + normalized_lengths = [] + # Keeps track of where an unescaped line should start a new token. An unescaped + # \n would otherwise be indistinguishable from the actual newline at the end of + # of the line. The parser gem only emits a new string node at "real" newlines, + # line continuations don't start a new node as well. + do_next_tokens = [] + + escaped + .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false } + .each do |lines| + escaped_lengths << lines.sum(&:bytesize) + + unescaped_lines_count = + if regex + 0 # Will always be preserved as is + else + lines.sum do |line| + count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? } + count -= 1 if line.match?(/(?:\A|[^\\])(?:\\\\)*\\n\z/) && count > 0 + count + end + end + + extra = 1 + extra = lines.count if percent_array # Account for line continuations in percent arrays + + normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0)) + normalized_lengths[-1] = lines.sum { |line| line.bytesize } + do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false)) + do_next_tokens[-1] = true + end + + current_line = +"" + current_normalized_length = 0 + + emitted_count = 0 + unescaped.filter_map.with_index do |unescaped_line, index| + current_line << unescaped_line + current_normalized_length += normalized_lengths.fetch(index, 0) + + if do_next_tokens[index] + inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)]) + start_offset += escaped_lengths.fetch(emitted_count, 0) + current_line = +"" + current_normalized_length = 0 + emitted_count += 1 + inner_part + else + nil + end + end + end + end end end end diff 
--git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 9d7caae0ba..e82042867f 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -1,21 +1,25 @@ # frozen_string_literal: true +# :markup: markdown + +require "strscan" +require_relative "../../polyfill/append_as_bytes" +require_relative "../../polyfill/scan_byte" module Prism module Translation class Parser # Accepts a list of prism tokens and converts them into the expected # format for the parser gem. - class Lexer + class Lexer # :nodoc: + # These tokens are always skipped + TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF]) + private_constant :TYPES_ALWAYS_SKIP + # The direct translating of types between the two lexers. TYPES = { # These tokens should never appear in the output of the lexer. - EOF: nil, - MISSING: nil, - NOT_PROVIDED: nil, - IGNORED_NEWLINE: nil, EMBDOC_END: nil, EMBDOC_LINE: nil, - __END__: nil, # These tokens have more or less direct mappings. AMPERSAND: :tAMPER2, @@ -83,6 +87,7 @@ module Prism KEYWORD_DEF: :kDEF, KEYWORD_DEFINED: :kDEFINED, KEYWORD_DO: :kDO, + KEYWORD_DO_BLOCK: :kDO_BLOCK, KEYWORD_DO_LOOP: :kDO_COND, KEYWORD_END: :kEND, KEYWORD_END_UPCASE: :klEND, @@ -134,7 +139,7 @@ module Prism MINUS_GREATER: :tLAMBDA, NEWLINE: :tNL, NUMBERED_REFERENCE: :tNTH_REF, - PARENTHESIS_LEFT: :tLPAREN, + PARENTHESIS_LEFT: :tLPAREN2, PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG, PARENTHESIS_RIGHT: :tRPAREN, PERCENT: :tPERCENT, @@ -173,7 +178,7 @@ module Prism UMINUS_NUM: :tUNARY_NUM, UPLUS: :tUPLUS, USTAR: :tSTAR, - USTAR_STAR: :tPOW, + USTAR_STAR: :tDSTAR, WORDS_SEP: :tSPACE } @@ -184,10 +189,31 @@ module Prism # without them. We should find another way to do this, but in the # meantime we'll hide them from the documentation and mark them as # private constants. 
- EXPR_BEG = 0x1 # :nodoc: - EXPR_LABEL = 0x400 # :nodoc: + EXPR_BEG = 0x1 + EXPR_LABEL = 0x400 + + # It is used to determine whether `do` is of the token type `kDO` or `kDO_LAMBDA`. + # + # NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned + # instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046 + LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG]) + + # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem. + # The following token types are listed as those classified as `tLPAREN`. + LPAREN_CONVERSION_TOKEN_TYPES = Set.new([ + :kBREAK, :tCARET, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3, + :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS, :tLCURLY + ]) - private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL + # Types of tokens that are allowed to continue a method call with comments in-between. + # For these, the parser gem doesn't emit a newline token after the last comment. + COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT]) + private_constant :COMMENT_CONTINUATION_TYPES + + # Heredocs are complex and require us to keep track of a bit of info to refer to later + HeredocData = Struct.new(:identifier, :common_whitespace, keyword_init: true) + + private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL, :LAMBDA_TOKEN_TYPES, :LPAREN_CONVERSION_TOKEN_TYPES, :HeredocData # The Parser::Source::Buffer that the tokens were lexed from. attr_reader :source_buffer @@ -207,7 +233,7 @@ module Prism @offset_cache = offset_cache end - Range = ::Parser::Source::Range # :nodoc: + Range = ::Parser::Source::Range private_constant :Range # Convert the prism tokens into the expected format for the parser gem. 
@@ -217,39 +243,78 @@ module Prism index = 0 length = lexed.length - heredoc_identifier_stack = [] + heredoc_stack = [] + quote_stack = [] + + # The parser gem emits the newline tokens for comments out of order. This saves + # that token location to emit at a later time to properly line everything up. + # https://github.com/whitequark/parser/issues/1025 + comment_newline_location = nil while index < length token, state = lexed[index] index += 1 - next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type) + next if TYPES_ALWAYS_SKIP.include?(token.type) type = TYPES.fetch(token.type) value = token.value - location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset]) + location = range(token.location.start_offset, token.location.end_offset) case type + when :kDO + nearest_lambda_token = tokens.reverse_each.find do |token| + LAMBDA_TOKEN_TYPES.include?(token.first) + end + + if nearest_lambda_token&.first == :tLAMBDA + type = :kDO_LAMBDA + end when :tCHARACTER value.delete_prefix!("?") + # Character literals behave similar to double-quoted strings. We can use the same escaping mechanism. + value = unescape_string(value, "?") when :tCOMMENT if token.type == :EMBDOC_BEGIN - start_index = index - while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1) + while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1) value += next_token.value index += 1 end - if start_index != index - value += next_token.value - location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset]) - index += 1 - end + value += next_token.value + location = range(token.location.start_offset, next_token.location.end_offset) + index += 1 else - value.chomp! 
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1]) + is_at_eol = value.chomp!.nil? + location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1)) + + prev_token, _ = lexed[index - 2] if index - 2 >= 0 + next_token, _ = lexed[index] + + is_inline_comment = prev_token&.location&.start_line == token.location.start_line + if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type) + tokens << [:tCOMMENT, [value, location]] + + nl_location = range(token.location.end_offset - 1, token.location.end_offset) + tokens << [:tNL, [nil, nl_location]] + next + elsif is_inline_comment && next_token&.type == :COMMENT + comment_newline_location = range(token.location.end_offset - 1, token.location.end_offset) + elsif comment_newline_location && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type) + tokens << [:tCOMMENT, [value, location]] + tokens << [:tNL, [nil, comment_newline_location]] + comment_newline_location = nil + next + end end when :tNL + next_token, _ = lexed[index] + # Newlines after comments are emitted out of order. 
+ if next_token&.type == :COMMENT + comment_newline_location = location + next + end + value = nil when :tFLOAT value = parse_float(value) @@ -257,8 +322,8 @@ module Prism value = parse_complex(value) when :tINTEGER if value.start_with?("+") - tokens << [:tUNARY_NUM, ["+", Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])]] - location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset]) + tokens << [:tUNARY_NUM, ["+", range(token.location.start_offset, token.location.start_offset + 1)]] + location = range(token.location.start_offset + 1, token.location.end_offset) end value = parse_integer(value) @@ -268,6 +333,8 @@ module Prism value.chomp!(":") when :tLCURLY type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL + when :tLPAREN2 + type = :tLPAREN if tokens.empty? || LPAREN_CONVERSION_TOKEN_TYPES.include?(tokens.dig(-1, 0)) when :tNTH_REF value = parse_integer(value.delete_prefix("$")) when :tOP_ASGN @@ -275,92 +342,196 @@ module Prism when :tRATIONAL value = parse_rational(value) when :tSPACE + location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value)) value = nil when :tSTRING_BEG - if token.type == :HEREDOC_START - heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier]) - end - if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END + next_token, _ = lexed[index] + next_next_token, _ = lexed[index + 1] + basic_quotes = value == '"' || value == "'" + + if basic_quotes && next_token&.type == :STRING_END next_location = token.location.join(next_token.location) type = :tSTRING value = "" - location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset]) + location = range(next_location.start_offset, next_location.end_offset) index += 1 
- elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END - next_location = token.location.join(next_next_token.location) - type = :tSTRING - value = next_token.value.gsub("\\\\", "\\") - location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset]) - index += 2 - elsif value.start_with?("<<") + elsif value.start_with?("'", '"', "%") + if next_token&.type == :STRING_CONTENT && next_next_token&.type == :STRING_END + string_value = next_token.value + if simplify_string?(string_value, value) + next_location = token.location.join(next_next_token.location) + if percent_array?(value) + value = percent_array_unescape(string_value) + else + value = unescape_string(string_value, value) + end + type = :tSTRING + location = range(next_location.start_offset, next_location.end_offset) + index += 2 + tokens << [type, [value, location]] + + next + end + end + + quote_stack.push(value) + elsif token.type == :HEREDOC_START quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2] + heredoc_type = value[2] == "-" || value[2] == "~" ? value[2] : "" + heredoc = HeredocData.new( + identifier: value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier], + common_whitespace: 0, + ) + if quote == "`" type = :tXSTRING_BEG - value = "<<`" + end + + # The parser gem trims whitespace from squiggly heredocs. We must record + # the most common whitespace to later remove. + if heredoc_type == "~" || heredoc_type == "`" + heredoc.common_whitespace = calculate_heredoc_whitespace(index) + end + + if quote == "'" || quote == '"' || quote == "`" + value = "<<#{quote}" else - value = "<<#{quote == "'" || quote == "\"" ? 
quote : "\""}" + value = '<<"' end + + heredoc_stack.push(heredoc) + quote_stack.push(value) end when :tSTRING_CONTENT - unless (lines = token.value.lines).one? - start_offset = offset_cache[token.location.start_offset] - lines.map do |line| - newline = line.end_with?("\r\n") ? "\r\n" : "\n" + is_percent_array = percent_array?(quote_stack.last) + + if (lines = token.value.lines).one? + # Prism usually emits a single token for strings with line continuations. + # For squiggly heredocs they are not joined so we do that manually here. + current_string = +"" + current_length = 0 + start_offset = token.location.start_offset + while token.type == :STRING_CONTENT + current_length += token.value.bytesize + # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line. + prev_token, _ = lexed[index - 2] if index - 2 >= 0 + is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line + # The parser gem only removes indentation when the heredoc is not nested + not_nested = heredoc_stack.size == 1 + if is_percent_array + value = percent_array_unescape(token.value) + elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0 + value = trim_heredoc_whitespace(token.value, current_heredoc) + end + + current_string << unescape_string(value, quote_stack.last) + relevant_backslash_count = if quote_stack.last.start_with?("%W", "%I") + 0 # the last backslash escapes the newline + else + token.value[/(\\{1,})\n/, 1]&.length || 0 + end + if relevant_backslash_count.even? || !interpolation?(quote_stack.last) + tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]] + break + end + token, _ = lexed[index] + index += 1 + end + else + # When the parser gem encounters a line continuation inside of a multiline string, + # it emits a single string node. The backslash (and remaining newline) is removed. 
+ current_line = +"" + adjustment = 0 + start_offset = token.location.start_offset + emit = false + + lines.each.with_index do |line, index| chomped_line = line.chomp - if match = chomped_line.match(/(?<backslashes>\\+)\z/) - adjustment = match[:backslashes].size / 2 - adjusted_line = chomped_line.delete_suffix("\\" * adjustment) - if match[:backslashes].size.odd? - adjusted_line.delete_suffix!("\\") - adjustment += 2 + backslash_count = chomped_line[/\\{1,}\z/]&.length || 0 + is_interpolation = interpolation?(quote_stack.last) + + if backslash_count.odd? && (is_interpolation || is_percent_array) + if is_percent_array + current_line << percent_array_unescape(line) + adjustment += 1 else - adjusted_line << newline + chomped_line.delete_suffix!("\\") + current_line << chomped_line + adjustment += 2 end + # If the string ends with a line continuation emit the remainder + emit = index == lines.count - 1 else - adjusted_line = line - adjustment = 0 + current_line << line + emit = true end - end_offset = start_offset + adjusted_line.length + adjustment - tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]] - start_offset = end_offset + if emit + end_offset = start_offset + current_line.bytesize + adjustment + tokens << [:tSTRING_CONTENT, [unescape_string(current_line, quote_stack.last), range(start_offset, end_offset)]] + start_offset = end_offset + current_line = +"" + adjustment = 0 + end end - next end + next when :tSTRING_DVAR value = nil when :tSTRING_END if token.type == :HEREDOC_END && value.end_with?("\n") newline_length = value.end_with?("\r\n") ? 
2 : 1 - value = heredoc_identifier_stack.pop - location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length]) + value = heredoc_stack.pop.identifier + location = range(token.location.start_offset, token.location.end_offset - newline_length) elsif token.type == :REGEXP_END value = value[0] - location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1]) + location = range(token.location.start_offset, token.location.start_offset + 1) + end + + if percent_array?(quote_stack.pop) + prev_token, _ = lexed[index - 2] if index - 2 >= 0 + empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type) + ends_with_whitespace = prev_token&.type == :WORDS_SEP + # parser always emits a space token after content in a percent array, even if no actual whitespace is present. + if !empty && !ends_with_whitespace + tokens << [:tSPACE, [nil, range(token.location.start_offset, token.location.start_offset)]] + end end when :tSYMBEG - if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR + if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END next_location = token.location.join(next_token.location) type = :tSYMBOL value = next_token.value value = { "~@" => "~", "!@" => "!" }.fetch(value, value) - location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset]) + location = range(next_location.start_offset, next_location.end_offset) index += 1 + else + quote_stack.push(value) end when :tFID if !tokens.empty? 
&& tokens.dig(-1, 0) == :kDEF type = :tIDENTIFIER end when :tXSTRING_BEG - if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END + if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type) + # self.`() type = :tBACK_REF2 end + quote_stack.push(value) + when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG + if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP + index += 1 + end + + quote_stack.push(value) + when :tREGEXP_BEG + quote_stack.push(value) end tokens << [type, [value, location]] if token.type == :REGEXP_END - tokens << [:tREGEXP_OPT, [token.value[1..], Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])]] + tokens << [:tREGEXP_OPT, [token.value[1..], range(token.location.start_offset + 1, token.location.end_offset)]] end end @@ -369,6 +540,11 @@ module Prism private + # Creates a new parser range, taking prisms byte offsets into account + def range(start_offset, end_offset) + Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset]) + end + # Parse an integer from the string representation. def parse_integer(value) Integer(value) @@ -410,6 +586,233 @@ module Prism rescue ArgumentError 0r end + + # Wonky heredoc tab/spaces rules. 
+ # https://github.com/ruby/prism/blob/v1.3.0/src/prism.c#L10548-L10558 + def calculate_heredoc_whitespace(heredoc_token_index) + next_token_index = heredoc_token_index + nesting_level = 0 + previous_line = -1 + result = Float::MAX + + while (next_token = lexed[next_token_index]&.first) + next_token_index += 1 + next_next_token, _ = lexed[next_token_index] + first_token_on_line = next_token.location.start_column == 0 + + # String content inside nested heredocs and interpolation is ignored + if next_token.type == :HEREDOC_START || next_token.type == :EMBEXPR_BEGIN + # When interpolation is the first token of a line there is no string + # content to check against. There will be no common whitespace. + if nesting_level == 0 && first_token_on_line + result = 0 + end + nesting_level += 1 + elsif next_token.type == :HEREDOC_END || next_token.type == :EMBEXPR_END + nesting_level -= 1 + # When we encountered the matching heredoc end, we can exit + break if nesting_level == -1 + elsif next_token.type == :STRING_CONTENT && nesting_level == 0 && first_token_on_line + common_whitespace = 0 + next_token.value[/^\s*/].each_char do |char| + if char == "\t" + common_whitespace = (common_whitespace / 8 + 1) * 8; + else + common_whitespace += 1 + end + end + + is_first_token_on_line = next_token.location.start_line != previous_line + # Whitespace is significant if followed by interpolation + whitespace_only = common_whitespace == next_token.value.length && next_next_token&.location&.start_line != next_token.location.start_line + if is_first_token_on_line && !whitespace_only && common_whitespace < result + result = common_whitespace + previous_line = next_token.location.start_line + end + end + end + result + end + + # Wonky heredoc tab/spaces rules. 
+ # https://github.com/ruby/prism/blob/v1.3.0/src/prism.c#L16528-L16545 + def trim_heredoc_whitespace(string, heredoc) + trimmed_whitespace = 0 + trimmed_characters = 0 + while (string[trimmed_characters] == "\t" || string[trimmed_characters] == " ") && trimmed_whitespace < heredoc.common_whitespace + if string[trimmed_characters] == "\t" + trimmed_whitespace = (trimmed_whitespace / 8 + 1) * 8; + break if trimmed_whitespace > heredoc.common_whitespace + else + trimmed_whitespace += 1 + end + trimmed_characters += 1 + end + + string[trimmed_characters..] + end + + # Escape sequences that have special and should appear unescaped in the resulting string. + ESCAPES = { + "a" => "\a", "b" => "\b", "e" => "\e", "f" => "\f", + "n" => "\n", "r" => "\r", "s" => "\s", "t" => "\t", + "v" => "\v", "\\" => "\\" + }.freeze + private_constant :ESCAPES + + # When one of these delimiters is encountered, then the other + # one is allowed to be escaped as well. + DELIMITER_SYMETRY = { "[" => "]", "(" => ")", "{" => "}", "<" => ">" }.freeze + private_constant :DELIMITER_SYMETRY + + + # https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/lexer-strings.rl#L14 + REGEXP_META_CHARACTERS = ["\\", "$", "(", ")", "*", "+", ".", "<", ">", "?", "[", "]", "^", "{", "|", "}"] + private_constant :REGEXP_META_CHARACTERS + + # Apply Ruby string escaping rules + def unescape_string(string, quote) + # In single-quoted heredocs, everything is taken literally. + return string if quote == "<<'" + + # OPTIMIZATION: Assume that few strings need escaping to speed up the common case. + return string unless string.include?("\\") + + # Enclosing character for the string. `"` for `"foo"`, `{` for `%w{foo}`, etc. + delimiter = quote[-1] + + if regexp?(quote) + # Should be escaped handled to single-quoted heredocs. The only character that is + # allowed to be escaped is the delimiter, except when that also has special meaning + # in the regexp. 
Since all the symetry delimiters have special meaning, they don't need + # to be considered separately. + if REGEXP_META_CHARACTERS.include?(delimiter) + string + else + # There can never be an even amount of backslashes. It would be a syntax error. + string.gsub(/\\(#{Regexp.escape(delimiter)})/, '\1') + end + elsif interpolation?(quote) + # Appending individual escape sequences may force the string out of its intended + # encoding. Start out with binary and force it back later. + result = "".b + + scanner = StringScanner.new(string) + while (skipped = scanner.skip_until(/\\/)) + # Append what was just skipped over, excluding the found backslash. + result.append_as_bytes(string.byteslice(scanner.pos - skipped, skipped - 1)) + escape_read(result, scanner, false, false) + end + + # Add remaining chars + result.append_as_bytes(string.byteslice(scanner.pos..)) + result.force_encoding(source_buffer.source.encoding) + else + delimiters = Regexp.escape("#{delimiter}#{DELIMITER_SYMETRY[delimiter]}") + string.gsub(/\\([\\#{delimiters}])/, '\1') + end + end + + # Certain strings are merged into a single string token. + def simplify_string?(value, quote) + case quote + when "'" + # Only simplify 'foo' + !value.include?("\n") + when '"' + # Simplify when every line ends with a line continuation, or it is the last line + value.lines.all? do |line| + !line.end_with?("\n") || line[/(\\*)$/, 1]&.length&.odd? + end + else + # %q and similar are never simplified + false + end + end + + # Escape a byte value, given the control and meta flags. + def escape_build(value, control, meta) + value &= 0x9f if control + value |= 0x80 if meta + value + end + + # Read an escape out of the string scanner, given the control and meta + # flags, and push the unescaped value into the result. 
+ def escape_read(result, scanner, control, meta) + if scanner.skip("\n") + # Line continuation + elsif (value = ESCAPES[scanner.peek(1)]) + # Simple single-character escape sequences like \n + result.append_as_bytes(value) + scanner.pos += 1 + elsif (value = scanner.scan(/[0-7]{1,3}/)) + # \nnn + result.append_as_bytes(escape_build(value.to_i(8), control, meta)) + elsif (value = scanner.scan(/x[0-9a-fA-F]{1,2}/)) + # \xnn + result.append_as_bytes(escape_build(value[1..].to_i(16), control, meta)) + elsif (value = scanner.scan(/u[0-9a-fA-F]{4}/)) + # \unnnn + result.append_as_bytes(value[1..].hex.chr(Encoding::UTF_8)) + elsif scanner.skip("u{}") + # https://github.com/whitequark/parser/issues/856 + elsif (value = scanner.scan(/u{.*?}/)) + # \u{nnnn ...} + value[2..-2].split.each do |unicode| + result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8)) + end + elsif (value = scanner.scan(/c\\?(?=[[:print:]])|C-\\?(?=[[:print:]])/)) + # \cx or \C-x where x is an ASCII printable character + escape_read(result, scanner, true, meta) + elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/)) + # \M-x where x is an ASCII printable character + escape_read(result, scanner, control, true) + elsif (byte = scanner.scan_byte) + # Something else after an escape. + if control && byte == 0x3f # ASCII '?' + result.append_as_bytes(escape_build(0x7f, false, meta)) + else + result.append_as_bytes(escape_build(byte, control, meta)) + end + end + end + + # In a percent array, certain whitespace can be preceeded with a backslash, + # causing the following characters to be part of the previous element. + def percent_array_unescape(string) + string.gsub(/(\\)+[ \f\n\r\t\v]/) do |full_match| + full_match.delete_prefix!("\\") if Regexp.last_match[1].length.odd? + full_match + end + end + + # For %-arrays whitespace, the parser gem only considers whitespace before the newline. 
+ def percent_array_leading_whitespace(string) + return 1 if string.start_with?("\n") + + leading_whitespace = 0 + string.each_char do |c| + break if c == "\n" + leading_whitespace += 1 + end + leading_whitespace + end + + # Determine if characters preceeded by a backslash should be escaped or not + def interpolation?(quote) + !quote.end_with?("'") && !quote.start_with?("%q", "%w", "%i", "%s") + end + + # Regexp allow interpolation but are handled differently during unescaping + def regexp?(quote) + quote == "/" || quote.start_with?("%r") + end + + # Determine if the string is part of a %-style array. + def percent_array?(quote) + quote.start_with?("%w", "%W", "%i", "%I") + end end end end diff --git a/lib/prism/translation/parser/rubocop.rb b/lib/prism/translation/parser/rubocop.rb deleted file mode 100644 index 6c9687a5cc..0000000000 --- a/lib/prism/translation/parser/rubocop.rb +++ /dev/null @@ -1,73 +0,0 @@ -# frozen_string_literal: true -# typed: ignore - -warn "WARN: Prism is directly supported since RuboCop 1.62. The `prism/translation/parser/rubocop` file is deprecated." - -require "parser" -require "rubocop" - -require_relative "../../prism" -require_relative "../parser" - -module Prism - module Translation - class Parser - # This is the special version numbers that should be used in RuboCop - # configuration files to trigger using prism. - - # For Ruby 3.3 - VERSION_3_3 = 80_82_73_83_77.33 - - # For Ruby 3.4 - VERSION_3_4 = 80_82_73_83_77.34 - - # This module gets prepended into RuboCop::AST::ProcessedSource. - module ProcessedSource - # This condition is compatible with rubocop-ast versions up to 1.30.0. - if RuboCop::AST::ProcessedSource.instance_method(:parser_class).arity == 1 - # Redefine parser_class so that we can inject the prism parser into the - # list of known parsers. - def parser_class(ruby_version) - if ruby_version == Prism::Translation::Parser::VERSION_3_3 - warn "WARN: Setting `TargetRubyVersion: 80_82_73_83_77.33` is deprecated. 
" \ - "Set to `ParserEngine: parser_prism` and `TargetRubyVersion: 3.3` instead." - require_relative "../parser33" - Prism::Translation::Parser33 - elsif ruby_version == Prism::Translation::Parser::VERSION_3_4 - warn "WARN: Setting `TargetRubyVersion: 80_82_73_83_77.34` is deprecated. " \ - "Set to `ParserEngine: parser_prism` and `TargetRubyVersion: 3.4` instead." - require_relative "../parser34" - Prism::Translation::Parser34 - else - super - end - end - else - # Redefine parser_class so that we can inject the prism parser into the - # list of known parsers. - def parser_class(ruby_version, _parser_engine) - if ruby_version == Prism::Translation::Parser::VERSION_3_3 - warn "WARN: Setting `TargetRubyVersion: 80_82_73_83_77.33` is deprecated. " \ - "Set to `ParserEngine: parser_prism` and `TargetRubyVersion: 3.3` instead." - require_relative "../parser33" - Prism::Translation::Parser33 - elsif ruby_version == Prism::Translation::Parser::VERSION_3_4 - warn "WARN: Setting `TargetRubyVersion: 80_82_73_83_77.34` is deprecated. " \ - "Set to `ParserEngine: parser_prism` and `TargetRubyVersion: 3.4` instead." - require_relative "../parser34" - Prism::Translation::Parser34 - else - super - end - end - end - end - end - end -end - -# :stopdoc: -RuboCop::AST::ProcessedSource.prepend(Prism::Translation::Parser::ProcessedSource) -known_rubies = RuboCop::TargetRuby.const_get(:KNOWN_RUBIES) -RuboCop::TargetRuby.send(:remove_const, :KNOWN_RUBIES) -RuboCop::TargetRuby::KNOWN_RUBIES = [*known_rubies, Prism::Translation::Parser::VERSION_3_3].freeze diff --git a/lib/prism/translation/parser33.rb b/lib/prism/translation/parser33.rb deleted file mode 100644 index b09266e06a..0000000000 --- a/lib/prism/translation/parser33.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -module Prism - module Translation - # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`. 
- class Parser33 < Parser - def version # :nodoc: - 33 - end - end - end -end diff --git a/lib/prism/translation/parser34.rb b/lib/prism/translation/parser34.rb deleted file mode 100644 index 0ead70ad3c..0000000000 --- a/lib/prism/translation/parser34.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -module Prism - module Translation - # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`. - class Parser34 < Parser - def version # :nodoc: - 34 - end - end - end -end diff --git a/lib/prism/translation/parser_current.rb b/lib/prism/translation/parser_current.rb new file mode 100644 index 0000000000..f7c1070e30 --- /dev/null +++ b/lib/prism/translation/parser_current.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true +# :markup: markdown +#-- +# typed: ignore + +module Prism + module Translation + case RUBY_VERSION + when /^3\.3\./ + ParserCurrent = Parser33 + when /^3\.4\./ + ParserCurrent = Parser34 + when /^3\.5\./, /^4\.0\./ + ParserCurrent = Parser40 + when /^4\.1\./ + ParserCurrent = Parser41 + else + # Keep this in sync with released Ruby. + parser = Parser40 + major, minor, _patch = Gem::Version.new(RUBY_VERSION).segments + warn "warning: `Prism::Translation::Current` is loading #{parser.name}, " \ + "but you are running #{major}.#{minor}." + ParserCurrent = parser + end + end +end diff --git a/lib/prism/translation/parser_versions.rb b/lib/prism/translation/parser_versions.rb new file mode 100644 index 0000000000..720c7d548c --- /dev/null +++ b/lib/prism/translation/parser_versions.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true +# :markup: markdown + +module Prism + module Translation + # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`. + class Parser33 < Parser + def version # :nodoc: + 33 + end + end + + # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`. 
+ class Parser34 < Parser + def version # :nodoc: + 34 + end + end + + # This class is the entry-point for Ruby 4.0 of `Prism::Translation::Parser`. + class Parser40 < Parser + def version # :nodoc: + 40 + end + end + + Parser35 = Parser40 # :nodoc: + + # This class is the entry-point for Ruby 4.1 of `Prism::Translation::Parser`. + class Parser41 < Parser + def version # :nodoc: + 41 + end + end + end +end diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index 3c06f6a40d..f179a149a1 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true - -require "ripper" +# :markup: markdown module Prism module Translation @@ -19,31 +18,19 @@ module Prism # The main known difference is that we may omit dispatching some events in # some cases. This impacts the following events: # - # * on_assign_error - # * on_comma - # * on_ignored_nl - # * on_ignored_sp - # * on_kw - # * on_label_end - # * on_lbrace - # * on_lbracket - # * on_lparen - # * on_nl - # * on_op - # * on_operator_ambiguous - # * on_rbrace - # * on_rbracket - # * on_rparen - # * on_semicolon - # * on_sp - # * on_symbeg - # * on_tstring_beg - # * on_tstring_end + # - on_assign_error + # - on_comma + # - on_ignored_nl + # - on_ignored_sp + # - on_nl + # - on_operator_ambiguous + # - on_semicolon + # - on_sp # class Ripper < Compiler # Parses the given Ruby program read from +src+. # +src+ must be a String or an IO or a object with a #gets method. - def Ripper.parse(src, filename = "(ripper)", lineno = 1) + def self.parse(src, filename = "(ripper)", lineno = 1) new(src, filename, lineno).parse end @@ -54,23 +41,24 @@ module Prism # By default, this method does not handle syntax errors in +src+, # use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+. 
# - # require 'ripper' - # require 'pp' + # require "ripper" + # require "pp" # - # pp Ripper.lex("def m(a) nil end") - # #=> [[[1, 0], :on_kw, "def", FNAME ], - # [[1, 3], :on_sp, " ", FNAME ], - # [[1, 4], :on_ident, "m", ENDFN ], - # [[1, 5], :on_lparen, "(", BEG|LABEL], - # [[1, 6], :on_ident, "a", ARG ], - # [[1, 7], :on_rparen, ")", ENDFN ], - # [[1, 8], :on_sp, " ", BEG ], - # [[1, 9], :on_kw, "nil", END ], - # [[1, 12], :on_sp, " ", END ], - # [[1, 13], :on_kw, "end", END ]] + # pp Ripper.lex("def m(a) nil end") + # #=> [[[1, 0], :on_kw, "def", FNAME ], + # [[1, 3], :on_sp, " ", FNAME ], + # [[1, 4], :on_ident, "m", ENDFN ], + # [[1, 5], :on_lparen, "(", BEG|LABEL], + # [[1, 6], :on_ident, "a", ARG ], + # [[1, 7], :on_rparen, ")", ENDFN ], + # [[1, 8], :on_sp, " ", BEG ], + # [[1, 9], :on_kw, "nil", END ], + # [[1, 12], :on_sp, " ", END ], + # [[1, 13], :on_kw, "end", END ]] # - def Ripper.lex(src, filename = "-", lineno = 1, raise_errors: false) - result = Prism.lex_compat(src, filepath: filename, line: lineno) + def self.lex(src, filename = "-", lineno = 1, raise_errors: false) + coerced = coerce_source(src) + result = Prism.lex_compat(coerced, filepath: filename, line: lineno, version: "current", encoding: coerced.encoding) if result.failure? && raise_errors raise SyntaxError, result.errors.first.message @@ -79,6 +67,34 @@ module Prism end end + # Tokenizes the Ruby program and returns an array of strings. + # The +filename+ and +lineno+ arguments are mostly ignored, since the + # return value is just the tokenized input. + # By default, this method does not handle syntax errors in +src+, + # use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+. + # + # p Ripper.tokenize("def m(a) nil end") + # # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"] + # + def self.tokenize(...) 
+ lex(...).map { |token| token[2] } + end + + # Mirros the various lex_types that ripper supports + def self.coerce_source(source) # :nodoc: + if source.is_a?(IO) + source.read + elsif source.respond_to?(:gets) + src = +"" + while line = source.gets + src << line + end + src + else + source.to_str + end + end + # This contains a table of all of the parser events and their # corresponding arity. PARSER_EVENT_TABLE = { @@ -331,7 +347,7 @@ module Prism "__ENCODING__", "__FILE__", "__LINE__" - ] + ].to_set # A list of all of the Ruby binary operators. BINARY_OPERATORS = [ @@ -356,7 +372,7 @@ module Prism :/, :*, :** - ] + ].to_set private_constant :KEYWORDS, :BINARY_OPERATORS @@ -368,17 +384,17 @@ module Prism # returning +nil+ in such cases. Use the +raise_errors+ keyword # to raise a SyntaxError for an error in +src+. # - # require "ripper" - # require "pp" + # require "ripper" + # require "pp" # - # pp Ripper.sexp("def m(a) nil end") - # #=> [:program, - # [[:def, - # [:@ident, "m", [1, 4]], - # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil, nil, nil, nil]], - # [:bodystmt, [[:var_ref, [:@kw, "nil", [1, 9]]]], nil, nil, nil]]]] + # pp Ripper.sexp("def m(a) nil end") + # #=> [:program, + # [[:def, + # [:@ident, "m", [1, 4]], + # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil, nil, nil, nil]], + # [:bodystmt, [[:var_ref, [:@kw, "nil", [1, 9]]]], nil, nil, nil]]]] # - def Ripper.sexp(src, filename = "-", lineno = 1, raise_errors: false) + def self.sexp(src, filename = "-", lineno = 1, raise_errors: false) builder = SexpBuilderPP.new(src, filename, lineno) sexp = builder.parse if builder.error? @@ -397,23 +413,23 @@ module Prism # returning +nil+ in such cases. Use the +raise_errors+ keyword # to raise a SyntaxError for an error in +src+. 
# - # require 'ripper' - # require 'pp' + # require "ripper" + # require "pp" # - # pp Ripper.sexp_raw("def m(a) nil end") - # #=> [:program, - # [:stmts_add, - # [:stmts_new], - # [:def, - # [:@ident, "m", [1, 4]], - # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil]], - # [:bodystmt, - # [:stmts_add, [:stmts_new], [:var_ref, [:@kw, "nil", [1, 9]]]], - # nil, - # nil, - # nil]]]] + # pp Ripper.sexp_raw("def m(a) nil end") + # #=> [:program, + # [:stmts_add, + # [:stmts_new], + # [:def, + # [:@ident, "m", [1, 4]], + # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil]], + # [:bodystmt, + # [:stmts_add, [:stmts_new], [:var_ref, [:@kw, "nil", [1, 9]]]], + # nil, + # nil, + # nil]]]] # - def Ripper.sexp_raw(src, filename = "-", lineno = 1, raise_errors: false) + def self.sexp_raw(src, filename = "-", lineno = 1, raise_errors: false) builder = SexpBuilder.new(src, filename, lineno) sexp = builder.parse if builder.error? @@ -425,9 +441,93 @@ module Prism end end + autoload :Filter, "prism/translation/ripper/filter" + autoload :Lexer, "prism/translation/ripper/lexer" autoload :SexpBuilder, "prism/translation/ripper/sexp" autoload :SexpBuilderPP, "prism/translation/ripper/sexp" + # Provides optimized access to line and column information. + # Ripper bounds are mostly accessed in a linear fashion, so + # we can try a linear scan first and fall back to binary search. + class LineAndColumnCache # :nodoc: + # How many should it look ahead/behind before falling back to binary searching. 
+ WINDOW = 8 + private_constant :WINDOW + + #: (Source source) -> void + def initialize(source) + @source = source + @offsets = source.offsets + @hint = 0 + end + + #: (Integer byte_offset) -> [Integer, Integer] + def line_and_column(byte_offset) + @hint = new_hint(byte_offset) || @source.find_line(byte_offset) + return [@hint + @source.start_line, byte_offset - @offsets[@hint]] + end + + private + + def new_hint(byte_offset) + if @offsets[@hint] <= byte_offset + # Same line? + if (@hint + 1 >= @offsets.size || @offsets[@hint + 1] > byte_offset) + return @hint + end + + # Scan forwards + limit = [@hint + WINDOW + 1, @offsets.size].min + idx = @hint + 1 + while idx < limit + if @offsets[idx] > byte_offset + return idx - 1 + end + if @offsets[idx] == byte_offset + return idx + end + idx += 1 + end + else + # Scan backwards + limit = @hint > WINDOW ? @hint - WINDOW : 0 + idx = @hint + while idx >= limit + 1 + if @offsets[idx - 1] <= byte_offset + return idx - 1 + end + idx -= 1 + end + end + + nil + end + end + + # :stopdoc: + # This is not part of the public API but used by some gems. + + # Ripper-internal bitflags. + LEX_STATE_NAMES = %i[ + BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM + ].map.with_index.to_h { |name, i| [2 ** i, name] }.freeze + private_constant :LEX_STATE_NAMES + + LEX_STATE_NAMES.each do |value, key| + const_set("EXPR_#{key}", value) + end + EXPR_NONE = 0 + EXPR_VALUE = EXPR_BEG + EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS + EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG + EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN + + def self.lex_state_name(state) + LEX_STATE_NAMES.filter_map { |flag, name| name if state & flag != 0 }.join("|") + end + + # :startdoc: + # The source that is being parsed. attr_reader :source @@ -437,16 +537,17 @@ module Prism # The current line number of the parser. attr_reader :lineno - # The current column number of the parser. + # The current column in bytes of the parser. 
attr_reader :column # Create a new Translation::Ripper object with the given source. def initialize(source, filename = "(ripper)", lineno = 1) - @source = source + @source = Ripper.coerce_source(source) @filename = filename @lineno = lineno @column = 0 @result = nil + @line_and_column_cache = nil end ########################################################################## @@ -465,7 +566,12 @@ module Prism bounds(location) if comment.is_a?(InlineComment) - on_comment(comment.slice) + # Inline comments always contain a newline if the line itself contains it + if result.source.source.bytesize > comment.location.end_offset + on_comment("#{comment.slice}\n") + else + on_comment(comment.slice) + end else offset = location.start_offset lines = comment.slice.lines @@ -546,9 +652,14 @@ module Prism # Visitor methods ########################################################################## + # :stopdoc: + # alias foo bar # ^^^^^^^^^^^^^ def visit_alias_method_node(node) + bounds(node.keyword_loc) + on_kw("alias") + new_name = visit(node.new_name) old_name = visit(node.old_name) @@ -559,6 +670,9 @@ module Prism # alias $foo $bar # ^^^^^^^^^^^^^^^ def visit_alias_global_variable_node(node) + bounds(node.keyword_loc) + on_kw("alias") + new_name = visit_alias_global_variable_node_value(node.new_name) old_name = visit_alias_global_variable_node_value(node.old_name) @@ -584,6 +698,10 @@ module Prism # ^^^^^^^^^ def visit_alternation_pattern_node(node) left = visit_pattern_node(node.left) + + bounds(node.operator_loc) + on_op("|") + right = visit_pattern_node(node.right) bounds(node.location) @@ -594,7 +712,13 @@ module Prism # parenthesis node that can be used to wrap patterns. 
private def visit_pattern_node(node) if node.is_a?(ParenthesesNode) - visit(node.body) + bounds(node.opening_loc) + on_lparen("(") + result = visit(node.body) + bounds(node.closing_loc) + on_rparen(")") + + result else visit(node) end @@ -604,6 +728,14 @@ module Prism # ^^^^^^^ def visit_and_node(node) left = visit(node.left) + + bounds(node.operator_loc) + if node.operator == "and" + on_kw("and") + else + on_op("&&") + end + right = visit(node.right) bounds(node.location) @@ -631,6 +763,8 @@ module Prism previous = element end + visit_words_sep(opening_loc, node.elements.last, node.closing_loc) + bounds(node.closing_loc) on_tstring_end(node.closing) when /^%i/ @@ -650,6 +784,8 @@ module Prism previous = element end + visit_words_sep(opening_loc, node.elements.last, node.closing_loc) + bounds(node.closing_loc) on_tstring_end(node.closing) when /^%W/ @@ -687,6 +823,8 @@ module Prism previous = element end + visit_words_sep(opening_loc, node.elements.last, node.closing_loc) + bounds(node.closing_loc) on_tstring_end(node.closing) when /^%I/ @@ -724,6 +862,8 @@ module Prism previous = element end + visit_words_sep(opening_loc, node.elements.last, node.closing_loc) + bounds(node.closing_loc) on_tstring_end(node.closing) else @@ -740,15 +880,21 @@ module Prism on_array(elements) end - # Dispatch a words_sep event that contains the space between the elements + # Dispatch words_sep events that contains the whitespace between the elements # of list literals. private def visit_words_sep(opening_loc, previous, current) - end_offset = (previous.nil? ? opening_loc : previous.location).end_offset - start_offset = current.location.start_offset - - if end_offset != start_offset - bounds(current.location.copy(start_offset: end_offset)) - on_words_sep(source.byteslice(end_offset...start_offset)) + start_offset = (previous.nil? ? 
opening_loc : previous.location).end_offset + end_offset = current.start_offset + length = end_offset - start_offset + + if length > 0 + whitespace = source.byteslice(start_offset, length) + current_offset = start_offset + whitespace.each_line do |part| + bounds(opening_loc.copy(start_offset: current_offset, length: part.bytesize)) + on_words_sep(part) + current_offset += part.bytesize + end end end @@ -774,9 +920,18 @@ module Prism # ^^^^^ def visit_array_pattern_node(node) constant = visit(node.constant) + + if node.opening_loc + bounds(node.opening_loc) + node.opening == "[" ? on_lbracket("[") : on_lparen("(") + end + requireds = visit_all(node.requireds) if node.requireds.any? rest = if (rest_node = node.rest).is_a?(SplatNode) + bounds(rest_node.operator_loc) + on_op("*") + if rest_node.expression.nil? bounds(rest_node.location) on_var_field(nil) @@ -787,6 +942,10 @@ module Prism posts = visit_all(node.posts) if node.posts.any? + if node.closing_loc + bounds(node.closing_loc) + node.closing == "]" ? 
on_rbracket("]") : on_rparen(")") + end bounds(node.location) on_aryptn(constant, requireds, rest, posts) end @@ -802,6 +961,12 @@ module Prism # ^^^^ def visit_assoc_node(node) key = visit(node.key) + + if node.operator_loc + bounds(node.operator_loc) + on_op("=>") + end + value = visit(node.value) bounds(node.location) @@ -814,6 +979,9 @@ module Prism # { **foo } # ^^^^^ def visit_assoc_splat_node(node) + bounds(node.operator_loc) + on_op("**") + value = visit(node.value) bounds(node.location) @@ -830,8 +998,18 @@ module Prism # begin end # ^^^^^^^^^ def visit_begin_node(node) + if node.begin_keyword_loc + bounds(node.begin_keyword_loc) + on_kw("begin") + end + clauses = visit_begin_node_clauses(node.begin_keyword_loc, node, false) + if node.end_keyword_loc + bounds(node.end_keyword_loc) + on_kw("end") + end + bounds(node.location) on_begin(clauses) end @@ -843,7 +1021,7 @@ module Prism on_stmts_add(on_stmts_new, on_void_stmt) else body = node.statements.body - body.unshift(nil) if void_stmt?(location, node.statements.body[0].location, allow_newline) + body = [nil, *body] if void_stmt?(location, node.statements.body[0].location, allow_newline) bounds(node.statements.location) visit_statements_node_body(body) @@ -852,12 +1030,15 @@ module Prism rescue_clause = visit(node.rescue_clause) else_clause = unless (else_clause_node = node.else_clause).nil? + bounds(else_clause_node.else_keyword_loc) + on_kw("else") + else_statements = if else_clause_node.statements.nil? 
[nil] else body = else_clause_node.statements.body - body.unshift(nil) if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline) + body = [nil, *body] if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline) body end @@ -879,7 +1060,7 @@ module Prism on_bodystmt(visit_statements_node_body([nil]), nil, nil, nil) when StatementsNode body = [*node.body] - body.unshift(nil) if void_stmt?(location, body[0].location, allow_newline) + body = [nil, *body] if void_stmt?(location, body[0].location, allow_newline) stmts = visit_statements_node_body(body) bounds(node.body.first.location) @@ -894,6 +1075,8 @@ module Prism # foo(&bar) # ^^^^ def visit_block_argument_node(node) + bounds(node.operator_loc) + on_op("&") visit(node.expression) end @@ -907,6 +1090,13 @@ module Prism # Visit a BlockNode. def visit_block_node(node) braces = node.opening == "{" + bounds(node.opening_loc) + if braces + on_lbrace("{") + else + on_kw("do") + end + parameters = visit(node.parameters) body = @@ -919,7 +1109,7 @@ module Prism braces ? stmts : on_bodystmt(stmts, nil, nil, nil) when StatementsNode stmts = node.body.body - stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) + stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) stmts = visit_statements_node_body(stmts) bounds(node.body.location) @@ -931,6 +1121,14 @@ module Prism end if braces + bounds(node.closing_loc) + on_rbrace("}") + else + bounds(node.closing_loc) + on_kw("end") + end + + if braces bounds(node.location) on_brace_block(parameters, body) else @@ -942,12 +1140,15 @@ module Prism # def foo(&bar); end # ^^^^ def visit_block_parameter_node(node) + bounds(node.operator_loc) + on_op("&") + if node.name_loc.nil? 
bounds(node.location) on_blockarg(nil) else bounds(node.name_loc) - name = visit_token(node.name.to_s) + name = on_ident(node.name.to_s) bounds(node.location) on_blockarg(name) @@ -956,6 +1157,9 @@ module Prism # A block's parameters. def visit_block_parameters_node(node) + bounds(node.opening_loc) + on_op("|") + parameters = if node.parameters.nil? on_params(nil, nil, nil, nil, nil, nil, nil) @@ -970,6 +1174,9 @@ module Prism false end + bounds(node.closing_loc) + on_op("|") + bounds(node.location) on_block_var(parameters, locals) end @@ -980,6 +1187,9 @@ module Prism # break foo # ^^^^^^^^^ def visit_break_node(node) + bounds(node.keyword_loc) + on_kw("break") + if node.arguments.nil? bounds(node.location) on_break(on_args_new) @@ -1004,20 +1214,32 @@ module Prism case node.name when :[] receiver = visit(node.receiver) - arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + + bounds(node.opening_loc) + on_lbracket("[") + + arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + + bounds(node.closing_loc) + on_rbracket("]") + + block = visit(block_node) bounds(node.location) call = on_aref(receiver, arguments) - if block.nil? - call - else + if block_node bounds(node.location) on_method_add_block(call, block) + else + call end when :[]= receiver = visit(node.receiver) + bounds(node.opening_loc) + on_lbracket("[") + *arguments, last_argument = node.arguments.arguments arguments << node.block if !node.block.nil? 
@@ -1033,6 +1255,11 @@ module Prism end end + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.equal_loc) + on_op("=") + bounds(node.location) call = on_aref_field(receiver, arguments) value = visit_write_value(last_argument) @@ -1040,17 +1267,54 @@ module Prism bounds(last_argument.location) on_assign(call, value) when :-@, :+@, :~ - receiver = visit(node.receiver) + bounds(node.message_loc) + on_op(node.message) + receiver = visit(node.receiver) bounds(node.location) on_unary(node.name, receiver) when :! - receiver = visit(node.receiver) + bounds(node.message_loc) + if node.message == "not" + on_kw("not") - bounds(node.location) - on_unary(node.message == "not" ? :not : :!, receiver) - when *BINARY_OPERATORS + if node.opening_loc + bounds(node.opening_loc) + on_lparen("(") + end + + receiver = + if node.receiver.is_a?(ParenthesesNode) && node.receiver.body.nil? + # The parens in `not()` just emit parens and nothing else. + bounds(node.receiver.opening_loc) + on_lparen("(") + bounds(node.receiver.closing_loc) + on_rparen(")") + nil + else + visit(node.receiver) + end + + if node.closing_loc + bounds(node.closing_loc) + on_rparen(")") + end + bounds(node.location) + on_unary(:not, receiver) + else + on_op("!") + + receiver = visit(node.receiver) + + bounds(node.location) + on_unary(:!, receiver) + end + when BINARY_OPERATORS receiver = visit(node.receiver) + + bounds(node.message_loc) + on_op(node.message) + value = visit(node.arguments.arguments.first) bounds(node.location) @@ -1062,9 +1326,21 @@ module Prism if node.variable_call? 
on_vcall(message) else - arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + if node.opening_loc + bounds(node.opening_loc) + on_lparen("(") + end + + arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + + if node.closing_loc + bounds(node.closing_loc) + on_rparen(")") + end + + block = visit(block_node) call = - if node.opening_loc.nil? && arguments&.any? + if node.opening_loc.nil? && get_arguments_and_block(node.arguments, node.block).first.any? bounds(node.location) on_command(message, arguments) elsif !node.opening_loc.nil? @@ -1075,11 +1351,11 @@ module Prism on_method_add_arg(on_fcall(message), on_args_new) end - if block.nil? - call - else + if block_node bounds(node.block.location) on_method_add_block(call, block) + else + call end end end @@ -1087,7 +1363,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) message = if node.message_loc.nil? @@ -1097,13 +1373,30 @@ module Prism visit_token(node.message, false) end + if node.equal_loc + bounds(node.equal_loc) + on_op("=") + end + if node.name.end_with?("=") && !node.message.end_with?("=") && !node.arguments.nil? && node.block.nil? 
value = visit_write_value(node.arguments.arguments.first) bounds(node.location) on_assign(on_field(receiver, call_operator, message), value) else - arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + if node.opening_loc + bounds(node.opening_loc) + on_lparen("(") + end + + arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location)) + + if node.closing_loc + bounds(node.closing_loc) + on_rparen(")") + end + + block = visit(block_node) call = if node.opening_loc.nil? bounds(node.location) @@ -1121,27 +1414,35 @@ module Prism on_method_add_arg(on_call(receiver, call_operator, message), arguments) end - if block.nil? - call - else + if block_node bounds(node.block.location) on_method_add_block(call, block) + else + call end end end end - # Visit the arguments and block of a call node and return the arguments - # and block as they should be used. - private def visit_call_node_arguments(arguments_node, block_node, trailing_comma) + # Extract the arguments and block Ripper-style, which means if the block + # is like `&b` then it's moved to arguments. + private def get_arguments_and_block(arguments_node, block_node) arguments = arguments_node&.arguments || [] block = block_node if block.is_a?(BlockArgumentNode) - arguments << block + arguments += [block] block = nil end + [arguments, block] + end + + # Visit the arguments and block of a call node and return the arguments + # and block as they should be used. 
+ private def visit_call_node_arguments(arguments_node, block_node, trailing_comma) + arguments, block = get_arguments_and_block(arguments_node, block_node) + [ if arguments.length == 1 && arguments.first.is_a?(ForwardingArgumentsNode) visit(arguments.first) @@ -1155,7 +1456,7 @@ module Prism on_args_add_block(args, false) end end, - visit(block) + block, ] end @@ -1173,7 +1474,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) bounds(node.message_loc) message = visit_token(node.message) @@ -1181,8 +1482,8 @@ module Prism bounds(node.location) target = on_field(receiver, call_operator, message) - bounds(node.operator_loc) - operator = on_op("#{node.operator}=") + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") value = visit_write_value(node.value) bounds(node.location) @@ -1195,7 +1496,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) bounds(node.message_loc) message = visit_token(node.message) @@ -1217,7 +1518,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) bounds(node.message_loc) message = visit_token(node.message) @@ -1239,6 +1540,9 @@ module Prism if node.call_operator == "::" receiver = visit(node.receiver) + bounds(node.call_operator_loc) + on_op("::") + bounds(node.message_loc) message = visit_token(node.message) @@ -1248,7 +1552,7 @@ module Prism receiver = visit(node.receiver) bounds(node.call_operator_loc) - call_operator = visit_token(node.call_operator) + call_operator = visit_call_operator(node.call_operator) bounds(node.message_loc) message = visit_token(node.message) @@ -1262,6 +1566,10 @@ module Prism # 
^^^^^^^^^^ def visit_capture_pattern_node(node) value = visit(node.value) + + bounds(node.operator_loc) + on_op("=>") + target = visit(node.target) bounds(node.location) @@ -1271,10 +1579,21 @@ module Prism # case foo; when bar; end # ^^^^^^^^^^^^^^^^^^^^^^^ def visit_case_node(node) + bounds(node.case_keyword_loc) + on_kw("case") + predicate = visit(node.predicate) + visited_conditions = node.conditions.map { |condition| visit(condition) } + visited_else_clause = visit(node.else_clause) + + if !node.else_clause + bounds(node.end_keyword_loc) + on_kw("end") + end + clauses = - node.conditions.reverse_each.inject(visit(node.consequent)) do |consequent, condition| - on_when(*visit(condition), consequent) + visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition| + on_when(*condition, current) end bounds(node.location) @@ -1284,10 +1603,23 @@ module Prism # case foo; in bar; end # ^^^^^^^^^^^^^^^^^^^^^ def visit_case_match_node(node) + bounds(node.case_keyword_loc) + on_kw("case") + predicate = visit(node.predicate) + visited_conditions = node.conditions.map do | condition| + visit(condition) + end + visited_else_clause = visit(node.else_clause) + + if !node.else_clause + bounds(node.end_keyword_loc) + on_kw("end") + end + clauses = - node.conditions.reverse_each.inject(visit(node.consequent)) do |consequent, condition| - on_in(*visit(condition), consequent) + visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition| + on_in(*condition, current) end bounds(node.location) @@ -1297,6 +1629,9 @@ module Prism # class Foo; end # ^^^^^^^^^^^^^^ def visit_class_node(node) + bounds(node.class_keyword_loc) + on_kw("class") + constant_path = if node.constant_path.is_a?(ConstantReadNode) bounds(node.constant_path.location) @@ -1305,9 +1640,17 @@ module Prism visit(node.constant_path) end + if node.inheritance_operator_loc + bounds(node.inheritance_operator_loc) + on_op("<") + end + superclass = visit(node.superclass) bodystmt 
= visit_body_node(node.superclass&.location || node.constant_path.location, node.body, node.superclass.nil?) + bounds(node.end_keyword_loc) + on_kw("end") + bounds(node.location) on_class(constant_path, superclass, bodystmt) end @@ -1321,12 +1664,13 @@ module Prism # @@foo = 1 # ^^^^^^^^^ - # - # @@foo, @@bar = 1 - # ^^^^^ ^^^^^ def visit_class_variable_write_node(node) bounds(node.name_loc) target = on_var_field(on_cvar(node.name.to_s)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -1339,8 +1683,8 @@ module Prism bounds(node.name_loc) target = on_var_field(on_cvar(node.name.to_s)) - bounds(node.operator_loc) - operator = on_op("#{node.operator}=") + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") value = visit_write_value(node.value) bounds(node.location) @@ -1391,12 +1735,13 @@ module Prism # Foo = 1 # ^^^^^^^ - # - # Foo, Bar = 1 - # ^^^ ^^^ def visit_constant_write_node(node) bounds(node.name_loc) target = on_var_field(on_const(node.name.to_s)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -1409,8 +1754,8 @@ module Prism bounds(node.name_loc) target = on_var_field(on_const(node.name.to_s)) - bounds(node.operator_loc) - operator = on_op("#{node.operator}=") + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") value = visit_write_value(node.value) bounds(node.location) @@ -1456,16 +1801,24 @@ module Prism # ^^^^^^^^ def visit_constant_path_node(node) if node.parent.nil? 
- bounds(node.child.location) - child = on_const(node.child.name.to_s) + if node.delimiter_loc + bounds(node.delimiter_loc) + on_op("::") + end + + bounds(node.name_loc) + child = on_const(node.name.to_s) bounds(node.location) on_top_const_ref(child) else parent = visit(node.parent) - bounds(node.child.location) - child = on_const(node.child.name.to_s) + bounds(node.delimiter_loc) + on_op("::") + + bounds(node.name_loc) + child = on_const(node.name.to_s) bounds(node.location) on_const_path_ref(parent, child) @@ -1474,11 +1827,12 @@ module Prism # Foo::Bar = 1 # ^^^^^^^^^^^^ - # - # Foo::Foo, Bar::Bar = 1 - # ^^^^^^^^ ^^^^^^^^ def visit_constant_path_write_node(node) target = visit_constant_path_write_node_target(node.target) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -1488,16 +1842,24 @@ module Prism # Visit a constant path that is part of a write node. private def visit_constant_path_write_node_target(node) if node.parent.nil? 
- bounds(node.child.location) - child = on_const(node.child.name.to_s) + if node.delimiter_loc + bounds(node.delimiter_loc) + on_op("::") + end + + bounds(node.name_loc) + child = on_const(node.name.to_s) bounds(node.location) on_top_const_field(child) else parent = visit(node.parent) - bounds(node.child.location) - child = on_const(node.child.name.to_s) + bounds(node.delimiter_loc) + on_op("::") + + bounds(node.name_loc) + child = on_const(node.name.to_s) bounds(node.location) on_const_path_field(parent, child) @@ -1508,10 +1870,9 @@ module Prism # ^^^^^^^^^^^^^^^ def visit_constant_path_operator_write_node(node) target = visit_constant_path_write_node_target(node.target) - value = visit(node.value) - bounds(node.operator_loc) - operator = on_op("#{node.operator}=") + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") value = visit_write_value(node.value) bounds(node.location) @@ -1522,7 +1883,6 @@ module Prism # ^^^^^^^^^^^^^^^^ def visit_constant_path_and_write_node(node) target = visit_constant_path_write_node_target(node.target) - value = visit(node.value) bounds(node.operator_loc) operator = on_op("&&=") @@ -1536,7 +1896,6 @@ module Prism # ^^^^^^^^^^^^^^^^ def visit_constant_path_or_write_node(node) target = visit_constant_path_write_node_target(node.target) - value = visit(node.value) bounds(node.operator_loc) operator = on_op("||=") @@ -1558,16 +1917,24 @@ module Prism # def self.foo; end # ^^^^^^^^^^^^^^^^^ def visit_def_node(node) + bounds(node.def_keyword_loc) + on_kw("def") + receiver = visit(node.receiver) operator = if !node.operator_loc.nil? bounds(node.operator_loc) - visit_token(node.operator) + node.operator == "." ? on_period(".") : on_op("::") end bounds(node.name_loc) name = visit_token(node.name_loc.slice) + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + parameters = if node.parameters.nil? bounds(node.location) @@ -1577,10 +1944,17 @@ module Prism end if !node.lparen_loc.nil? 
+ bounds(node.rparen_loc) + on_rparen(")") bounds(node.lparen_loc) parameters = on_paren(parameters) end + if node.equal_loc + bounds(node.equal_loc) + on_op("=") + end + bodystmt = if node.equal_loc.nil? visit_body_node(node.rparen_loc || node.end_keyword_loc, node.body) @@ -1591,11 +1965,16 @@ module Prism on_bodystmt(body, nil, nil, nil) end + if node.end_keyword_loc + bounds(node.end_keyword_loc) + on_kw("end") + end + bounds(node.location) - if receiver.nil? - on_def(name, parameters, bodystmt) - else + if receiver on_defs(receiver, operator, name, parameters, bodystmt) + else + on_def(name, parameters, bodystmt) end end @@ -1605,24 +1984,59 @@ module Prism # defined?(a) # ^^^^^^^^^^^ def visit_defined_node(node) + bounds(node.keyword_loc) + on_kw("defined?") + + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + + expression = visit(node.value) + + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + + # Very weird circumstances here where something like: + # + # defined? + # (1) + # + # gets parsed in Ruby as having only the `1` expression but in Ripper it + # gets parsed as having a parentheses node. In this case we need to + # synthesize that node to match Ripper's behavior. + if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n") + bounds(node.lparen_loc.join(node.rparen_loc)) + expression = on_paren(on_stmts_add(on_stmts_new, expression)) + end + bounds(node.location) - on_defined(visit(node.value)) + on_defined(expression) end # if foo then bar else baz end # ^^^^^^^^^^^^ def visit_else_node(node) + bounds(node.else_keyword_loc) + on_kw("else") + statements = if node.statements.nil? 
[nil] else body = node.statements.body - body.unshift(nil) if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false) + body = [nil, *body] if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false) body end + else_statements = visit_statements_node_body(statements) + + bounds(node.end_keyword_loc) + on_kw("end") bounds(node.location) - on_else(visit_statements_node_body(statements)) + on_else(else_statements) end # "foo #{bar}" @@ -1660,12 +2074,15 @@ module Prism # Visit an EnsureNode node. def visit_ensure_node(node) + bounds(node.ensure_keyword_loc) + on_kw("ensure") + statements = if node.statements.nil? [nil] else body = node.statements.body - body.unshift(nil) if void_stmt?(node.ensure_keyword_loc, body[0].location, false) + body = [nil, *body] if void_stmt?(node.ensure_keyword_loc, body[0].location, false) body end @@ -1686,6 +2103,14 @@ module Prism # ^^^^^^^^^^^ def visit_find_pattern_node(node) constant = visit(node.constant) + + if node.opening_loc + bounds(node.opening_loc) + node.opening == "[" ? on_lbracket("[") : on_lparen("(") + end + bounds(node.left.operator_loc) + on_op("*") + left = if node.left.expression.nil? bounds(node.left.location) @@ -1695,6 +2120,10 @@ module Prism end requireds = visit_all(node.requireds) if node.requireds.any? + + bounds(node.right.operator_loc) + on_op("*") + right = if node.right.expression.nil? bounds(node.right.location) @@ -1703,6 +2132,10 @@ module Prism visit(node.right.expression) end + if node.closing_loc + bounds(node.closing_loc) + node.closing == "]" ? 
on_rbracket("]") : on_rparen(")") + end bounds(node.location) on_fndptn(constant, left, requireds, right) end @@ -1711,6 +2144,10 @@ module Prism # ^^^^^^^^^^ def visit_flip_flop_node(node) left = visit(node.left) + + bounds(node.operator_loc) + on_op(node.operator) + right = visit(node.right) bounds(node.location) @@ -1730,8 +2167,18 @@ module Prism # for foo in bar do end # ^^^^^^^^^^^^^^^^^^^^^ def visit_for_node(node) + bounds(node.for_keyword_loc) + on_kw("for") + index = visit(node.index) + bounds(node.in_keyword_loc) + on_kw("in") + collection = visit(node.collection) + if node.do_keyword_loc + bounds(node.do_keyword_loc) + on_kw("do") + end statements = if node.statements.nil? bounds(node.location) @@ -1740,6 +2187,9 @@ module Prism visit(node.statements) end + bounds(node.end_keyword_loc) + on_kw("end") + bounds(node.location) on_for(index, collection, statements) end @@ -1748,6 +2198,7 @@ module Prism # ^^^ def visit_forwarding_arguments_node(node) bounds(node.location) + on_op("...") on_args_forward end @@ -1755,6 +2206,7 @@ module Prism # ^^^ def visit_forwarding_parameter_node(node) bounds(node.location) + on_op("...") on_args_forward end @@ -1764,6 +2216,9 @@ module Prism # super {} # ^^^^^^^^ def visit_forwarding_super_node(node) + bounds(node.keyword_loc) + on_kw("super") + if node.block.nil? 
bounds(node.location) on_zsuper @@ -1784,12 +2239,13 @@ module Prism # $foo = 1 # ^^^^^^^^ - # - # $foo, $bar = 1 - # ^^^^ ^^^^ def visit_global_variable_write_node(node) bounds(node.name_loc) target = on_var_field(on_gvar(node.name.to_s)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -1802,8 +2258,8 @@ module Prism bounds(node.name_loc) target = on_var_field(on_gvar(node.name.to_s)) - bounds(node.operator_loc) - operator = on_op("#{node.operator}=") + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") value = visit_write_value(node.value) bounds(node.location) @@ -1848,6 +2304,9 @@ module Prism # {} # ^^ def visit_hash_node(node) + bounds(node.opening_loc) + on_lbrace("{") + elements = if node.elements.any? args = visit_all(node.elements) @@ -1856,6 +2315,8 @@ module Prism on_assoclist_from_args(args) end + bounds(node.closing_loc) + on_rbrace("}") bounds(node.location) on_hash(elements) end @@ -1864,6 +2325,15 @@ module Prism # ^^ def visit_hash_pattern_node(node) constant = visit(node.constant) + + if node.constant + bounds(node.opening_loc) + node.opening == "[" ? on_lbracket("[") : on_lparen("(") + elsif node.opening_loc + bounds(node.opening_loc) + on_lbrace("{") + end + elements = if node.elements.any? || !node.rest.nil? node.elements.map do |element| @@ -1886,12 +2356,21 @@ module Prism rest = case node.rest when AssocSplatNode + bounds(node.rest.operator_loc) + on_op("**") visit(node.rest.value) when NoKeywordsParameterNode bounds(node.rest.location) on_var_field(visit(node.rest)) end + if node.constant + bounds(node.closing_loc) + node.closing == "]" ? on_rbracket("]") : on_rparen(")") + elsif node.closing_loc + bounds(node.closing_loc) + on_rbrace("}") + end bounds(node.location) on_hshptn(constant, elements, rest) end @@ -1907,13 +2386,27 @@ module Prism def visit_if_node(node) if node.then_keyword == "?" 
predicate = visit(node.predicate) + + bounds(node.then_keyword_loc) + on_op("?") + truthy = visit(node.statements.body.first) - falsy = visit(node.consequent.statements.body.first) + + bounds(node.subsequent.else_keyword_loc) + on_op(":") + + falsy = visit(node.subsequent.statements.body.first) bounds(node.location) on_ifop(predicate, truthy, falsy) elsif node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + bounds(node.if_keyword_loc) + on_kw(node.if_keyword) predicate = visit(node.predicate) + if node.then_keyword_loc && node.then_keyword != "?" + bounds(node.then_keyword_loc) + on_kw("then") + end statements = if node.statements.nil? bounds(node.location) @@ -1921,16 +2414,23 @@ module Prism else visit(node.statements) end - consequent = visit(node.consequent) + subsequent = visit(node.subsequent) + + if node.end_keyword_loc && !node.subsequent + bounds(node.end_keyword_loc) + on_kw("end") + end bounds(node.location) if node.if_keyword == "if" - on_if(predicate, statements, consequent) + on_if(predicate, statements, subsequent) else - on_elsif(predicate, statements, consequent) + on_elsif(predicate, statements, subsequent) end else statements = visit(node.statements.body.first) + bounds(node.if_keyword_loc) + on_kw(node.if_keyword) predicate = visit(node.predicate) bounds(node.location) @@ -1960,9 +2460,16 @@ module Prism # ^^^^^^^^^^^^^^^^^^^^^ def visit_in_node(node) # This is a special case where we're not going to call on_in directly - # because we don't have access to the consequent. Instead, we'll return + # because we don't have access to the subsequent. Instead, we'll return # the component parts and let the parent node handle it. + bounds(node.in_loc) + on_kw("in") + pattern = visit_pattern_node(node.pattern) + if node.then_loc + bounds(node.then_loc) + on_kw("then") + end statements = if node.statements.nil? 
bounds(node.location) @@ -1978,13 +2485,20 @@ module Prism # ^^^^^^^^^^^^^^^ def visit_index_operator_write_node(node) receiver = visit(node.receiver) + + bounds(node.opening_loc) + on_lbracket("[") + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.location) target = on_aref_field(receiver, arguments) - bounds(node.operator_loc) - operator = on_op("#{node.operator}=") + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") value = visit_write_value(node.value) bounds(node.location) @@ -1995,8 +2509,15 @@ module Prism # ^^^^^^^^^^^^^^^^ def visit_index_and_write_node(node) receiver = visit(node.receiver) + + bounds(node.opening_loc) + on_lbracket("[") + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.location) target = on_aref_field(receiver, arguments) @@ -2012,8 +2533,15 @@ module Prism # ^^^^^^^^^^^^^^^^ def visit_index_or_write_node(node) receiver = visit(node.receiver) + + bounds(node.opening_loc) + on_lbracket("[") + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.location) target = on_aref_field(receiver, arguments) @@ -2029,8 +2557,15 @@ module Prism # ^^^^^^^^ def visit_index_target_node(node) receiver = visit(node.receiver) + + bounds(node.opening_loc) + on_lbracket("[") + arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc)) + bounds(node.closing_loc) + on_rbracket("]") + bounds(node.location) on_aref_field(receiver, arguments) end @@ -2047,6 +2582,10 @@ module Prism def 
visit_instance_variable_write_node(node) bounds(node.name_loc) target = on_var_field(on_ivar(node.name.to_s)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -2059,8 +2598,8 @@ module Prism bounds(node.name_loc) target = on_var_field(on_ivar(node.name.to_s)) - bounds(node.operator_loc) - operator = on_op("#{node.operator}=") + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") value = visit_write_value(node.value) bounds(node.location) @@ -2149,20 +2688,37 @@ module Prism # "foo #{bar}" # ^^^^^^^^^^^^ def visit_interpolated_string_node(node) - if node.opening&.start_with?("<<~") - heredoc = visit_heredoc_string_node(node) + with_string_bounds(node) do + if node.opening&.start_with?("<<~") + heredoc = visit_heredoc_string_node(node) - bounds(node.location) - on_string_literal(heredoc) - elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? } - first, *rest = node.parts - rest.inject(visit(first)) do |content, part| - concat = visit(part) + bounds(node.location) + on_string_literal(heredoc) + elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? 
} + first, *rest = node.parts + rest.inject(visit(first)) do |content, part| + concat = visit(part) + + bounds(part.location) + on_string_concat(content, concat) + end + else + bounds(node.parts.first.location) + parts = + node.parts.inject(on_string_content) do |content, part| + on_string_add(content, visit_string_content(part)) + end - bounds(part.location) - on_string_concat(content, concat) + bounds(node.location) + on_string_literal(parts) end - else + end + end + + # :"foo #{bar}" + # ^^^^^^^^^^^^^ + def visit_interpolated_symbol_node(node) + with_string_bounds(node) do bounds(node.parts.first.location) parts = node.parts.inject(on_string_content) do |content, part| @@ -2170,40 +2726,29 @@ module Prism end bounds(node.location) - on_string_literal(parts) + on_dyna_symbol(parts) end end - # :"foo #{bar}" - # ^^^^^^^^^^^^^ - def visit_interpolated_symbol_node(node) - bounds(node.parts.first.location) - parts = - node.parts.inject(on_string_content) do |content, part| - on_string_add(content, visit_string_content(part)) - end - - bounds(node.location) - on_dyna_symbol(parts) - end - # `foo #{bar}` # ^^^^^^^^^^^^ def visit_interpolated_x_string_node(node) - if node.opening.start_with?("<<~") - heredoc = visit_heredoc_x_string_node(node) + with_string_bounds(node) do + if node.opening.start_with?("<<~") + heredoc = visit_heredoc_x_string_node(node) - bounds(node.location) - on_xstring_literal(heredoc) - else - bounds(node.parts.first.location) - parts = - node.parts.inject(on_xstring_new) do |content, part| - on_xstring_add(content, visit_string_content(part)) - end + bounds(node.location) + on_xstring_literal(heredoc) + else + bounds(node.parts.first.location) + parts = + node.parts.inject(on_xstring_new) do |content, part| + on_xstring_add(content, visit_string_content(part)) + end - bounds(node.location) - on_xstring_literal(parts) + bounds(node.location) + on_xstring_literal(parts) + end end end @@ -2218,6 +2763,13 @@ module Prism end # -> { it } + # ^^ + def 
visit_it_local_variable_read_node(node) + bounds(node.location) + on_vcall(on_ident(node.slice)) + end + + # -> { it } # ^^^^^^^^^ def visit_it_parameters_node(node) end @@ -2237,6 +2789,9 @@ module Prism # def foo(**); end # ^^ def visit_keyword_rest_parameter_node(node) + bounds(node.operator_loc) + on_op("**") + if node.name_loc.nil? bounds(node.location) on_kwrest_param(nil) @@ -2256,6 +2811,11 @@ module Prism parameters = if node.parameters.is_a?(BlockParametersNode) + if node.parameters.opening_loc + bounds(node.parameters.opening_loc) + on_lparen("(") + end + # Ripper does not track block-locals within lambdas, so we skip # directly to the parameters here. params = @@ -2266,6 +2826,13 @@ module Prism visit(node.parameters.parameters) end + visit_all(node.parameters.locals) + + if node.parameters.closing_loc + bounds(node.parameters.closing_loc) + on_rparen(")") + end + if node.parameters.opening_loc.nil? params else @@ -2278,9 +2845,11 @@ module Prism end braces = node.opening == "{" + bounds(node.opening_loc) if braces - bounds(node.opening_loc) on_tlambeg(node.opening) + else + on_kw("do") end body = @@ -2293,7 +2862,7 @@ module Prism braces ? 
stmts : on_bodystmt(stmts, nil, nil, nil) when StatementsNode stmts = node.body.body - stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) + stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false) stmts = visit_statements_node_body(stmts) bounds(node.body.location) @@ -2304,6 +2873,13 @@ module Prism raise end + bounds(node.closing_loc) + if braces + on_rbrace("}") + else + on_kw("end") + end + bounds(node.location) on_lambda(parameters, body) end @@ -2312,12 +2888,7 @@ module Prism # ^^^ def visit_local_variable_read_node(node) bounds(node.location) - - if node.name == :"0it" - on_vcall(on_ident(node.slice)) - else - on_var_ref(on_ident(node.slice)) - end + on_var_ref(on_ident(node.slice)) end # foo = 1 @@ -2325,6 +2896,10 @@ module Prism def visit_local_variable_write_node(node) bounds(node.name_loc) target = on_var_field(on_ident(node.name_loc.slice)) + + bounds(node.operator_loc) + on_op("=") + value = visit_write_value(node.value) bounds(node.location) @@ -2337,8 +2912,8 @@ module Prism bounds(node.name_loc) target = on_var_field(on_ident(node.name_loc.slice)) - bounds(node.operator_loc) - operator = on_op("#{node.operator}=") + bounds(node.binary_operator_loc) + operator = on_op("#{node.binary_operator}=") value = visit_write_value(node.value) bounds(node.location) @@ -2399,6 +2974,8 @@ module Prism # ^^^^^^^^^^ def visit_match_predicate_node(node) value = visit(node.value) + bounds(node.operator_loc) + on_kw("in") pattern = on_in(visit_pattern_node(node.pattern), nil, nil) on_case(value, pattern) @@ -2408,6 +2985,10 @@ module Prism # ^^^^^^^^^^ def visit_match_required_node(node) value = visit(node.value) + + bounds(node.operator_loc) + on_op("=>") + pattern = on_in(visit_pattern_node(node.pattern), nil, nil) on_case(value, pattern) @@ -2421,13 +3002,16 @@ module Prism # A node that is missing from the syntax tree. 
This is only used in the # case of a syntax error. - def visit_missing_node(node) - raise "Cannot visit missing nodes directly." + def visit_error_recovery_node(node) + raise "Cannot visit error recovery nodes directly." end # module Foo; end # ^^^^^^^^^^^^^^^ def visit_module_node(node) + bounds(node.module_keyword_loc) + on_kw("module") + constant_path = if node.constant_path.is_a?(ConstantReadNode) bounds(node.constant_path.location) @@ -2438,6 +3022,9 @@ module Prism bodystmt = visit_body_node(node.constant_path.location, node.body, true) + bounds(node.end_keyword_loc) + on_kw("end") + bounds(node.location) on_module(constant_path, bodystmt) end @@ -2445,9 +3032,19 @@ module Prism # (foo, bar), bar = qux # ^^^^^^^^^^ def visit_multi_target_node(node) + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + bounds(node.location) targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true) + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + if node.lparen_loc.nil? targets else @@ -2499,9 +3096,22 @@ module Prism # foo, bar = baz # ^^^^^^^^^^^^^^ def visit_multi_write_node(node) + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + bounds(node.location) targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true) + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + + bounds(node.operator_loc) + on_op("=") + unless node.lparen_loc.nil? bounds(node.lparen_loc) targets = on_mlhs_paren(targets) @@ -2519,6 +3129,9 @@ module Prism # next foo # ^^^^^^^^ def visit_next_node(node) + bounds(node.keyword_loc) + on_kw("next") + if node.arguments.nil? 
bounds(node.location) on_next(on_args_new) @@ -2537,9 +3150,24 @@ module Prism on_var_ref(on_kw("nil")) end + # def foo(&nil); end + # ^^^^ + def visit_no_block_parameter_node(node) + bounds(node.operator_loc) + on_op("&") + bounds(node.keyword_loc) + on_kw("nil") + bounds(node.location) + on_blockarg(:nil) + end + # def foo(**nil); end # ^^^^^ def visit_no_keywords_parameter_node(node) + bounds(node.operator_loc) + on_op("**") + bounds(node.keyword_loc) + on_kw("nil") bounds(node.location) on_nokw_param(nil) @@ -2572,7 +3200,11 @@ module Prism # ^^^^^^^ def visit_optional_parameter_node(node) bounds(node.name_loc) - name = visit_token(node.name.to_s) + name = on_ident(node.name.to_s) + + bounds(node.operator_loc) + on_op("=") + value = visit(node.value) [name, value] @@ -2582,6 +3214,14 @@ module Prism # ^^^^^^ def visit_or_node(node) left = visit(node.left) + + bounds(node.operator_loc) + if node.operator == "or" + on_kw("or") + else + on_op("||") + end + right = visit(node.right) bounds(node.location) @@ -2605,9 +3245,19 @@ module Prism # Visit a destructured positional parameter node. private def visit_destructured_parameter_node(node) + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + bounds(node.location) targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, false) + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + bounds(node.lparen_loc) on_mlhs_paren(targets) end @@ -2618,6 +3268,9 @@ module Prism # (1) # ^^^ def visit_parentheses_node(node) + bounds(node.opening_loc) + on_lparen("(") + body = if node.body.nil? 
on_stmts_add(on_stmts_new, on_void_stmt) @@ -2625,6 +3278,8 @@ module Prism visit(node.body) end + bounds(node.closing_loc) + on_rparen(")") bounds(node.location) on_paren(body) end @@ -2632,8 +3287,15 @@ module Prism # foo => ^(bar) # ^^^^^^ def visit_pinned_expression_node(node) + bounds(node.operator_loc) + on_op("^") + bounds(node.lparen_loc) + on_lparen("(") + expression = visit(node.expression) + bounds(node.rparen_loc) + on_rparen(")") bounds(node.location) on_begin(expression) end @@ -2641,12 +3303,20 @@ module Prism # foo = 1 and bar => ^foo # ^^^^ def visit_pinned_variable_node(node) + bounds(node.operator_loc) + on_op("^") + visit(node.variable) end # END {} # ^^^^^^ def visit_post_execution_node(node) + bounds(node.keyword_loc) + on_kw("END") + bounds(node.opening_loc) + on_lbrace("{") + statements = if node.statements.nil? bounds(node.location) @@ -2655,6 +3325,8 @@ module Prism visit(node.statements) end + bounds(node.closing_loc) + on_rbrace("}") bounds(node.location) on_END(statements) end @@ -2662,6 +3334,11 @@ module Prism # BEGIN {} # ^^^^^^^^ def visit_pre_execution_node(node) + bounds(node.keyword_loc) + on_kw("BEGIN") + bounds(node.opening_loc) + on_lbrace("{") + statements = if node.statements.nil? bounds(node.location) @@ -2670,6 +3347,8 @@ module Prism visit(node.statements) end + bounds(node.closing_loc) + on_rbrace("}") bounds(node.location) on_BEGIN(statements) end @@ -2677,7 +3356,7 @@ module Prism # The top-level program node. def visit_program_node(node) body = node.statements.body - body << nil if body.empty? + body = [nil] if body.empty? 
statements = visit_statements_node_body(body) bounds(node.location) @@ -2688,6 +3367,10 @@ module Prism # ^^^^ def visit_range_node(node) left = visit(node.left) + + bounds(node.operator_loc) + on_op(node.operator) + right = visit(node.right) bounds(node.location) @@ -2708,6 +3391,7 @@ module Prism # ^^^^ def visit_redo_node(node) bounds(node.location) + on_kw("redo") on_redo end @@ -2750,6 +3434,9 @@ module Prism # foo rescue bar # ^^^^^^^^^^^^^^ def visit_rescue_modifier_node(node) + bounds(node.keyword_loc) + on_kw("rescue") + expression = visit_write_value(node.expression) rescue_expression = visit(node.rescue_expression) @@ -2760,6 +3447,9 @@ module Prism # begin; rescue; end # ^^^^^^^ def visit_rescue_node(node) + bounds(node.keyword_loc) + on_kw("rescue") + exceptions = case node.exceptions.length when 0 @@ -2797,6 +3487,11 @@ module Prism end end + if node.operator_loc + bounds(node.operator_loc) + on_op("=>") + end + reference = visit(node.reference) statements = if node.statements.nil? @@ -2806,10 +3501,10 @@ module Prism visit(node.statements) end - consequent = visit(node.consequent) + subsequent = visit(node.subsequent) bounds(node.location) - on_rescue(exceptions, reference, statements, consequent) + on_rescue(exceptions, reference, statements, subsequent) end # def foo(*bar); end @@ -2818,12 +3513,15 @@ module Prism # def foo(*); end # ^ def visit_rest_parameter_node(node) + bounds(node.operator_loc) + on_op("*") + if node.name_loc.nil? bounds(node.location) on_rest_param(nil) else bounds(node.name_loc) - on_rest_param(visit_token(node.name.to_s)) + on_rest_param(on_ident(node.name.to_s)) end end @@ -2831,6 +3529,7 @@ module Prism # ^^^^^ def visit_retry_node(node) bounds(node.location) + on_kw("retry") on_retry end @@ -2840,6 +3539,9 @@ module Prism # return 1 # ^^^^^^^^ def visit_return_node(node) + bounds(node.keyword_loc) + on_kw("return") + if node.arguments.nil? 
bounds(node.location) on_return0 @@ -2866,9 +3568,17 @@ module Prism # class << self; end # ^^^^^^^^^^^^^^^^^^ def visit_singleton_class_node(node) + bounds(node.class_keyword_loc) + on_kw("class") + bounds(node.operator_loc) + on_op("<<") + expression = visit(node.expression) bodystmt = visit_body_node(node.body&.location || node.end_keyword_loc, node.body) + bounds(node.end_keyword_loc) + on_kw("end") + bounds(node.location) on_sclass(expression, bodystmt) end @@ -2903,6 +3613,8 @@ module Prism # def foo(*); bar(*); end # ^ def visit_splat_node(node) + bounds(node.operator_loc) + on_op("*") visit(node.expression) end @@ -2925,26 +3637,68 @@ module Prism # "foo" # ^^^^^ def visit_string_node(node) - if (content = node.content).empty? - bounds(node.location) - on_string_literal(on_string_content) - elsif (opening = node.opening) == "?" - bounds(node.location) - on_CHAR("?#{node.content}") - elsif opening.start_with?("<<~") - heredoc = visit_heredoc_string_node(node.to_interpolated) + with_string_bounds(node) do + if (content = node.content).empty? + bounds(node.location) + on_string_literal(on_string_content) + elsif (opening = node.opening) == "?" 
+ bounds(node.location) + on_CHAR("?#{node.content}") + elsif opening.start_with?("<<~") + heredoc = visit_heredoc_string_node(node.to_interpolated) - bounds(node.location) - on_string_literal(heredoc) - else - bounds(node.content_loc) - tstring_content = on_tstring_content(content) + bounds(node.location) + on_string_literal(heredoc) + else + bounds(node.content_loc) + tstring_content = on_tstring_content(content) - bounds(node.location) - on_string_literal(on_string_add(on_string_content, tstring_content)) + bounds(node.location) + on_string_literal(on_string_add(on_string_content, tstring_content)) + end end end + # Responsible for emitting the various string-like begin/end events + private def with_string_bounds(node) + # `foo "bar": baz` doesn't emit the closing location + assoc = !(opening = node.opening)&.include?(":") && node.closing&.end_with?(":") + + is_heredoc = opening&.start_with?("<<") + if is_heredoc + bounds(node.opening_loc) + on_heredoc_beg(node.opening) + elsif opening&.start_with?(":", "%s") + bounds(node.opening_loc) + on_symbeg(node.opening) + elsif opening&.start_with?("`", "%x") + bounds(node.opening_loc) + on_backtick(node.opening) + elsif opening && !opening.start_with?("?") + bounds(node.opening_loc) + on_tstring_beg(opening) + end + + result = yield + if assoc + if node.closing != ":" + bounds(node.closing_loc) + on_label_end(node.closing) + end + return result + end + + if is_heredoc + bounds(node.closing_loc) + on_heredoc_end(node.closing) + elsif node.closing_loc + bounds(node.closing_loc) + on_tstring_end(node.closing) + end + + result + end + # Ripper gives back the escaped string content but strips out the common # leading whitespace. Prism gives back the unescaped string content and # a location for the escaped string content. Unfortunately these don't @@ -3022,42 +3776,39 @@ module Prism # Visit a heredoc node that is representing a string. 
private def visit_heredoc_string_node(node) - bounds(node.opening_loc) - on_heredoc_beg(node.opening) - bounds(node.location) - result = - visit_heredoc_node(node.parts, on_string_content) do |parts, part| - on_string_add(parts, part) - end - - bounds(node.closing_loc) - on_heredoc_end(node.closing) - - result + visit_heredoc_node(node.parts, on_string_content) do |parts, part| + on_string_add(parts, part) + end end # Visit a heredoc node that is representing an xstring. private def visit_heredoc_x_string_node(node) - bounds(node.opening_loc) - on_heredoc_beg(node.opening) - bounds(node.location) - result = - visit_heredoc_node(node.parts, on_xstring_new) do |parts, part| - on_xstring_add(parts, part) - end - - bounds(node.closing_loc) - on_heredoc_end(node.closing) - - result + visit_heredoc_node(node.parts, on_xstring_new) do |parts, part| + on_xstring_add(parts, part) + end end # super(foo) # ^^^^^^^^^^ def visit_super_node(node) - arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location)) + bounds(node.keyword_loc) + on_kw("super") + + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + + arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location)) + + if node.rparen_loc + bounds(node.rparen_loc) + on_rparen(")") + end + + block = visit(block_node) if !node.lparen_loc.nil? bounds(node.lparen_loc) @@ -3067,35 +3818,36 @@ module Prism bounds(node.location) call = on_super(arguments) - if block.nil? - call - else + if block_node bounds(node.block.location) on_method_add_block(call, block) + else + call end end # :foo # ^^^^ def visit_symbol_node(node) - if (opening = node.opening)&.match?(/^%s|['"]:?$/) - bounds(node.value_loc) - content = on_string_content - - if !(value = node.value).empty? 
- content = on_string_add(content, on_tstring_content(value)) + with_string_bounds(node) do + if node.value_loc.nil? + bounds(node.location) + on_dyna_symbol(on_string_content) + elsif (opening = node.opening)&.match?(/^%s|['"]:?$/) + bounds(node.value_loc) + content = on_string_add(on_string_content, on_tstring_content(node.value)) + bounds(node.location) + on_dyna_symbol(content) + elsif (closing = node.closing) == ":" + bounds(node.location) + on_label("#{node.value}:") + elsif opening.nil? && node.closing_loc.nil? + bounds(node.value_loc) + on_symbol_literal(visit_token(node.value)) + else + bounds(node.value_loc) + on_symbol_literal(on_symbol(visit_token(node.value))) end - - on_dyna_symbol(content) - elsif (closing = node.closing) == ":" - bounds(node.location) - on_label("#{node.value}:") - elsif opening.nil? && node.closing_loc.nil? - bounds(node.value_loc) - on_symbol_literal(visit_token(node.value)) - else - bounds(node.value_loc) - on_symbol_literal(on_symbol(visit_token(node.value))) end end @@ -3109,6 +3861,9 @@ module Prism # undef foo # ^^^^^^^^^ def visit_undef_node(node) + bounds(node.keyword_loc) + on_kw("undef") + names = visit_all(node.names) bounds(node.location) @@ -3122,7 +3877,13 @@ module Prism # ^^^^^^^^^^^^^^ def visit_unless_node(node) if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + bounds(node.keyword_loc) + on_kw("unless") predicate = visit(node.predicate) + if node.then_keyword_loc + bounds(node.then_keyword_loc) + on_kw("then") + end statements = if node.statements.nil? 
bounds(node.location) @@ -3130,12 +3891,19 @@ module Prism else visit(node.statements) end - consequent = visit(node.consequent) + else_clause = visit(node.else_clause) + + if node.end_keyword_loc && !node.else_clause + bounds(node.end_keyword_loc) + on_kw("end") + end bounds(node.location) - on_unless(predicate, statements, consequent) + on_unless(predicate, statements, else_clause) else statements = visit(node.statements.body.first) + bounds(node.keyword_loc) + on_kw("unless") predicate = visit(node.predicate) bounds(node.location) @@ -3149,7 +3917,14 @@ module Prism # bar until foo # ^^^^^^^^^^^^^ def visit_until_node(node) + bounds(node.keyword_loc) + on_kw("until") + if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset) + if node.do_keyword_loc + bounds(node.do_keyword_loc) + on_kw("do") + end predicate = visit(node.predicate) statements = if node.statements.nil? @@ -3159,6 +3934,11 @@ module Prism visit(node.statements) end + if node.closing_loc + bounds(node.closing_loc) + on_kw("end") + end + bounds(node.location) on_until(predicate, statements) else @@ -3174,9 +3954,16 @@ module Prism # ^^^^^^^^^^^^^ def visit_when_node(node) # This is a special case where we're not going to call on_when directly - # because we don't have access to the consequent. Instead, we'll return + # because we don't have access to the subsequent. Instead, we'll return # the component parts and let the parent node handle it. + bounds(node.keyword_loc) + on_kw("when") + conditions = visit_arguments(node.conditions) + if node.then_keyword_loc + bounds(node.then_keyword_loc) + on_kw("then") + end statements = if node.statements.nil? bounds(node.location) @@ -3195,7 +3982,17 @@ module Prism # ^^^^^^^^^^^^^ def visit_while_node(node) if node.statements.nil? 
|| (node.predicate.location.start_offset < node.statements.location.start_offset) + bounds(node.keyword_loc) + on_kw("while") + if node.do_keyword_loc + bounds(node.do_keyword_loc) + on_kw("do") + end predicate = visit(node.predicate) + if node.closing_loc + bounds(node.closing_loc) + on_kw("end") + end statements = if node.statements.nil? bounds(node.location) @@ -3208,6 +4005,8 @@ module Prism on_while(predicate, statements) else statements = visit(node.statements.body.first) + bounds(node.keyword_loc) + on_kw("while") predicate = visit(node.predicate) bounds(node.location) @@ -3218,20 +4017,22 @@ module Prism # `foo` # ^^^^^ def visit_x_string_node(node) - if node.unescaped.empty? - bounds(node.location) - on_xstring_literal(on_xstring_new) - elsif node.opening.start_with?("<<~") - heredoc = visit_heredoc_x_string_node(node.to_interpolated) + with_string_bounds(node) do + if node.unescaped.empty? + bounds(node.location) + on_xstring_literal(on_xstring_new) + elsif node.opening.start_with?("<<~") + heredoc = visit_heredoc_x_string_node(node.to_interpolated) - bounds(node.location) - on_xstring_literal(heredoc) - else - bounds(node.content_loc) - content = on_tstring_content(node.content) + bounds(node.location) + on_xstring_literal(heredoc) + else + bounds(node.content_loc) + content = on_tstring_content(node.content) - bounds(node.location) - on_xstring_literal(on_xstring_add(on_xstring_new, content)) + bounds(node.location) + on_xstring_literal(on_xstring_add(on_xstring_new, content)) + end end end @@ -3241,10 +4042,18 @@ module Prism # yield 1 # ^^^^^^^ def visit_yield_node(node) + bounds(node.keyword_loc) + on_kw("yield") + if node.arguments.nil? && node.lparen_loc.nil? bounds(node.location) on_yield0 else + if node.lparen_loc + bounds(node.lparen_loc) + on_lparen("(") + end + arguments = if node.arguments.nil? bounds(node.location) @@ -3254,6 +4063,8 @@ module Prism end unless node.lparen_loc.nil? 
+ bounds(node.rparen_loc) + on_rparen(")") bounds(node.lparen_loc) arguments = on_paren(arguments) end @@ -3267,7 +4078,11 @@ module Prism # Lazily initialize the parse result. def result - @result ||= Prism.parse(source) + @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true, encoding: source.encoding) + end + + def line_and_column_cache + @line_and_column_cache ||= LineAndColumnCache.new(result.source) end ########################################################################## @@ -3288,30 +4103,34 @@ module Prism # Visit the string content of a particular node. This method is used to # split into the various token types. def visit_token(token, allow_keywords = true) - case token - when "." + if token == "." on_period(token) - when "`" + elsif token == "`" on_backtick(token) - when *(allow_keywords ? KEYWORDS : []) + elsif allow_keywords && KEYWORDS.include?(token) on_kw(token) - when /^_/ + elsif token.start_with?("_") on_ident(token) - when /^[[:upper:]]\w*$/ + elsif token.match?(/^[[:upper:]]\w*$/) on_const(token) - when /^@@/ + elsif token.start_with?("@@") on_cvar(token) - when /^@/ + elsif token.start_with?("@") on_ivar(token) - when /^\$/ + elsif token.start_with?("$") on_gvar(token) - when /^[[:punct:]]/ + elsif token.match?(/^[[:punct:]]/) on_op(token) else on_ident(token) end end + # Visit either `.`, `&.`, or `::`. + def visit_call_operator(token) + token == "." ? on_period(token) : on_op(token) + end + # Visit a node that represents a number. We need to explicitly handle the # unary - operator. def visit_number_node(node) @@ -3319,6 +4138,9 @@ module Prism location = node.location if slice[0] == "-" + bounds(location.copy(length: 1)) + on_op("-") + bounds(location.copy(start_offset: location.start_offset + 1)) value = yield slice[1..-1] @@ -3367,26 +4189,24 @@ module Prism # This method is responsible for updating lineno and column information # to reflect the current node. 
- # - # This method could be drastically improved with some caching on the start - # of every line, but for now it's good enough. def bounds(location) - @lineno = location.start_line - @column = location.start_column + @lineno, @column = line_and_column_cache.line_and_column(location.start_offset) end + # :startdoc: + ########################################################################## # Ripper interface ########################################################################## # :stopdoc: def _dispatch_0; end - def _dispatch_1(_); end - def _dispatch_2(_, _); end - def _dispatch_3(_, _, _); end - def _dispatch_4(_, _, _, _); end - def _dispatch_5(_, _, _, _, _); end - def _dispatch_7(_, _, _, _, _, _, _); end + def _dispatch_1(arg); arg end + def _dispatch_2(arg, _); arg end + def _dispatch_3(arg, _, _); arg end + def _dispatch_4(arg, _, _, _); arg end + def _dispatch_5(arg, _, _, _, _); arg end + def _dispatch_7(arg, _, _, _, _, _, _); arg end # :startdoc: # diff --git a/lib/prism/translation/ripper/filter.rb b/lib/prism/translation/ripper/filter.rb new file mode 100644 index 0000000000..19deef2d37 --- /dev/null +++ b/lib/prism/translation/ripper/filter.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module Prism + module Translation + class Ripper + class Filter # :nodoc: + # :stopdoc: + def initialize(src, filename = '-', lineno = 1) + @__lexer = Lexer.new(src, filename, lineno) + @__line = nil + @__col = nil + @__state = nil + end + + def filename + @__lexer.filename + end + + def lineno + @__line + end + + def column + @__col + end + + def state + @__state + end + + def parse(init = nil) + data = init + @__lexer.lex.each do |pos, event, tok, state| + @__line, @__col = *pos + @__state = state + data = if respond_to?(event, true) + then __send__(event, tok, data) + else on_default(event, tok, data) + end + end + data + end + + private + + def on_default(event, token, data) + data + end + # :startdoc: + end + end + end +end diff --git 
a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb new file mode 100644 index 0000000000..c6aeae4bd7 --- /dev/null +++ b/lib/prism/translation/ripper/lexer.rb @@ -0,0 +1,133 @@ +# frozen_string_literal: true +# :markup: markdown + +require_relative "../ripper" + +module Prism + module Translation + class Ripper + class Lexer < Ripper # :nodoc: + class State # :nodoc: + attr_reader :to_int, :to_s + + def initialize(i) + @to_int = i + @to_s = Ripper.lex_state_name(i) + freeze + end + + def [](index) + case index + when 0, :to_int + @to_int + when 1, :to_s + @to_s + else + nil + end + end + + alias to_i to_int + alias inspect to_s + def pretty_print(q) q.text(to_s) end + def ==(i) super or to_int == i end + def &(i) self.class.new(to_int & i) end + def |(i) self.class.new(to_int | i) end + def allbits?(i) to_int.allbits?(i) end + def anybits?(i) to_int.anybits?(i) end + def nobits?(i) to_int.nobits?(i) end + + # Instances are frozen and there are only a handful of them so we + # cache them here. 
+ STATES = Hash.new { |hash, key| hash[key] = State.new(key) } + private_constant :STATES + + def self.[](i) + STATES[i] + end + end + + class Elem # :nodoc: + attr_accessor :pos, :event, :tok, :state, :message + + def initialize(pos, event, tok, state, message = nil) + @pos = pos + @event = event + @tok = tok + @state = State[state] + @message = message + end + + def [](index) + case index + when 0, :pos + @pos + when 1, :event + @event + when 2, :tok + @tok + when 3, :state + @state + when 4, :message + @message + else + nil + end + end + + def inspect + "#<#{self.class}: #{event}@#{pos[0]}:#{pos[1]}:#{state}: #{tok.inspect}#{": " if message}#{message}>" + end + + alias to_s inspect + + def pretty_print(q) + q.group(2, "#<#{self.class}:", ">") { + q.breakable + q.text("#{event}@#{pos[0]}:#{pos[1]}") + q.breakable + state.pretty_print(q) + q.breakable + q.text("token: ") + tok.pretty_print(q) + if message + q.breakable + q.text("message: ") + q.text(message) + end + } + end + + def to_a + if @message + [@pos, @event, @tok, @state, @message] + else + [@pos, @event, @tok, @state] + end + end + end + + # Pretty much just the same as Prism.lex_compat. + def lex(raise_errors: false) + Ripper.lex(@source, filename, lineno, raise_errors: raise_errors) + end + + # Returns the lex_compat result wrapped in `Elem`. Errors are omitted. + # Since ripper is a streaming parser, tokens are expected to be emitted in the order + # that the parser encounters them. This is not implemented. + def parse(...) + lex(...).map do |position, event, token, state| + Elem.new(position, event, token, state.to_int) + end + end + + # Similar to parse but ripper sorts the elements by position in the source. Also + # includes errors. Since prism does error recovery, in cases of syntax errors + # the result may differ greatly compared to ripper. + def scan(...) + parse(...) 
+ end + end + end + end +end diff --git a/lib/prism/translation/ripper/sexp.rb b/lib/prism/translation/ripper/sexp.rb index dc26a639a3..46c0333544 100644 --- a/lib/prism/translation/ripper/sexp.rb +++ b/lib/prism/translation/ripper/sexp.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true +# :markup: markdown require_relative "../ripper" @@ -7,9 +8,7 @@ module Prism class Ripper # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that # returns the arrays of [type, *children]. - class SexpBuilder < Ripper - # :stopdoc: - + class SexpBuilder < Ripper # :nodoc: attr_reader :error private @@ -64,16 +63,12 @@ module Prism remove_method :on_parse_error alias on_parse_error on_error alias compile_error on_error - - # :startdoc: end # This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that # returns the same values as ::Ripper::SexpBuilder except with a couple of # niceties that flatten linked lists into arrays. - class SexpBuilderPP < SexpBuilder - # :stopdoc: - + class SexpBuilderPP < SexpBuilder # :nodoc: private def on_heredoc_dedent(val, width) @@ -117,8 +112,6 @@ module Prism alias_method "on_#{event}", :_dispatch_event_push end end - - # :startdoc: end end end diff --git a/lib/prism/translation/ripper/shim.rb b/lib/prism/translation/ripper/shim.rb index 10e21cd16a..00ed625da3 100644 --- a/lib/prism/translation/ripper/shim.rb +++ b/lib/prism/translation/ripper/shim.rb @@ -2,4 +2,6 @@ # This writes the prism ripper translation into the Ripper constant so that # users can transparently use Ripper without any changes. 
+# :stopdoc: Ripper = Prism::Translation::Ripper +# :startdoc: diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb index 5c59fe3181..42bc5ee658 100644 --- a/lib/prism/translation/ruby_parser.rb +++ b/lib/prism/translation/ruby_parser.rb @@ -1,6 +1,17 @@ # frozen_string_literal: true +# :markup: markdown -require "ruby_parser" +begin + require "sexp" +rescue LoadError + warn(%q{Error: Unable to load sexp. Add `gem "sexp_processor"` to your Gemfile.}) + exit(1) +end + +class RubyParser # :nodoc: + class SyntaxError < RuntimeError # :nodoc: + end +end module Prism module Translation @@ -8,9 +19,9 @@ module Prism # seattlerb/ruby_parser gem's syntax tree. class RubyParser # A prism visitor that builds Sexp objects. - class Compiler < ::Prism::Compiler + class Compiler < ::Prism::Compiler # :nodoc: # This is the name of the file that we are compiling. We set it on every - # Sexp object that is generated, and also use it to compile __FILE__ + # Sexp object that is generated, and also use it to compile `__FILE__` # nodes. attr_reader :file @@ -50,7 +61,19 @@ module Prism # a and b # ^^^^^^^ def visit_and_node(node) - s(node, :and, visit(node.left), visit(node.right)) + left = visit(node.left) + + if left[0] == :and + # ruby_parser has the and keyword as right-associative as opposed to + # prism which has it as left-associative. We reverse that + # associativity here. + nest = left + nest = nest[2] while nest[2][0] == :and + nest[2] = s(node, :and, nest[2], visit(node.right)) + left + else + s(node, :and, left, visit(node.right)) + end end # [] @@ -114,7 +137,7 @@ module Prism # $+ # ^^ def visit_back_reference_read_node(node) - s(node, :back_ref, node.name.name.delete_prefix("$").to_sym) + s(node, :back_ref, node.name.to_s.delete_prefix("$").to_sym) end # begin end @@ -130,7 +153,7 @@ module Prism end current = node.rescue_clause - until (current = current.consequent).nil? + until (current = current.subsequent).nil? 
result << visit(current) end end @@ -246,6 +269,11 @@ module Prism when RegularExpressionNode, InterpolatedRegularExpressionNode return s(node, :match2, visit(node.receiver), visit(node.arguments.arguments.first)) end + + case node.arguments.arguments.first + when RegularExpressionNode, InterpolatedRegularExpressionNode + return s(node, :match3, visit(node.arguments.arguments.first), visit(node.receiver)) + end end end @@ -271,9 +299,9 @@ module Prism # ^^^^^^^^^^^^^^^ def visit_call_operator_write_node(node) if op_asgn?(node) - s(node, op_asgn_type(node, :op_asgn), visit(node.receiver), visit_write_value(node.value), node.read_name, node.operator) + s(node, op_asgn_type(node, :op_asgn), visit(node.receiver), visit_write_value(node.value), node.read_name, node.binary_operator) else - s(node, op_asgn_type(node, :op_asgn2), visit(node.receiver), node.write_name, node.operator, visit_write_value(node.value)) + s(node, op_asgn_type(node, :op_asgn2), visit(node.receiver), node.write_name, node.binary_operator, visit_write_value(node.value)) end end @@ -325,13 +353,13 @@ module Prism # case foo; when bar; end # ^^^^^^^^^^^^^^^^^^^^^^^ def visit_case_node(node) - s(node, :case, visit(node.predicate)).concat(visit_all(node.conditions)) << visit(node.consequent) + s(node, :case, visit(node.predicate)).concat(visit_all(node.conditions)) << visit(node.else_clause) end # case foo; in bar; end # ^^^^^^^^^^^^^^^^^^^^^ def visit_case_match_node(node) - s(node, :case, visit(node.predicate)).concat(visit_all(node.conditions)) << visit(node.consequent) + s(node, :case, visit(node.predicate)).concat(visit_all(node.conditions)) << visit(node.else_clause) end # class Foo; end @@ -344,14 +372,18 @@ module Prism visit(node.constant_path) end - if node.body.nil? 
- s(node, :class, name, visit(node.superclass)) - elsif node.body.is_a?(StatementsNode) - compiler = copy_compiler(in_def: false) - s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) }) - else - s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false))) - end + result = + if node.body.nil? + s(node, :class, name, visit(node.superclass)) + elsif node.body.is_a?(StatementsNode) + compiler = copy_compiler(in_def: false) + s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) }) + else + s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false))) + end + + attach_comments(result, node) + result end # @@foo @@ -362,9 +394,6 @@ module Prism # @@foo = 1 # ^^^^^^^^^ - # - # @@foo, @@bar = 1 - # ^^^^^ ^^^^^ def visit_class_variable_write_node(node) s(node, class_variable_write_type, node.name, visit_write_value(node.value)) end @@ -372,7 +401,7 @@ module Prism # @@foo += bar # ^^^^^^^^^^^^ def visit_class_variable_operator_write_node(node) - s(node, class_variable_write_type, node.name, s(node, :call, s(node, :cvar, node.name), node.operator, visit_write_value(node.value))) + s(node, class_variable_write_type, node.name, s(node, :call, s(node, :cvar, node.name), node.binary_operator, visit_write_value(node.value))) end # @@foo &&= bar @@ -417,7 +446,7 @@ module Prism # Foo += bar # ^^^^^^^^^^^ def visit_constant_operator_write_node(node) - s(node, :cdecl, node.name, s(node, :call, s(node, :const, node.name), node.operator, visit_write_value(node.value))) + s(node, :cdecl, node.name, s(node, :call, s(node, :const, node.name), node.binary_operator, visit_write_value(node.value))) end # Foo &&= bar @@ -442,9 +471,9 @@ module Prism # ^^^^^^^^ def visit_constant_path_node(node) if node.parent.nil? 
- s(node, :colon3, node.child.name) + s(node, :colon3, node.name) else - s(node, :colon2, visit(node.parent), node.child.name) + s(node, :colon2, visit(node.parent), node.name) end end @@ -460,7 +489,7 @@ module Prism # Foo::Bar += baz # ^^^^^^^^^^^^^^^ def visit_constant_path_operator_write_node(node) - s(node, :op_asgn, visit(node.target), node.operator, visit_write_value(node.value)) + s(node, :op_asgn, visit(node.target), node.binary_operator, visit_write_value(node.value)) end # Foo::Bar &&= baz @@ -480,9 +509,9 @@ module Prism def visit_constant_path_target_node(node) inner = if node.parent.nil? - s(node, :colon3, node.child.name) + s(node, :colon3, node.name) else - s(node, :colon2, visit(node.parent), node.child.name) + s(node, :colon2, visit(node.parent), node.name) end s(node, :const, inner) @@ -502,7 +531,9 @@ module Prism s(node, :defs, visit(node.receiver), name) end + attach_comments(result, node) result.line(node.name_loc.start_line) + if node.parameters.nil? result << s(node, :args).line(node.name_loc.start_line) else @@ -617,9 +648,6 @@ module Prism # $foo = 1 # ^^^^^^^^ - # - # $foo, $bar = 1 - # ^^^^ ^^^^ def visit_global_variable_write_node(node) s(node, :gasgn, node.name, visit_write_value(node.value)) end @@ -627,7 +655,7 @@ module Prism # $foo += bar # ^^^^^^^^^^^ def visit_global_variable_operator_write_node(node) - s(node, :gasgn, node.name, s(node, :call, s(node, :gvar, node.name), node.operator, visit(node.value))) + s(node, :gasgn, node.name, s(node, :call, s(node, :gvar, node.name), node.binary_operator, visit(node.value))) end # $foo &&= bar @@ -678,7 +706,7 @@ module Prism # foo ? bar : baz # ^^^^^^^^^^^^^^^ def visit_if_node(node) - s(node, :if, visit(node.predicate), visit(node.statements), visit(node.consequent)) + s(node, :if, visit(node.predicate), visit(node.statements), visit(node.subsequent)) end # 1i @@ -719,7 +747,7 @@ module Prism arglist << visit(node.block) if !node.block.nil? 
end - s(node, :op_asgn1, visit(node.receiver), arglist, node.operator, visit_write_value(node.value)) + s(node, :op_asgn1, visit(node.receiver), arglist, node.binary_operator, visit_write_value(node.value)) end # foo[bar] &&= baz @@ -765,9 +793,6 @@ module Prism # @foo = 1 # ^^^^^^^^ - # - # @foo, @bar = 1 - # ^^^^ ^^^^ def visit_instance_variable_write_node(node) s(node, :iasgn, node.name, visit_write_value(node.value)) end @@ -775,7 +800,7 @@ module Prism # @foo += bar # ^^^^^^^^^^^ def visit_instance_variable_operator_write_node(node) - s(node, :iasgn, node.name, s(node, :call, s(node, :ivar, node.name), node.operator, visit_write_value(node.value))) + s(node, :iasgn, node.name, s(node, :call, s(node, :ivar, node.name), node.binary_operator, visit_write_value(node.value))) end # @foo &&= bar @@ -805,17 +830,29 @@ module Prism # if /foo #{bar}/ then end # ^^^^^^^^^^^^ def visit_interpolated_match_last_line_node(node) - s(node, :match, s(node, :dregx).concat(visit_interpolated_parts(node.parts))) + parts = visit_interpolated_parts(node.parts) + regexp = + if parts.length == 1 + s(node, :lit, Regexp.new(parts.first, node.options)) + else + s(node, :dregx).concat(parts).tap do |result| + options = node.options + result << options if options != 0 + end + end + + s(node, :match, regexp) end # /foo #{bar}/ # ^^^^^^^^^^^^ def visit_interpolated_regular_expression_node(node) - if node.parts.all? { |part| part.is_a?(StringNode) || (part.is_a?(EmbeddedStatementsNode) && part.statements&.body&.length == 1 && part.statements.body.first.is_a?(StringNode)) } - unescaped = node.parts.map { |part| part.is_a?(StringNode) ? 
part.unescaped : part.statements.body.first.unescaped }.join - s(node, :lit, Regexp.new(unescaped, node.options)) + parts = visit_interpolated_parts(node.parts) + + if parts.length == 1 + s(node, :lit, Regexp.new(parts.first, node.options)) else - s(node, :dregx).concat(visit_interpolated_parts(node.parts)).tap do |result| + s(node, :dregx).concat(parts).tap do |result| options = node.options result << options if options != 0 end @@ -825,47 +862,102 @@ module Prism # "foo #{bar}" # ^^^^^^^^^^^^ def visit_interpolated_string_node(node) - if (node.parts.all? { |part| part.is_a?(StringNode) || (part.is_a?(EmbeddedStatementsNode) && part.statements&.body&.length == 1 && part.statements.body.first.is_a?(StringNode)) }) || - (node.opening.nil? && node.parts.all? { |part| part.is_a?(StringNode) && !part.opening_loc.nil? }) - unescaped = node.parts.map { |part| part.is_a?(StringNode) ? part.unescaped : part.statements.body.first.unescaped }.join - s(node, :str, unescaped) - else - s(node, :dstr).concat(visit_interpolated_parts(node.parts)) - end + parts = visit_interpolated_parts(node.parts) + parts.length == 1 ? s(node, :str, parts.first) : s(node, :dstr).concat(parts) end # :"foo #{bar}" # ^^^^^^^^^^^^^ def visit_interpolated_symbol_node(node) - if node.parts.all? { |part| part.is_a?(StringNode) || (part.is_a?(EmbeddedStatementsNode) && part.statements&.body&.length == 1 && part.statements.body.first.is_a?(StringNode)) } - unescaped = node.parts.map { |part| part.is_a?(StringNode) ? part.unescaped : part.statements.body.first.unescaped }.join - s(node, :lit, unescaped.to_sym) - else - s(node, :dsym).concat(visit_interpolated_parts(node.parts)) - end + parts = visit_interpolated_parts(node.parts) + parts.length == 1 ? s(node, :lit, parts.first.to_sym) : s(node, :dsym).concat(parts) end # `foo #{bar}` # ^^^^^^^^^^^^ def visit_interpolated_x_string_node(node) - children = visit_interpolated_parts(node.parts) - s(node.heredoc? ? 
node.parts.first : node, :dxstr).concat(children) + source = node.heredoc? ? node.parts.first : node + parts = visit_interpolated_parts(node.parts) + parts.length == 1 ? s(source, :xstr, parts.first) : s(source, :dxstr).concat(parts) end # Visit the interpolated content of the string-like node. private def visit_interpolated_parts(parts) - parts.each_with_object([]).with_index do |(part, results), index| - if index == 0 - if part.is_a?(StringNode) - results << part.unescaped + visited = [] + + parts.each do |part| + result = visit(part) + + if result[0] == :evstr && result[1] + if result[1][0] == :str + visited << result[1] + elsif result[1][0] == :dstr + visited.concat(result[1][1..-1]) else - results << "" - results << visit(part) + visited << result + end + visited << :space + elsif result[0] == :dstr + if !visited.empty? && part.parts[0].is_a?(StringNode) + # If we are in the middle of an implicitly concatenated string, + # we should not have a bare string as the first part. In this + # case we need to visit just that first part and then we can + # push the rest of the parts onto the visited array. 
+ result[1] = visit(part.parts[0]) end + visited.concat(result[1..-1]) else - results << visit(part) + visited << result end end + + state = :beginning #: :beginning | :string_content | :interpolated_content + results = [] + + visited.each_with_index do |result, index| + case state + when :beginning + if result.is_a?(String) + results << result + state = :string_content + elsif result.is_a?(Array) && result[0] == :str + results << result[1] + state = :string_content + else + results << "" + results << result + state = :interpolated_content + end + when :string_content + if result == :space + # continue + elsif result.is_a?(String) + results[0] = "#{results[0]}#{result}" + elsif result.is_a?(Array) && result[0] == :str + results[0] = "#{results[0]}#{result[1]}" + else + results << result + state = :interpolated_content + end + when :interpolated_content + if result == :space + # continue + elsif visited[index - 1] != :space && result.is_a?(Array) && result[0] == :str && results[-1][0] == :str && (results[-1].line_max == result.line) + results[-1][1] = "#{results[-1][1]}#{result[1]}" + results[-1].line_max = result.line_max + else + results << result + end + end + end + + results + end + + # -> { it } + # ^^ + def visit_it_local_variable_read_node(node) + s(node, :call, nil, :it) end # foo(bar: baz) @@ -887,8 +979,8 @@ module Prism def visit_lambda_node(node) parameters = case node.parameters - when nil, NumberedParametersNode - s(node, :args) + when nil, ItParametersNode, NumberedParametersNode + 0 else visit(node.parameters) end @@ -912,9 +1004,6 @@ module Prism # foo = 1 # ^^^^^^^ - # - # foo, bar = 1 - # ^^^ ^^^ def visit_local_variable_write_node(node) s(node, :lasgn, node.name, visit_write_value(node.value)) end @@ -922,7 +1011,7 @@ module Prism # foo += bar # ^^^^^^^^^^ def visit_local_variable_operator_write_node(node) - s(node, :lasgn, node.name, s(node, :call, s(node, :lvar, node.name), node.operator, visit_write_value(node.value))) + s(node, :lasgn, 
node.name, s(node, :call, s(node, :lvar, node.name), node.binary_operator, visit_write_value(node.value))) end # foo &&= bar @@ -970,8 +1059,8 @@ module Prism # A node that is missing from the syntax tree. This is only used in the # case of a syntax error. The parser gem doesn't have such a concept, so # we invent our own here. - def visit_missing_node(node) - raise "Cannot visit missing node directly" + def visit_error_recovery_node(node) + raise "Cannot visit error recovery node directly" end # module Foo; end @@ -984,14 +1073,18 @@ module Prism visit(node.constant_path) end - if node.body.nil? - s(node, :module, name) - elsif node.body.is_a?(StatementsNode) - compiler = copy_compiler(in_def: false) - s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) }) - else - s(node, :module, name, node.body.accept(copy_compiler(in_def: false))) - end + result = + if node.body.nil? + s(node, :module, name) + elsif node.body.is_a?(StatementsNode) + compiler = copy_compiler(in_def: false) + s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) }) + else + s(node, :module, name, node.body.accept(copy_compiler(in_def: false))) + end + + attach_comments(result, node) + result end # foo, bar = baz @@ -1047,6 +1140,12 @@ module Prism s(node, :nil) end + # def foo(&nil); end + # ^^^^ + def visit_no_block_parameter_node(node) + :"&nil" + end + # def foo(**nil); end # ^^^^^ def visit_no_keywords_parameter_node(node) @@ -1080,14 +1179,26 @@ module Prism # a or b # ^^^^^^ def visit_or_node(node) - s(node, :or, visit(node.left), visit(node.right)) + left = visit(node.left) + + if left[0] == :or + # ruby_parser has the or keyword as right-associative as opposed to + # prism which has it as left-associative. We reverse that + # associativity here. 
+ nest = left + nest = nest[2] while nest[2][0] == :or + nest[2] = s(node, :or, nest[2], visit(node.right)) + left + else + s(node, :or, left, visit(node.right)) + end end # def foo(bar, *baz); end # ^^^^^^^^^ def visit_parameters_node(node) children = - node.compact_child_nodes.map do |element| + node.each_child_node.map do |element| if element.is_a?(MultiTargetNode) visit_destructured_parameter(element) else @@ -1297,7 +1408,7 @@ module Prism # __FILE__ # ^^^^^^^^ def visit_source_file_node(node) - s(node, :str, file) + s(node, :str, node.filepath) end # __LINE__ @@ -1336,7 +1447,14 @@ module Prism # "foo" # ^^^^^ def visit_string_node(node) - s(node, :str, node.unescaped) + unescaped = node.unescaped + + if node.forced_binary_encoding? + unescaped = unescaped.dup + unescaped.force_encoding(Encoding::BINARY) + end + + s(node, :str, unescaped) end # super(foo) @@ -1378,7 +1496,7 @@ module Prism # bar unless foo # ^^^^^^^^^^^^^^ def visit_unless_node(node) - s(node, :if, visit(node.predicate), visit(node.consequent), visit(node.statements)) + s(node, :if, visit(node.predicate), visit(node.else_clause), visit(node.statements)) end # until foo; bar end @@ -1429,6 +1547,17 @@ module Prism private + # Attach prism comments to the given sexp. + def attach_comments(sexp, node) + return unless node.comments + return if node.comments.empty? + + extra = node.location.start_line - node.comments.last.location.start_line + comments = node.comments.map(&:slice) + comments.concat([nil] * [0, extra].max) + sexp.comments = comments.join("\n") + end + # Create a new compiler with the given options. 
def copy_compiler(in_def: self.in_def, in_pattern: self.in_pattern) Compiler.new(file, in_def: in_def, in_pattern: in_pattern) @@ -1451,7 +1580,7 @@ module Prism else parameters = case block.parameters - when nil, NumberedParametersNode + when nil, ItParametersNode, NumberedParametersNode 0 else visit(block.parameters) @@ -1498,13 +1627,21 @@ module Prism # Parse the given source and translate it into the seattlerb/ruby_parser # gem's Sexp format. def parse(source, filepath = "(string)") - translate(Prism.parse(source), filepath) + translate(Prism.parse(source, filepath: filepath, partial_script: true), filepath) end # Parse the given file and translate it into the seattlerb/ruby_parser # gem's Sexp format. def parse_file(filepath) - translate(Prism.parse_file(filepath), filepath) + translate(Prism.parse_file(filepath, partial_script: true), filepath) + end + + # Parse the give file and translate it into the + # seattlerb/ruby_parser gem's Sexp format. This method is + # provided for API compatibility to RubyParser and takes an + # optional +timeout+ argument. + def process(ruby, file = "(string)", timeout = nil) + Timeout.timeout(timeout) { parse(ruby, file) } end class << self @@ -1531,6 +1668,7 @@ module Prism raise ::RubyParser::SyntaxError, "#{filepath}:#{error.location.start_line} :: #{error.message}" end + result.attach_comments! result.value.accept(Compiler.new(filepath)) end end |
