summaryrefslogtreecommitdiff
path: root/lib/prism
diff options
context:
space:
mode:
Diffstat (limited to 'lib/prism')
-rw-r--r--lib/prism/debug.rb249
-rw-r--r--lib/prism/desugar_compiler.rb319
-rw-r--r--lib/prism/ffi.rb355
-rw-r--r--lib/prism/lex_compat.rb478
-rw-r--r--lib/prism/node_ext.rb196
-rw-r--r--lib/prism/node_find.rb185
-rw-r--r--lib/prism/node_inspector.rb68
-rw-r--r--lib/prism/pack.rb228
-rw-r--r--lib/prism/parse_result.rb835
-rw-r--r--lib/prism/parse_result/comments.rb53
-rw-r--r--lib/prism/parse_result/errors.rb72
-rw-r--r--lib/prism/parse_result/newlines.rb174
-rw-r--r--lib/prism/pattern.rb100
-rw-r--r--lib/prism/polyfill/append_as_bytes.rb15
-rw-r--r--lib/prism/polyfill/byteindex.rb13
-rw-r--r--lib/prism/polyfill/scan_byte.rb14
-rw-r--r--lib/prism/polyfill/string.rb12
-rw-r--r--lib/prism/polyfill/unpack1.rb14
-rw-r--r--lib/prism/polyfill/warn.rb36
-rw-r--r--lib/prism/prism.gemspec209
-rw-r--r--lib/prism/relocation.rb665
-rw-r--r--lib/prism/string_query.rb46
-rw-r--r--lib/prism/translation.rb11
-rw-r--r--lib/prism/translation/parser.rb96
-rw-r--r--lib/prism/translation/parser/builder.rb70
-rw-r--r--lib/prism/translation/parser/compiler.rb822
-rw-r--r--lib/prism/translation/parser/lexer.rb531
-rw-r--r--lib/prism/translation/parser/rubocop.rb73
-rw-r--r--lib/prism/translation/parser33.rb12
-rw-r--r--lib/prism/translation/parser34.rb12
-rw-r--r--lib/prism/translation/parser_current.rb26
-rw-r--r--lib/prism/translation/parser_versions.rb36
-rw-r--r--lib/prism/translation/ripper.rb1446
-rw-r--r--lib/prism/translation/ripper/filter.rb53
-rw-r--r--lib/prism/translation/ripper/lexer.rb133
-rw-r--r--lib/prism/translation/ripper/sexp.rb13
-rw-r--r--lib/prism/translation/ripper/shim.rb2
-rw-r--r--lib/prism/translation/ruby_parser.rb316
38 files changed, 5893 insertions, 2095 deletions
diff --git a/lib/prism/debug.rb b/lib/prism/debug.rb
deleted file mode 100644
index 74f824faa7..0000000000
--- a/lib/prism/debug.rb
+++ /dev/null
@@ -1,249 +0,0 @@
-# frozen_string_literal: true
-
-module Prism
- # This module is used for testing and debugging and is not meant to be used by
- # consumers of this library.
- module Debug
- # A wrapper around a RubyVM::InstructionSequence that provides a more
- # convenient interface for accessing parts of the iseq.
- class ISeq # :nodoc:
- attr_reader :parts
-
- def initialize(parts)
- @parts = parts
- end
-
- def type
- parts[0]
- end
-
- def local_table
- parts[10]
- end
-
- def instructions
- parts[13]
- end
-
- def each_child
- instructions.each do |instruction|
- # Only look at arrays. Other instructions are line numbers or
- # tracepoint events.
- next unless instruction.is_a?(Array)
-
- instruction.each do |opnd|
- # Only look at arrays. Other operands are literals.
- next unless opnd.is_a?(Array)
-
- # Only look at instruction sequences. Other operands are literals.
- next unless opnd[0] == "YARVInstructionSequence/SimpleDataFormat"
-
- yield ISeq.new(opnd)
- end
- end
- end
- end
-
- private_constant :ISeq
-
- # :call-seq:
- # Debug::cruby_locals(source) -> Array
- #
- # For the given source, compiles with CRuby and returns a list of all of the
- # sets of local variables that were encountered.
- def self.cruby_locals(source)
- verbose, $VERBOSE = $VERBOSE, nil
-
- begin
- locals = [] #: Array[Array[Symbol | Integer]]
- stack = [ISeq.new(RubyVM::InstructionSequence.compile(source).to_a)]
-
- while (iseq = stack.pop)
- names = [*iseq.local_table]
- names.map!.with_index do |name, index|
- # When an anonymous local variable is present in the iseq's local
- # table, it is represented as the stack offset from the top.
- # However, when these are dumped to binary and read back in, they
- # are replaced with the symbol :#arg_rest. To consistently handle
- # this, we replace them here with their index.
- if name == :"#arg_rest"
- names.length - index + 1
- else
- name
- end
- end
-
- locals << names
- iseq.each_child { |child| stack << child }
- end
-
- locals
- ensure
- $VERBOSE = verbose
- end
- end
-
- # Used to hold the place of a local that will be in the local table but
- # cannot be accessed directly from the source code. For example, the
- # iteration variable in a for loop or the positional parameter on a method
- # definition that is destructured.
- AnonymousLocal = Object.new
- private_constant :AnonymousLocal
-
- # :call-seq:
- # Debug::prism_locals(source) -> Array
- #
- # For the given source, parses with prism and returns a list of all of the
- # sets of local variables that were encountered.
- def self.prism_locals(source)
- locals = [] #: Array[Array[Symbol | Integer]]
- stack = [Prism.parse(source).value] #: Array[Prism::node]
-
- while (node = stack.pop)
- case node
- when BlockNode, DefNode, LambdaNode
- names = node.locals
- params =
- if node.is_a?(DefNode)
- node.parameters
- elsif node.parameters.is_a?(NumberedParametersNode)
- nil
- else
- node.parameters&.parameters
- end
-
- # prism places parameters in the same order that they appear in the
- # source. CRuby places them in the order that they need to appear
- # according to their own internal calling convention. We mimic that
- # order here so that we can compare properly.
- if params
- sorted = [
- *params.requireds.map do |required|
- if required.is_a?(RequiredParameterNode)
- required.name
- else
- AnonymousLocal
- end
- end,
- *params.optionals.map(&:name),
- *((params.rest.name || :*) if params.rest && !params.rest.is_a?(ImplicitRestNode)),
- *params.posts.map do |post|
- if post.is_a?(RequiredParameterNode)
- post.name
- else
- AnonymousLocal
- end
- end,
- *params.keywords.grep(RequiredKeywordParameterNode).map(&:name),
- *params.keywords.grep(OptionalKeywordParameterNode).map(&:name),
- ]
-
- sorted << AnonymousLocal if params.keywords.any?
-
- if params.keyword_rest.is_a?(ForwardingParameterNode)
- sorted.push(:*, :**, :&, :"...")
- elsif params.keyword_rest.is_a?(KeywordRestParameterNode)
- sorted << (params.keyword_rest.name || :**)
- end
-
- # Recurse down the parameter tree to find any destructured
- # parameters and add them after the other parameters.
- param_stack = params.requireds.concat(params.posts).grep(MultiTargetNode).reverse
- while (param = param_stack.pop)
- case param
- when MultiTargetNode
- param_stack.concat(param.rights.reverse)
- param_stack << param.rest if param.rest&.expression && !sorted.include?(param.rest.expression.name)
- param_stack.concat(param.lefts.reverse)
- when RequiredParameterNode
- sorted << param.name
- when SplatNode
- sorted << param.expression.name
- end
- end
-
- if params.block
- sorted << (params.block.name || :&)
- end
-
- names = sorted.concat(names - sorted)
- end
-
- names.map!.with_index do |name, index|
- if name == AnonymousLocal
- names.length - index + 1
- else
- name
- end
- end
-
- locals << names
- when ClassNode, ModuleNode, ProgramNode, SingletonClassNode
- locals << node.locals
- when ForNode
- locals << [2]
- when PostExecutionNode
- locals.push([], [])
- when InterpolatedRegularExpressionNode
- locals << [] if node.once?
- end
-
- stack.concat(node.compact_child_nodes)
- end
-
- locals
- end
-
- # :call-seq:
- # Debug::newlines(source) -> Array
- #
- # For the given source string, return the byte offsets of every newline in
- # the source.
- def self.newlines(source)
- Prism.parse(source).source.offsets
- end
-
- # A wrapping around prism's internal encoding data structures. This is used
- # for reflection and debugging purposes.
- class Encoding
- # The name of the encoding, that can be passed to Encoding.find.
- attr_reader :name
-
- # Initialize a new encoding with the given name and whether or not it is
- # a multibyte encoding.
- def initialize(name, multibyte)
- @name = name
- @multibyte = multibyte
- end
-
- # Whether or not the encoding is a multibyte encoding.
- def multibyte?
- @multibyte
- end
-
- # Returns the number of bytes of the first character in the source string,
- # if it is valid for the encoding. Otherwise, returns 0.
- def width(source)
- Encoding._width(name, source)
- end
-
- # Returns true if the first character in the source string is a valid
- # alphanumeric character for the encoding.
- def alnum?(source)
- Encoding._alnum?(name, source)
- end
-
- # Returns true if the first character in the source string is a valid
- # alphabetic character for the encoding.
- def alpha?(source)
- Encoding._alpha?(name, source)
- end
-
- # Returns true if the first character in the source string is a valid
- # uppercase character for the encoding.
- def upper?(source)
- Encoding._upper?(name, source)
- end
- end
- end
-end
diff --git a/lib/prism/desugar_compiler.rb b/lib/prism/desugar_compiler.rb
index 8d059b0c98..c64d03f64a 100644
--- a/lib/prism/desugar_compiler.rb
+++ b/lib/prism/desugar_compiler.rb
@@ -1,120 +1,186 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
class DesugarAndWriteNode # :nodoc:
- attr_reader :node, :source, :read_class, :write_class, :arguments
+ include DSL
- def initialize(node, source, read_class, write_class, *arguments)
+ attr_reader :node #: ClassVariableAndWriteNode | ConstantAndWriteNode | GlobalVariableAndWriteNode | InstanceVariableAndWriteNode | LocalVariableAndWriteNode
+ attr_reader :default_source #: Source
+ attr_reader :read_class, :write_class #: Symbol
+ attr_reader :arguments #: Hash[Symbol, untyped]
+
+ #: ((ClassVariableAndWriteNode | ConstantAndWriteNode | GlobalVariableAndWriteNode | InstanceVariableAndWriteNode | LocalVariableAndWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void
+ def initialize(node, default_source, read_class, write_class, **arguments)
@node = node
- @source = source
+ @default_source = default_source
@read_class = read_class
@write_class = write_class
@arguments = arguments
end
# Desugar `x &&= y` to `x && x = y`
+ #--
+ #: () -> node
def compile
- AndNode.new(
- source,
- read_class.new(source, *arguments, node.name_loc),
- write_class.new(source, *arguments, node.name_loc, node.value, node.operator_loc, node.location),
- node.operator_loc,
- node.location
+ and_node(
+ location: node.location,
+ left: public_send(read_class, location: node.name_loc, **arguments),
+ right: public_send(
+ write_class,
+ location: node.location,
+ **arguments,
+ name_loc: node.name_loc,
+ value: node.value,
+ operator_loc: node.operator_loc
+ ),
+ operator_loc: node.operator_loc
)
end
end
class DesugarOrWriteDefinedNode # :nodoc:
- attr_reader :node, :source, :read_class, :write_class, :arguments
+ include DSL
+
+ attr_reader :node #: ClassVariableOrWriteNode | ConstantOrWriteNode | GlobalVariableOrWriteNode
+ attr_reader :default_source #: Source
+ attr_reader :read_class, :write_class #: Symbol
+ attr_reader :arguments #: Hash[Symbol, untyped]
- def initialize(node, source, read_class, write_class, *arguments)
+ #: ((ClassVariableOrWriteNode | ConstantOrWriteNode | GlobalVariableOrWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void
+ def initialize(node, default_source, read_class, write_class, **arguments)
@node = node
- @source = source
+ @default_source = default_source
@read_class = read_class
@write_class = write_class
@arguments = arguments
end
# Desugar `x ||= y` to `defined?(x) ? x : x = y`
+ #--
+ #: () -> node
def compile
- IfNode.new(
- source,
- node.operator_loc,
- DefinedNode.new(source, nil, read_class.new(source, *arguments, node.name_loc), nil, node.operator_loc, node.name_loc),
- node.operator_loc,
- StatementsNode.new(source, [read_class.new(source, *arguments, node.name_loc)], node.location),
- ElseNode.new(
- source,
- node.operator_loc,
- StatementsNode.new(
- source,
- [write_class.new(source, *arguments, node.name_loc, node.value, node.operator_loc, node.location)],
- node.location
+ if_node(
+ location: node.location,
+ if_keyword_loc: node.operator_loc,
+ predicate: defined_node(
+ location: node.name_loc,
+ value: public_send(read_class, location: node.name_loc, **arguments),
+ keyword_loc: node.operator_loc
+ ),
+ then_keyword_loc: node.operator_loc,
+ statements: statements_node(
+ location: node.location,
+ body: [public_send(read_class, location: node.name_loc, **arguments)]
+ ),
+ subsequent: else_node(
+ location: node.location,
+ else_keyword_loc: node.operator_loc,
+ statements: statements_node(
+ location: node.location,
+ body: [
+ public_send(
+ write_class,
+ location: node.location,
+ **arguments,
+ name_loc: node.name_loc,
+ value: node.value,
+ operator_loc: node.operator_loc
+ )
+ ]
),
- node.operator_loc,
- node.location
+ end_keyword_loc: node.operator_loc
),
- node.operator_loc,
- node.location
+ end_keyword_loc: node.operator_loc
)
end
end
class DesugarOperatorWriteNode # :nodoc:
- attr_reader :node, :source, :read_class, :write_class, :arguments
+ include DSL
- def initialize(node, source, read_class, write_class, *arguments)
+ attr_reader :node #: ClassVariableOperatorWriteNode | ConstantOperatorWriteNode | GlobalVariableOperatorWriteNode | InstanceVariableOperatorWriteNode | LocalVariableOperatorWriteNode
+ attr_reader :default_source #: Source
+ attr_reader :read_class, :write_class #: Symbol
+ attr_reader :arguments #: Hash[Symbol, untyped]
+
+ #: ((ClassVariableOperatorWriteNode | ConstantOperatorWriteNode | GlobalVariableOperatorWriteNode | InstanceVariableOperatorWriteNode | LocalVariableOperatorWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void
+ def initialize(node, default_source, read_class, write_class, **arguments)
@node = node
- @source = source
+ @default_source = default_source
@read_class = read_class
@write_class = write_class
@arguments = arguments
end
# Desugar `x += y` to `x = x + y`
+ #--
+ #: () -> node
def compile
- write_class.new(
- source,
- *arguments,
- node.name_loc,
- CallNode.new(
- source,
- 0,
- read_class.new(source, *arguments, node.name_loc),
- nil,
- node.operator_loc.slice.chomp("=").to_sym,
- node.operator_loc.copy(length: node.operator_loc.length - 1),
- nil,
- ArgumentsNode.new(source, 0, [node.value], node.value.location),
- nil,
- nil,
- node.location
+ binary_operator_loc = node.binary_operator_loc.chop
+
+ public_send(
+ write_class,
+ location: node.location,
+ **arguments,
+ name_loc: node.name_loc,
+ value: call_node(
+ location: node.location,
+ receiver: public_send(
+ read_class,
+ location: node.name_loc,
+ **arguments
+ ),
+ name: binary_operator_loc.slice.to_sym,
+ message_loc: binary_operator_loc,
+ arguments: arguments_node(
+ location: node.value.location,
+ arguments: [node.value]
+ )
),
- node.operator_loc.copy(start_offset: node.operator_loc.end_offset - 1, length: 1),
- node.location
+ operator_loc: node.binary_operator_loc.copy(
+ start_offset: node.binary_operator_loc.end_offset - 1,
+ length: 1
+ )
)
end
end
class DesugarOrWriteNode # :nodoc:
- attr_reader :node, :source, :read_class, :write_class, :arguments
+ include DSL
- def initialize(node, source, read_class, write_class, *arguments)
+ attr_reader :node #: InstanceVariableOrWriteNode | LocalVariableOrWriteNode
+ attr_reader :default_source #: Source
+ attr_reader :read_class, :write_class #: Symbol
+ attr_reader :arguments #: Hash[Symbol, untyped]
+
+ #: ((InstanceVariableOrWriteNode | LocalVariableOrWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void
+ def initialize(node, default_source, read_class, write_class, **arguments)
@node = node
- @source = source
+ @default_source = default_source
@read_class = read_class
@write_class = write_class
@arguments = arguments
end
# Desugar `x ||= y` to `x || x = y`
+ #--
+ #: () -> node
def compile
- OrNode.new(
- source,
- read_class.new(source, *arguments, node.name_loc),
- write_class.new(source, *arguments, node.name_loc, node.value, node.operator_loc, node.location),
- node.operator_loc,
- node.location
+ or_node(
+ location: node.location,
+ left: public_send(read_class, location: node.name_loc, **arguments),
+ right: public_send(
+ write_class,
+ location: node.location,
+ **arguments,
+ name_loc: node.name_loc,
+ value: node.value,
+ operator_loc: node.operator_loc
+ ),
+ operator_loc: node.operator_loc
)
end
end
@@ -122,229 +188,274 @@ module Prism
private_constant :DesugarAndWriteNode, :DesugarOrWriteNode, :DesugarOrWriteDefinedNode, :DesugarOperatorWriteNode
class ClassVariableAndWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarAndWriteNode.new(self, source, ClassVariableReadNode, ClassVariableWriteNode, name).compile
+ DesugarAndWriteNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile
end
end
class ClassVariableOrWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarOrWriteDefinedNode.new(self, source, ClassVariableReadNode, ClassVariableWriteNode, name).compile
+ DesugarOrWriteDefinedNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile
end
end
class ClassVariableOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarOperatorWriteNode.new(self, source, ClassVariableReadNode, ClassVariableWriteNode, name).compile
+ DesugarOperatorWriteNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile
end
end
class ConstantAndWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarAndWriteNode.new(self, source, ConstantReadNode, ConstantWriteNode, name).compile
+ DesugarAndWriteNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile
end
end
class ConstantOrWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarOrWriteDefinedNode.new(self, source, ConstantReadNode, ConstantWriteNode, name).compile
+ DesugarOrWriteDefinedNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile
end
end
class ConstantOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarOperatorWriteNode.new(self, source, ConstantReadNode, ConstantWriteNode, name).compile
+ DesugarOperatorWriteNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile
end
end
class GlobalVariableAndWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarAndWriteNode.new(self, source, GlobalVariableReadNode, GlobalVariableWriteNode, name).compile
+ DesugarAndWriteNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile
end
end
class GlobalVariableOrWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarOrWriteDefinedNode.new(self, source, GlobalVariableReadNode, GlobalVariableWriteNode, name).compile
+ DesugarOrWriteDefinedNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile
end
end
class GlobalVariableOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarOperatorWriteNode.new(self, source, GlobalVariableReadNode, GlobalVariableWriteNode, name).compile
+ DesugarOperatorWriteNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile
end
end
class InstanceVariableAndWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarAndWriteNode.new(self, source, InstanceVariableReadNode, InstanceVariableWriteNode, name).compile
+ DesugarAndWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile
end
end
class InstanceVariableOrWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarOrWriteNode.new(self, source, InstanceVariableReadNode, InstanceVariableWriteNode, name).compile
+ DesugarOrWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile
end
end
class InstanceVariableOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarOperatorWriteNode.new(self, source, InstanceVariableReadNode, InstanceVariableWriteNode, name).compile
+ DesugarOperatorWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile
end
end
class LocalVariableAndWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarAndWriteNode.new(self, source, LocalVariableReadNode, LocalVariableWriteNode, name, depth).compile
+ DesugarAndWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile
end
end
class LocalVariableOrWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarOrWriteNode.new(self, source, LocalVariableReadNode, LocalVariableWriteNode, name, depth).compile
+ DesugarOrWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile
end
end
class LocalVariableOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
- DesugarOperatorWriteNode.new(self, source, LocalVariableReadNode, LocalVariableWriteNode, name, depth).compile
+ DesugarOperatorWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile
end
end
# DesugarCompiler is a compiler that desugars Ruby code into a more primitive
# form. This is useful for consumers that want to deal with fewer node types.
class DesugarCompiler < MutationCompiler
- # @@foo &&= bar
+ # `@@foo &&= bar`
#
# becomes
#
- # @@foo && @@foo = bar
+ # `@@foo && @@foo = bar`
+ #--
+ #: (ClassVariableAndWriteNode node) -> node
def visit_class_variable_and_write_node(node)
node.desugar
end
- # @@foo ||= bar
+ # `@@foo ||= bar`
#
# becomes
#
- # defined?(@@foo) ? @@foo : @@foo = bar
+ # `defined?(@@foo) ? @@foo : @@foo = bar`
+ #--
+ #: (ClassVariableOrWriteNode node) -> node
def visit_class_variable_or_write_node(node)
node.desugar
end
- # @@foo += bar
+ # `@@foo += bar`
#
# becomes
#
- # @@foo = @@foo + bar
+ # `@@foo = @@foo + bar`
+ #--
+ #: (ClassVariableOperatorWriteNode node) -> node
def visit_class_variable_operator_write_node(node)
node.desugar
end
- # Foo &&= bar
+ # `Foo &&= bar`
#
# becomes
#
- # Foo && Foo = bar
+ # `Foo && Foo = bar`
+ #--
+ #: (ConstantAndWriteNode node) -> node
def visit_constant_and_write_node(node)
node.desugar
end
- # Foo ||= bar
+ # `Foo ||= bar`
#
# becomes
#
- # defined?(Foo) ? Foo : Foo = bar
+ # `defined?(Foo) ? Foo : Foo = bar`
+ #--
+ #: (ConstantOrWriteNode node) -> node
def visit_constant_or_write_node(node)
node.desugar
end
- # Foo += bar
+ # `Foo += bar`
#
# becomes
#
- # Foo = Foo + bar
+ # `Foo = Foo + bar`
+ #--
+ #: (ConstantOperatorWriteNode node) -> node
def visit_constant_operator_write_node(node)
node.desugar
end
- # $foo &&= bar
+ # `$foo &&= bar`
#
# becomes
#
- # $foo && $foo = bar
+ # `$foo && $foo = bar`
+ #--
+ #: (GlobalVariableAndWriteNode node) -> node
def visit_global_variable_and_write_node(node)
node.desugar
end
- # $foo ||= bar
+ # `$foo ||= bar`
#
# becomes
#
- # defined?($foo) ? $foo : $foo = bar
+ # `defined?($foo) ? $foo : $foo = bar`
+ #--
+ #: (GlobalVariableOrWriteNode node) -> node
def visit_global_variable_or_write_node(node)
node.desugar
end
- # $foo += bar
+ # `$foo += bar`
#
# becomes
#
- # $foo = $foo + bar
+ # `$foo = $foo + bar`
+ #--
+ #: (GlobalVariableOperatorWriteNode node) -> node
def visit_global_variable_operator_write_node(node)
node.desugar
end
- # @foo &&= bar
+ # `@foo &&= bar`
#
# becomes
#
- # @foo && @foo = bar
+ # `@foo && @foo = bar`
+ #--
+ #: (InstanceVariableAndWriteNode node) -> node
def visit_instance_variable_and_write_node(node)
node.desugar
end
- # @foo ||= bar
+ # `@foo ||= bar`
#
# becomes
#
- # @foo || @foo = bar
+ # `@foo || @foo = bar`
+ #--
+ #: (InstanceVariableOrWriteNode node) -> node
def visit_instance_variable_or_write_node(node)
node.desugar
end
- # @foo += bar
+ # `@foo += bar`
#
# becomes
#
- # @foo = @foo + bar
+ # `@foo = @foo + bar`
+ #--
+ #: (InstanceVariableOperatorWriteNode node) -> node
def visit_instance_variable_operator_write_node(node)
node.desugar
end
- # foo &&= bar
+ # `foo &&= bar`
#
# becomes
#
- # foo && foo = bar
+ # `foo && foo = bar`
+ #--
+ #: (LocalVariableAndWriteNode node) -> node
def visit_local_variable_and_write_node(node)
node.desugar
end
- # foo ||= bar
+ # `foo ||= bar`
#
# becomes
#
- # foo || foo = bar
+ # `foo || foo = bar`
+ #--
+ #: (LocalVariableOrWriteNode node) -> node
def visit_local_variable_or_write_node(node)
node.desugar
end
- # foo += bar
+ # `foo += bar`
#
# becomes
#
- # foo = foo + bar
+ # `foo = foo + bar`
+ #--
+ #: (LocalVariableOperatorWriteNode node) -> node
def visit_local_variable_operator_write_node(node)
node.desugar
end
diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb
index 1ca99db681..6b9bde51ea 100644
--- a/lib/prism/ffi.rb
+++ b/lib/prism/ffi.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: true
+# :markup: markdown
# typed: ignore
# This file is responsible for mirroring the API provided by the C extension by
@@ -7,13 +8,26 @@
require "rbconfig"
require "ffi"
-module Prism
+# We want to eagerly load this file if there are Ractors so that it does not get
+# autoloaded from within a non-main Ractor.
+require "prism/serialize" if defined?(Ractor)
+
+module Prism # :nodoc:
module LibRubyParser # :nodoc:
extend FFI::Library
# Define the library that we will be pulling functions from. Note that this
# must align with the build shared library from make/rake.
- ffi_lib File.expand_path("../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__)
+ libprism_in_build = File.expand_path("../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__)
+ libprism_in_libdir = "#{RbConfig::CONFIG["libdir"]}/prism/libprism.#{RbConfig::CONFIG["SOEXT"]}"
+
+ if File.exist?(libprism_in_build)
+ INCLUDE_DIR = File.expand_path("../../include", __dir__)
+ ffi_lib libprism_in_build
+ else
+ INCLUDE_DIR = "#{RbConfig::CONFIG["libdir"]}/prism/include"
+ ffi_lib libprism_in_libdir
+ end
# Convert a native C type declaration into a symbol that FFI understands.
# For example:
@@ -38,13 +52,16 @@ module Prism
# given functions. For each one, define a function with the same name and
# signature as the C function.
def self.load_exported_functions_from(header, *functions, callbacks)
- File.foreach(File.expand_path("../../include/#{header}", __dir__)) do |line|
+ File.foreach("#{INCLUDE_DIR}/#{header}") do |line|
# We only want to attempt to load exported functions.
next unless line.start_with?("PRISM_EXPORTED_FUNCTION ")
# We only want to load the functions that we are interested in.
next unless functions.any? { |function| line.include?(function) }
+ # Strip trailing attributes (PRISM_NODISCARD, PRISM_NONNULL(...), etc.)
+ line = line.sub(/\)(\s+PRISM_\w+(?:\([^)]*\))?)+\s*;/, ");")
+
# Parse the function declaration.
unless /^PRISM_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line
raise "Could not parse #{line}"
@@ -71,24 +88,44 @@ module Prism
raise "Could not find functions #{functions.inspect}" unless functions.empty?
end
- callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer
+ callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer
+ callback :pm_source_stream_feof_t, [:pointer], :int
+ pm_source_init_result_values = %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR]
+ enum :pm_source_init_result_t, pm_source_init_result_values
+ enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE]
+
+ # Ractor-safe lookup table for pm_source_init_result_t, since FFI's
+ # enum_type accesses module instance variables that are not shareable.
+ SOURCE_INIT_RESULT = pm_source_init_result_values.freeze
load_exported_functions_from(
- "prism.h",
+ "prism/version.h",
"pm_version",
+ []
+ )
+
+ load_exported_functions_from(
+ "prism/serialize.h",
"pm_serialize_parse",
"pm_serialize_parse_stream",
"pm_serialize_parse_comments",
"pm_serialize_lex",
"pm_serialize_parse_lex",
- "pm_parse_success_p",
- [:pm_parse_stream_fgets_t]
+ "pm_serialize_parse_success_p",
+ []
+ )
+
+ load_exported_functions_from(
+ "prism/string_query.h",
+ "pm_string_query_local",
+ "pm_string_query_constant",
+ "pm_string_query_method_name",
+ []
)
load_exported_functions_from(
- "prism/util/pm_buffer.h",
- "pm_buffer_sizeof",
- "pm_buffer_init",
+ "prism/buffer.h",
+ "pm_buffer_new",
"pm_buffer_value",
"pm_buffer_length",
"pm_buffer_free",
@@ -96,20 +133,19 @@ module Prism
)
load_exported_functions_from(
- "prism/util/pm_string.h",
- "pm_string_mapped_init",
- "pm_string_free",
- "pm_string_source",
- "pm_string_length",
- "pm_string_sizeof",
- []
+ "prism/source.h",
+ "pm_source_file_new",
+ "pm_source_mapped_new",
+ "pm_source_stream_new",
+ "pm_source_free",
+ "pm_source_source",
+ "pm_source_length",
+ [:pm_source_stream_fgets_t, :pm_source_stream_feof_t]
)
# This object represents a pm_buffer_t. We only use it as an opaque pointer,
# so it doesn't need to know the fields of pm_buffer_t.
class PrismBuffer # :nodoc:
- SIZEOF = LibRubyParser.pm_buffer_sizeof
-
attr_reader :pointer
def initialize(pointer)
@@ -131,19 +167,22 @@ module Prism
# Initialize a new buffer and yield it to the block. The buffer will be
# automatically freed when the block returns.
def self.with
- FFI::MemoryPointer.new(SIZEOF) do |pointer|
- raise unless LibRubyParser.pm_buffer_init(pointer)
- return yield new(pointer)
+ buffer = LibRubyParser.pm_buffer_new
+ raise unless buffer
+
+ begin
+ yield new(buffer)
ensure
- LibRubyParser.pm_buffer_free(pointer)
+ LibRubyParser.pm_buffer_free(buffer)
end
end
end
- # This object represents a pm_string_t. We only use it as an opaque pointer,
- # so it doesn't have to be an FFI::Struct.
- class PrismString # :nodoc:
- SIZEOF = LibRubyParser.pm_string_sizeof
+ # This object represents source code to be parsed. For strings it wraps a
+ # pointer directly; for files it uses a pm_source_t under the hood.
+ class PrismSource # :nodoc:
+ PLATFORM_EXPECTS_UTF8 =
+ RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i)
attr_reader :pointer, :length
@@ -158,7 +197,7 @@ module Prism
@pointer.read_string(@length)
end
- # Yields a pm_string_t pointer to the given block.
+ # Yields a PrismSource backed by the given string to the block.
def self.with_string(string)
raise TypeError unless string.is_a?(String)
@@ -172,20 +211,38 @@ module Prism
end
end
- # Yields a pm_string_t pointer to the given block.
+ # Yields a PrismSource to the given block, backed by a pm_source_t.
def self.with_file(filepath)
raise TypeError unless filepath.is_a?(String)
- FFI::MemoryPointer.new(SIZEOF) do |pm_string|
- if LibRubyParser.pm_string_mapped_init(pm_string, filepath)
- pointer = LibRubyParser.pm_string_source(pm_string)
- length = LibRubyParser.pm_string_length(pm_string)
+ # On Windows and Mac, it's expected that filepaths will be encoded in
+ # UTF-8. If they are not, we need to convert them to UTF-8 before
+ # passing them into pm_source_mapped_new.
+ if PLATFORM_EXPECTS_UTF8 && (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8
+ filepath = filepath.encode(Encoding::UTF_8)
+ end
+
+ FFI::MemoryPointer.new(:int) do |result_ptr|
+ pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr)
+
+ case SOURCE_INIT_RESULT[result_ptr.read_int]
+ when :PM_SOURCE_INIT_SUCCESS
+ pointer = LibRubyParser.pm_source_source(pm_source)
+ length = LibRubyParser.pm_source_length(pm_source)
return yield new(pointer, length, false)
- else
+ when :PM_SOURCE_INIT_ERROR_GENERIC
raise SystemCallError.new(filepath, FFI.errno)
+ when :PM_SOURCE_INIT_ERROR_DIRECTORY
+ raise Errno::EISDIR.new(filepath)
+ when :PM_SOURCE_INIT_ERROR_NON_REGULAR
+ # Fall back to reading the file through Ruby IO for non-regular
+ # files (pipes, character devices, etc.)
+ return with_string(File.read(filepath)) { |string| yield string }
+ else
+ raise "Unknown error initializing pm_source_t: #{result_ptr.read_int}"
end
ensure
- LibRubyParser.pm_string_free(pm_string)
+ LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null?
end
end
end
@@ -196,34 +253,34 @@ module Prism
private_constant :LibRubyParser
# The version constant is set by reading the result of calling pm_version.
- VERSION = LibRubyParser.pm_version.read_string
+ VERSION = LibRubyParser.pm_version.read_string.freeze
class << self
# Mirror the Prism.dump API by using the serialization API.
- def dump(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| dump_common(string, options) }
+ def dump(source, **options)
+ LibRubyParser::PrismSource.with_string(source) { |string| dump_common(string, options) }
end
# Mirror the Prism.dump_file API by using the serialization API.
def dump_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| dump_common(string, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| dump_common(string, options) }
end
# Mirror the Prism.lex API by using the serialization API.
def lex(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| lex_common(string, code, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| lex_common(string, code, options) }
end
# Mirror the Prism.lex_file API by using the serialization API.
def lex_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| lex_common(string, string.read, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| lex_common(string, string.read, options) }
end
# Mirror the Prism.parse API by using the serialization API.
def parse(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| parse_common(string, code, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| parse_common(string, code, options) }
end
# Mirror the Prism.parse_file API by using the serialization API. This uses
@@ -231,7 +288,7 @@ module Prism
# when it is available.
def parse_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| parse_common(string, string.read, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| parse_common(string, string.read, options) }
end
# Mirror the Prism.parse_stream API by using the serialization API.
@@ -247,19 +304,21 @@ module Prism
end
}
- # In the pm_serialize_parse_stream function it accepts a pointer to the
- # IO object as a void* and then passes it through to the callback as the
- # third argument, but it never touches it itself. As such, since we have
- # access to the IO object already through the closure of the lambda, we
- # can pass a null pointer here and not worry.
- LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options))
- Prism.load(source, buffer.read)
+ eof_callback = -> (_) { stream.eof? }
+
+ pm_source = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback)
+ begin
+ LibRubyParser.pm_serialize_parse_stream(buffer.pointer, pm_source, dump_options(options))
+ Prism.load(source, buffer.read, options.fetch(:freeze, false))
+ ensure
+ LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null?
+ end
end
end
# Mirror the Prism.parse_comments API by using the serialization API.
def parse_comments(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| parse_comments_common(string, code, options) }
end
# Mirror the Prism.parse_file_comments API by using the serialization
@@ -267,29 +326,60 @@ module Prism
# to use mmap when it is available.
def parse_file_comments(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| parse_comments_common(string, string.read, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| parse_comments_common(string, string.read, options) }
end
# Mirror the Prism.parse_lex API by using the serialization API.
def parse_lex(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| parse_lex_common(string, code, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| parse_lex_common(string, code, options) }
end
# Mirror the Prism.parse_lex_file API by using the serialization API.
def parse_lex_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| parse_lex_common(string, string.read, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| parse_lex_common(string, string.read, options) }
end
# Mirror the Prism.parse_success? API by using the serialization API.
def parse_success?(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| parse_file_success_common(string, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| parse_file_success_common(string, options) }
+ end
+
+ # Mirror the Prism.parse_failure? API by using the serialization API.
+ def parse_failure?(code, **options)
+ !parse_success?(code, **options)
end
# Mirror the Prism.parse_file_success? API by using the serialization API.
def parse_file_success?(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| parse_file_success_common(string, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| parse_file_success_common(string, options) }
+ end
+
+ # Mirror the Prism.parse_file_failure? API by using the serialization API.
+ def parse_file_failure?(filepath, **options)
+ !parse_file_success?(filepath, **options)
+ end
+
+ # Mirror the Prism.profile API by using the serialization API.
+ def profile(source, **options)
+ LibRubyParser::PrismSource.with_string(source) do |string|
+ LibRubyParser::PrismBuffer.with do |buffer|
+ LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
+ nil
+ end
+ end
+ end
+
+ # Mirror the Prism.profile_file API by using the serialization API.
+ def profile_file(filepath, **options)
+ LibRubyParser::PrismSource.with_file(filepath) do |string|
+ LibRubyParser::PrismBuffer.with do |buffer|
+ options[:filepath] = filepath
+ LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
+ nil
+ end
+ end
end
private
@@ -297,55 +387,42 @@ module Prism
def dump_common(string, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
- buffer.read
+
+ dumped = buffer.read
+ dumped.freeze if options.fetch(:freeze, false)
+
+ dumped
end
end
def lex_common(string, code, options) # :nodoc:
- serialized = LibRubyParser::PrismBuffer.with do |buffer|
+ LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
- buffer.read
+ Serialize.load_lex(code, buffer.read, options.fetch(:freeze, false))
end
-
- Serialize.load_tokens(Source.new(code), serialized)
end
def parse_common(string, code, options) # :nodoc:
serialized = dump_common(string, options)
- Prism.load(code, serialized)
+ Serialize.load_parse(code, serialized, options.fetch(:freeze, false))
end
def parse_comments_common(string, code, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options))
-
- source = Source.new(code)
- loader = Serialize::Loader.new(source, buffer.read)
-
- loader.load_header
- loader.load_encoding
- loader.load_start_line
- loader.load_comments
+ Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false))
end
end
def parse_lex_common(string, code, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
-
- source = Source.new(code)
- loader = Serialize::Loader.new(source, buffer.read)
-
- tokens = loader.load_tokens
- node, comments, magic_comments, data_loc, errors, warnings = loader.load_nodes
- tokens.each { |token,| token.value.force_encoding(loader.encoding) }
-
- ParseResult.new([node, tokens], comments, magic_comments, data_loc, errors, warnings, source)
+ Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false))
end
end
def parse_file_success_common(string, options) # :nodoc:
- LibRubyParser.pm_parse_success_p(string.pointer, string.length, dump_options(options))
+ LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options))
end
# Return the value that should be dumped for the command_line option.
@@ -366,6 +443,41 @@ module Prism
end
end
+ # Return the value that should be dumped for the version option.
+ def dump_options_version(version)
+ case version
+ when "current"
+ version_string_to_number(RUBY_VERSION) || raise(CurrentVersionError, RUBY_VERSION)
+ when "latest", nil
+ 0 # Handled in pm_parser_init
+ when "nearest"
+ dump = version_string_to_number(RUBY_VERSION)
+ return dump if dump
+ if RUBY_VERSION < "3.3"
+ version_string_to_number("3.3")
+ else
+ 0 # Handled in pm_parser_init
+ end
+ else
+ version_string_to_number(version) || raise(ArgumentError, "invalid version: #{version}")
+ end
+ end
+
+ # Converts a version string like "4.0.0" or "4.0" into a number.
+ # Returns nil if the version is unknown.
+ def version_string_to_number(version)
+ case version
+ when /\A3\.3(\.\d+)?\z/
+ 1
+ when /\A3\.4(\.\d+)?\z/
+ 2
+ when /\A3\.5(\.\d+)?\z/, /\A4\.0(\.\d+)?\z/
+ 3
+ when /\A4\.1(\.\d+)?\z/
+ 4
+ end
+ end
+
# Convert the given options into a serialized options string.
def dump_options(options)
template = +""
@@ -383,11 +495,8 @@ module Prism
values << options.fetch(:line, 1)
template << "L"
- values << options.fetch(:offset, 0)
-
- template << "L"
if (encoding = options[:encoding])
- name = encoding.name
+ name = encoding.is_a?(Encoding) ? encoding.name : encoding
values.push(name.bytesize, name.b)
template << "A*"
else
@@ -401,17 +510,54 @@ module Prism
values << dump_options_command_line(options)
template << "C"
- values << { nil => 0, "3.3.0" => 1, "3.4.0" => 0, "latest" => 0 }.fetch(options[:version])
+ values << dump_options_version(options[:version])
+
+ template << "C"
+ values << (options[:encoding] == false ? 1 : 0)
+
+ template << "C"
+ values << (options.fetch(:main_script, false) ? 1 : 0)
+
+ template << "C"
+ values << (options.fetch(:partial_script, false) ? 1 : 0)
+
+ template << "C"
+ values << (options.fetch(:freeze, false) ? 1 : 0)
template << "L"
if (scopes = options[:scopes])
values << scopes.length
scopes.each do |scope|
+ locals = nil
+ forwarding = 0
+
+ case scope
+ when Array
+ locals = scope
+ when Scope
+ locals = scope.locals
+
+ scope.forwarding.each do |forward|
+ case forward
+ when :* then forwarding |= 0x1
+ when :** then forwarding |= 0x2
+ when :& then forwarding |= 0x4
+ when :"..." then forwarding |= 0x8
+ else raise ArgumentError, "invalid forwarding value: #{forward}"
+ end
+ end
+ else
+ raise TypeError, "wrong argument type #{scope.class.inspect} (expected Array or Prism::Scope)"
+ end
+
template << "L"
- values << scope.length
+ values << locals.length
+
+ template << "C"
+ values << forwarding
- scope.each do |local|
+ locals.each do |local|
name = local.name
template << "L"
values << name.bytesize
@@ -427,4 +573,39 @@ module Prism
values.pack(template)
end
end
+
+ # Here we are going to patch StringQuery to put in the class-level methods so
+ # that it can maintain a consistent interface
+ class StringQuery # :nodoc:
+ class << self
+ # Mirrors the C extension's StringQuery::local? method.
+ def local?(string)
+ query(LibRubyParser.pm_string_query_local(string, string.bytesize, string.encoding.name))
+ end
+
+ # Mirrors the C extension's StringQuery::constant? method.
+ def constant?(string)
+ query(LibRubyParser.pm_string_query_constant(string, string.bytesize, string.encoding.name))
+ end
+
+ # Mirrors the C extension's StringQuery::method_name? method.
+ def method_name?(string)
+ query(LibRubyParser.pm_string_query_method_name(string, string.bytesize, string.encoding.name))
+ end
+
+ private
+
+ # Parse the enum result and return an appropriate boolean.
+ def query(result)
+ case result
+ when :PM_STRING_QUERY_ERROR
+ raise ArgumentError, "Invalid or non ascii-compatible encoding"
+ when :PM_STRING_QUERY_FALSE
+ false
+ when :PM_STRING_QUERY_TRUE
+ true
+ end
+ end
+ end
+ end
end
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index 70cb065201..7aacec037d 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -1,15 +1,68 @@
# frozen_string_literal: true
-
-require "delegate"
-require "ripper"
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
+ # @rbs!
+ # module Translation
+ # class Ripper
+ # EXPR_NONE: Integer
+ # EXPR_BEG: Integer
+ # EXPR_MID: Integer
+ # EXPR_END: Integer
+ # EXPR_CLASS: Integer
+ # EXPR_VALUE: Integer
+ # EXPR_ARG: Integer
+ # EXPR_CMDARG: Integer
+ # EXPR_ENDARG: Integer
+ # EXPR_ENDFN: Integer
+ #
+ # class Lexer < Ripper
+ # class State
+ # def self.[]: (Integer value) -> State
+ # end
+ # end
+ #
+ # class LineAndColumnCache
+ # def initialize: (Source source) -> void
+ #
+ # def line_and_column: (Integer byte_offset) -> [Integer, Integer]
+ # end
+ # end
+ # end
+
# This class is responsible for lexing the source using prism and then
# converting those tokens to be compatible with Ripper. In the vast majority
# of cases, this is a one-to-one mapping of the token type. Everything else
# generally lines up. However, there are a few cases that require special
# handling.
class LexCompat # :nodoc:
+ # @rbs!
+ # # A token produced by the Ripper lexer that Prism is replicating.
+ # type lex_compat_token = [[Integer, Integer], Symbol, String, untyped]
+
+ # A result class specialized for holding tokens produced by the lexer.
+ class Result < Prism::Result
+ # The list of tokens that were produced by the lexer.
+ attr_reader :value #: Array[lex_compat_token]
+
+ # Create a new lex compat result object with the given values.
+ #--
+ #: (Array[lex_compat_token] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
+ @value = value
+ super(comments, magic_comments, data_loc, errors, warnings, continuable, source)
+ end
+
+ # Implement the hash pattern matching interface for Result.
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
+ super.merge!(value: value)
+ end
+ end
+
# This is a mapping of prism token types to Ripper token types. This is a
# many-to-one mapping because we split up our token types, whereas Ripper
# tends to group them.
@@ -87,6 +140,7 @@ module Prism
KEYWORD_DEF: :on_kw,
KEYWORD_DEFINED: :on_kw,
KEYWORD_DO: :on_kw,
+ KEYWORD_DO_BLOCK: :on_kw,
KEYWORD_DO_LOOP: :on_kw,
KEYWORD_ELSE: :on_kw,
KEYWORD_ELSIF: :on_kw,
@@ -181,93 +235,6 @@ module Prism
"__END__": :on___end__
}.freeze
- # When we produce tokens, we produce the same arrays that Ripper does.
- # However, we add a couple of convenience methods onto them to make them a
- # little easier to work with. We delegate all other methods to the array.
- class Token < SimpleDelegator
- # @dynamic initialize, each, []
-
- # The location of the token in the source.
- def location
- self[0]
- end
-
- # The type of the token.
- def event
- self[1]
- end
-
- # The slice of the source that this token represents.
- def value
- self[2]
- end
-
- # The state of the lexer when this token was produced.
- def state
- self[3]
- end
- end
-
- # Ripper doesn't include the rest of the token in the event, so we need to
- # trim it down to just the content on the first line when comparing.
- class EndContentToken < Token
- def ==(other) # :nodoc:
- [self[0], self[1], self[2][0..self[2].index("\n")], self[3]] == other
- end
- end
-
- # Tokens where state should be ignored
- # used for :on_comment, :on_heredoc_end, :on_embexpr_end
- class IgnoreStateToken < Token
- def ==(other) # :nodoc:
- self[0...-1] == other[0...-1]
- end
- end
-
- # Ident tokens for the most part are exactly the same, except sometimes we
- # know an ident is a local when ripper doesn't (when they are introduced
- # through named captures in regular expressions). In that case we don't
- # compare the state.
- class IdentToken < Token
- def ==(other) # :nodoc:
- (self[0...-1] == other[0...-1]) && (
- (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) ||
- (other[3] & Ripper::EXPR_ARG_ANY != 0)
- )
- end
- end
-
- # Ignored newlines can occasionally have a LABEL state attached to them, so
- # we compare the state differently here.
- class IgnoredNewlineToken < Token
- def ==(other) # :nodoc:
- return false unless self[0...-1] == other[0...-1]
-
- if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED
- other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0
- else
- self[3] == other[3]
- end
- end
- end
-
- # If we have an identifier that follows a method name like:
- #
- # def foo bar
- #
- # then Ripper will mark bar as END|LABEL if there is a local in a parent
- # scope named bar because it hasn't pushed the local table yet. We do this
- # more accurately, so we need to allow comparing against both END and
- # END|LABEL.
- class ParamToken < Token
- def ==(other) # :nodoc:
- (self[0...-1] == other[0...-1]) && (
- (other[3] == Ripper::EXPR_END) ||
- (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
- )
- end
- end
-
# A heredoc in this case is a list of tokens that belong to the body of the
# heredoc that should be appended onto the list of tokens when the heredoc
# closes.
@@ -277,16 +244,19 @@ module Prism
# order back into the token stream and set the state of the last token to
# the state that the heredoc was opened in.
class PlainHeredoc # :nodoc:
- attr_reader :tokens
+ attr_reader :tokens #: Array[lex_compat_token]
+ #: () -> void
def initialize
@tokens = []
end
+ #: (lex_compat_token token) -> void
def <<(token)
tokens << token
end
+ #: () -> Array[lex_compat_token]
def to_a
tokens
end
@@ -296,22 +266,26 @@ module Prism
# that need to be split on "\\\n" to mimic Ripper's behavior. We also need
# to keep track of the state that the heredoc was opened in.
class DashHeredoc # :nodoc:
- attr_reader :split, :tokens
+ attr_reader :split #: bool
+ attr_reader :tokens #: Array[lex_compat_token]
+ #: (bool split) -> void
def initialize(split)
@split = split
@tokens = []
end
+ #: (lex_compat_token token) -> void
def <<(token)
tokens << token
end
+ #: () -> Array[lex_compat_token]
def to_a
embexpr_balance = 0
- tokens.each_with_object([]) do |token, results| #$ Array[Token]
- case token.event
+ tokens.each_with_object([]) do |token, results| #$ Array[lex_compat_token]
+ case token[1]
when :on_embexpr_beg
embexpr_balance += 1
results << token
@@ -326,9 +300,9 @@ module Prism
if split
# Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind
# to keep the delimiter in the result.
- token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
+ token[2].split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
column = 0 if index > 0
- results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
+ results << [[lineno, column], :on_tstring_content, value, token[3]]
lineno += value.count("\n")
end
else
@@ -357,8 +331,13 @@ module Prism
class DedentingHeredoc # :nodoc:
TAB_WIDTH = 8
- attr_reader :tokens, :dedent_next, :dedent, :embexpr_balance
+ attr_reader :tokens #: Array[lex_compat_token]
+ attr_reader :dedent_next #: bool
+ attr_reader :dedent #: Integer?
+ attr_reader :embexpr_balance #: Integer
+ # @rbs @ended_on_newline: bool
+ #: () -> void
def initialize
@tokens = []
@dedent_next = true
@@ -370,8 +349,10 @@ module Prism
# As tokens are coming in, we track the minimum amount of common leading
# whitespace on plain string content tokens. This allows us to later
# remove that amount of whitespace from the beginning of each line.
+ #
+ #: (lex_compat_token token) -> void
def <<(token)
- case token.event
+ case token[1]
when :on_embexpr_beg, :on_heredoc_beg
@embexpr_balance += 1
@dedent = 0 if @dedent_next && @ended_on_newline
@@ -379,10 +360,10 @@ module Prism
@embexpr_balance -= 1
when :on_tstring_content
if embexpr_balance == 0
- line = token.value
+ line = token[2]
if dedent_next && !(line.strip.empty? && line.end_with?("\n"))
- leading = line[/\A(\s*)\n?/, 1]
+ leading = line[/\A(\s*)\n?/, 1] #: String
next_dedent = 0
leading.each_char do |char|
@@ -402,20 +383,21 @@ module Prism
end
end
- @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0
+ @dedent_next = token[1] == :on_tstring_content && embexpr_balance == 0
@ended_on_newline = false
tokens << token
end
+ #: () -> Array[lex_compat_token]
def to_a
# If every line in the heredoc is blank, we still need to split up the
# string content token into multiple tokens.
if dedent.nil?
- results = [] #: Array[Token]
+ results = [] #: Array[lex_compat_token]
embexpr_balance = 0
tokens.each do |token|
- case token.event
+ case token[1]
when :on_embexpr_beg, :on_heredoc_beg
embexpr_balance += 1
results << token
@@ -427,9 +409,9 @@ module Prism
lineno = token[0][0]
column = token[0][1]
- token.value.split(/(?<=\n)/).each_with_index do |value, index|
+ token[2].split(/(?<=\n)/).each_with_index do |value, index|
column = 0 if index > 0
- results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
+ results << [[lineno, column], :on_tstring_content, value, token[3]]
lineno += 1
end
else
@@ -446,7 +428,7 @@ module Prism
# If the minimum common whitespace is 0, then we need to concatenate
# string nodes together that are immediately adjacent.
if dedent == 0
- results = [] #: Array[Token]
+ results = [] #: Array[lex_compat_token]
embexpr_balance = 0
index = 0
@@ -457,15 +439,15 @@ module Prism
results << token
index += 1
- case token.event
+ case token[1]
when :on_embexpr_beg, :on_heredoc_beg
embexpr_balance += 1
when :on_embexpr_end, :on_heredoc_end
embexpr_balance -= 1
when :on_tstring_content
if embexpr_balance == 0
- while index < max_index && tokens[index].event == :on_tstring_content
- token.value << tokens[index].value
+ while index < max_index && tokens[index][1] == :on_tstring_content && !token[2].match?(/\\\r?\n\z/)
+ token[2] << tokens[index][2]
index += 1
end
end
@@ -479,7 +461,7 @@ module Prism
# insert on_ignored_sp tokens for the amount of dedent that we need to
# perform. We also need to remove the dedent from the beginning of
# each line of plain string content tokens.
- results = [] #: Array[Token]
+ results = [] #: Array[lex_compat_token]
dedent_next = true
embexpr_balance = 0
@@ -488,7 +470,7 @@ module Prism
# whitespace calculation we performed above. This is because
# checking if the subsequent token needs to be dedented is common to
# both the dedent calculation and the ignored_sp insertion.
- case token.event
+ case token[1]
when :on_embexpr_beg
embexpr_balance += 1
results << token
@@ -500,7 +482,7 @@ module Prism
# Here we're going to split the string on newlines, but maintain
# the newlines in the resulting array. We'll do that with a look
# behind assertion.
- splits = token.value.split(/(?<=\n)/)
+ splits = token[2].split(/(?<=\n)/)
index = 0
while index < splits.length
@@ -518,7 +500,8 @@ module Prism
# line or this line doesn't start with whitespace, then we
# should concatenate the rest of the string to match ripper.
if dedent == 0 && (!dedent_next || !line.start_with?(/\s/))
- line = splits[index..].join
+ unjoined = splits[index..] #: Array[String]
+ line = unjoined.join
index = splits.length
end
@@ -557,12 +540,12 @@ module Prism
ignored = deleted_chars.join
line.delete_prefix!(ignored)
- results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]])
+ results << [[lineno, 0], :on_ignored_sp, ignored, token[3]]
column = ignored.length
end
end
- results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty?
+ results << [[lineno, column], token[1], line, token[3]] unless line.empty?
index += 1
end
else
@@ -573,7 +556,7 @@ module Prism
end
dedent_next =
- ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) &&
+ ((token[1] == :on_tstring_content) || (token[1] == :on_heredoc_end)) &&
embexpr_balance == 0
end
@@ -583,12 +566,14 @@ module Prism
# Here we will split between the two types of heredocs and return the
# object that will store their tokens.
+ #--
+ #: (lex_compat_token opening) -> (PlainHeredoc | DashHeredoc | DedentingHeredoc)
def self.build(opening)
- case opening.value[2]
+ case opening[2][2]
when "~"
DedentingHeredoc.new
when "-"
- DashHeredoc.new(opening.value[3] != "'")
+ DashHeredoc.new(opening[2][3] != "'")
else
PlainHeredoc.new
end
@@ -597,33 +582,43 @@ module Prism
private_constant :Heredoc
- attr_reader :source, :options
+ # In previous versions of Ruby, Ripper wouldn't flush the bom before the
+ # first token, so we had to have a hack in place to account for that.
+ BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
+ private_constant :BOM_FLUSHED
+
+ attr_reader :options #: Hash[Symbol, untyped]
+ # @rbs @source: String
+ #: (String source, **untyped options) -> void
def initialize(source, **options)
@source = source
@options = options
end
+ #: () -> Result
def result
- tokens = [] #: Array[LexCompat::Token]
+ tokens = [] #: Array[lex_compat_token]
state = :default
heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]
- result = Prism.lex(source, **options)
+ result = Prism.lex(@source, **options)
+ source = result.source
result_value = result.value
- previous_state = nil #: Ripper::Lexer::State?
+ previous_state = nil #: Translation::Ripper::Lexer::State?
last_heredoc_end = nil #: Integer?
+ eof_token = nil #: Token?
- # In previous versions of Ruby, Ripper wouldn't flush the bom before the
- # first token, so we had to have a hack in place to account for that. This
- # checks for that behavior.
- bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
- bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
+ bom = source.slice(0, 3) == "\xEF\xBB\xBF"
- result_value.each_with_index do |(token, lex_state), index|
- lineno = token.location.start_line
- column = token.location.start_column
+ result_value.each_with_index do |(prism_token, prism_state), index|
+ lineno = prism_token.location.start_line
+ column = prism_token.location.start_column
+
+ event = RIPPER.fetch(prism_token.type)
+ value = prism_token.value
+ lex_state = Translation::Ripper::Lexer::State[prism_state]
# If there's a UTF-8 byte-order mark as the start of the file, then for
# certain tokens ripper sets the first token back by 3 bytes. It also
@@ -633,70 +628,53 @@ module Prism
if bom && lineno == 1
column -= 3
- if index == 0 && column == 0 && !bom_flushed
+ if index == 0 && column == 0 && !BOM_FLUSHED
flushed =
- case token.type
+ case prism_token.type
when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
:GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I,
:PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I,
:PERCENT_UPPER_W, :STRING_BEGIN
true
when :REGEXP_BEGIN, :SYMBOL_BEGIN
- token.value.start_with?("%")
+ value.start_with?("%")
else
false
end
unless flushed
column -= 3
- value = token.value
value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding))
end
end
end
- event = RIPPER.fetch(token.type)
- value = token.value
- lex_state = Ripper::Lexer::State.new(lex_state)
-
- token =
+ lex_compat_token =
case event
when :on___end__
- EndContentToken.new([[lineno, column], event, value, lex_state])
+ # Ripper doesn't include the rest of the token in the event, so we need to
+ # trim it down to just the content on the first line.
+ value = value[0..value.index("\n")] #: String
+ [[lineno, column], event, value, lex_state]
when :on_comment
- IgnoreStateToken.new([[lineno, column], event, value, lex_state])
+ [[lineno, column], event, value, lex_state]
when :on_heredoc_end
# Heredoc end tokens can be emitted in an odd order, so we don't
# want to bother comparing the state on them.
- last_heredoc_end = token.location.end_offset
- IgnoreStateToken.new([[lineno, column], event, value, lex_state])
- when :on_ident
- if lex_state == Ripper::EXPR_END
- # If we have an identifier that follows a method name like:
- #
- # def foo bar
- #
- # then Ripper will mark bar as END|LABEL if there is a local in a
- # parent scope named bar because it hasn't pushed the local table
- # yet. We do this more accurately, so we need to allow comparing
- # against both END and END|LABEL.
- ParamToken.new([[lineno, column], event, value, lex_state])
- elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
- # In the event that we're comparing identifiers, we're going to
- # allow a little divergence. Ripper doesn't account for local
- # variables introduced through named captures in regexes, and we
- # do, which accounts for this difference.
- IdentToken.new([[lineno, column], event, value, lex_state])
- else
- Token.new([[lineno, column], event, value, lex_state])
- end
+ last_heredoc_end = prism_token.location.end_offset
+ [[lineno, column], event, value, lex_state]
when :on_embexpr_end
- IgnoreStateToken.new([[lineno, column], event, value, lex_state])
- when :on_ignored_nl
- # Ignored newlines can occasionally have a LABEL state attached to
- # them which doesn't actually impact anything. We don't mirror that
- # state so we ignored it.
- IgnoredNewlineToken.new([[lineno, column], event, value, lex_state])
+ [[lineno, column], event, value, lex_state]
+ when :on_words_sep
+ # Ripper emits one token each per line.
+ value.each_line.with_index do |line, index|
+ if index > 0
+ lineno += 1
+ column = 0
+ end
+ tokens << [[lineno, column], event, line, lex_state]
+ end
+ tokens.pop #: lex_compat_token
when :on_regexp_end
# On regex end, Ripper scans and then sets end state, so the ripper
# lexed output is begin, when it should be end. prism sets lex state
@@ -721,13 +699,14 @@ module Prism
counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
end
- Ripper::Lexer::State.new(result_value[current_index][1])
+ Translation::Ripper::Lexer::State[result_value[current_index][1]]
else
previous_state
end
- Token.new([[lineno, column], event, value, lex_state])
+ [[lineno, column], event, value, lex_state]
when :on_eof
+ eof_token = prism_token
previous_token = result_value[index - 1][0]
# If we're at the end of the file and the previous token was a
@@ -742,7 +721,7 @@ module Prism
# Use the greater offset of the two to determine the start of
# the trailing whitespace.
start_offset = [previous_token.location.end_offset, last_heredoc_end].compact.max
- end_offset = token.location.start_offset
+ end_offset = prism_token.location.start_offset
if start_offset < end_offset
if bom
@@ -750,14 +729,14 @@ module Prism
end_offset += 3
end
- tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
+ tokens << [[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state]
end
end
- Token.new([[lineno, column], event, value, lex_state])
+ [[lineno, column], event, value, lex_state]
else
- Token.new([[lineno, column], event, value, lex_state])
- end
+ [[lineno, column], event, value, lex_state]
+ end #: lex_compat_token
previous_state = lex_state
@@ -774,19 +753,19 @@ module Prism
when :default
# The default state is when there are no heredocs at all. In this
# state we can append the token to the list of tokens and move on.
- tokens << token
+ tokens << lex_compat_token
# If we get the declaration of a heredoc, then we open a new heredoc
# and move into the heredoc_opened state.
if event == :on_heredoc_beg
state = :heredoc_opened
- heredoc_stack.last << Heredoc.build(token)
+ heredoc_stack.last << Heredoc.build(lex_compat_token)
end
when :heredoc_opened
# The heredoc_opened state is when we've seen the declaration of a
# heredoc and are now lexing the body of the heredoc. In this state we
# push tokens onto the most recently created heredoc.
- heredoc_stack.last.last << token
+ heredoc_stack.last.last << lex_compat_token
case event
when :on_heredoc_beg
@@ -794,7 +773,7 @@ module Prism
# heredoc, this means we have nested heredocs. In this case we'll
# push a new heredoc onto the stack and stay in the heredoc_opened
# state since we're now lexing the body of the new heredoc.
- heredoc_stack << [Heredoc.build(token)]
+ heredoc_stack << [Heredoc.build(lex_compat_token)]
when :on_heredoc_end
# If we receive the end of a heredoc, then we're done lexing the
# body of the heredoc. In this case we now have a completed heredoc
@@ -803,10 +782,10 @@ module Prism
state = :heredoc_closed
end
when :heredoc_closed
- if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n"))
+ if %i[on_nl on_ignored_nl on_comment].include?(event) || ((event == :on_tstring_content) && value.end_with?("\n"))
if heredoc_stack.size > 1
- flushing = heredoc_stack.pop
- heredoc_stack.last.last << token
+ flushing = heredoc_stack.pop #: Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]
+ heredoc_stack.last.last << lex_compat_token
flushing.each do |heredoc|
heredoc.to_a.each do |flushed_token|
@@ -818,12 +797,12 @@ module Prism
next
end
elsif event == :on_heredoc_beg
- tokens << token
+ tokens << lex_compat_token
state = :heredoc_opened
- heredoc_stack.last << Heredoc.build(token)
+ heredoc_stack.last << Heredoc.build(lex_compat_token)
next
elsif heredoc_stack.size > 1
- heredoc_stack[-2].last << token
+ heredoc_stack[-2].last << lex_compat_token
next
end
@@ -834,77 +813,94 @@ module Prism
heredoc_stack.last.clear
state = :default
- tokens << token
+ tokens << lex_compat_token
end
end
- # Drop the EOF token from the list
- tokens = tokens[0...-1]
-
- # We sort by location to compare against Ripper's output
- tokens.sort_by!(&:location)
-
- ParseResult.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.new(source))
- end
- end
+ # Drop the EOF token from the list. The EOF token may not be
+ # present if the source was syntactically invalid.
+ if tokens.dig(-1, 1) == :on_eof
+ tokens = tokens[0...-1] #: Array[lex_compat_token]
+ end
- private_constant :LexCompat
+ # We sort by location because Ripper.lex sorts.
+ tokens.sort_by! do |token|
+ line, column = token[0]
+ source.byte_offset(line, column)
+ end
- # This is a class that wraps the Ripper lexer to produce almost exactly the
- # same tokens.
- class LexRipper # :nodoc:
- attr_reader :source
+ tokens = post_process_tokens(tokens, source, result.data_loc, bom, eof_token)
- def initialize(source)
- @source = source
+ Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, result.continuable?, source)
end
- def result
- previous = [] #: [[Integer, Integer], Symbol, String, untyped] | []
- results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]]
-
- lex(source).each do |token|
- case token[1]
- when :on_sp
- # skip
- when :on_tstring_content
- if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
- previous[2] << token[2]
- else
- results << token
- previous = token
- end
- when :on_words_sep
- if previous[1] == :on_words_sep
- previous[2] << token[2]
+ private
+
+ #: (Array[lex_compat_token] tokens, Source source, Location? data_loc, bool bom, Token? eof_token) -> Array[lex_compat_token]
+ def post_process_tokens(tokens, source, data_loc, bom, eof_token) # returns a new list: empty heredoc ends dropped, :on_sp tokens inserted for gaps
+ new_tokens = [] #: Array[lex_compat_token]
+
+ prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG] # lex state given to whitespace that precedes the first token
+ prev_token_end = bom ? 3 : 0 # byte offset just past the previous token; starts after the 3-byte BOM if present
+
+ cache = Translation::Ripper::LineAndColumnCache.new(source) # line/column lookup by byte offset (class defined elsewhere)
+
+ tokens.each do |token|
+ # Skip heredoc end tokens whose value is empty (missing heredoc ends).
+ next if token[1] == :on_heredoc_end && token[2] == ""
+
+ # Add :on_sp tokens for any source bytes between the previous token and this one.
+ line, column = token[0]
+ start_offset = source.byte_offset(line, column)
+
+ # Ripper reports columns on line 1 without counting the BOM, so we
+ # adjust to get the real offset.
+ start_offset += 3 if line == 1 && bom
+
+ if start_offset > prev_token_end
+ sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
+ sp_line, sp_column = cache.line_and_column(prev_token_end)
+ # Ripper reports columns on line 1 without counting the BOM
+ sp_column -= 3 if sp_line == 1 && bom
+ continuation_index = sp_value.byteindex("\\")
+
+ # Ripper emits up to three :on_sp tokens when line continuations are used: the whitespace before the backslash, the backslash with its line ending, and the whitespace on the next line.
+ if continuation_index
+ next_whitespace_index = continuation_index + 1 # byte just after the backslash
+ next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r" # also step over "\r" when present
+ next_whitespace_index += 1 # step over the following byte, presumably the "\n" -- TODO confirm
+ first_whitespace = sp_value[0...continuation_index] #: String
+ continuation = sp_value[continuation_index...next_whitespace_index] #: String
+ second_whitespace = sp_value[next_whitespace_index..] || ""
+
+ new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty?
+ new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state]
+ new_tokens << [[sp_line + 1, 0], :on_sp, second_whitespace, prev_token_state] unless second_whitespace.empty?
else
- results << token
- previous = token
+ new_tokens << [[sp_line, sp_column], :on_sp, sp_value, prev_token_state]
end
- else
- results << token
- previous = token
end
- end
-
- results
- end
-
- private
- if Ripper.method(:lex).parameters.assoc(:keyrest)
- def lex(source)
- Ripper.lex(source, raise_errors: true)
+ new_tokens << token
+ prev_token_state = token[3] # whitespace after this token inherits its lex state
+ prev_token_end = start_offset + token[2].bytesize # assumes token[2] is the token's exact source text -- TODO confirm
end
- else
- def lex(source)
- ripper = Ripper::Lexer.new(source)
- ripper.lex.tap do |result|
- raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any?
+
+ if !data_loc && eof_token # no trailing :on_sp with __END__ as it is always preceded by :on_nl
+ end_offset = eof_token.location.end_offset
+ if prev_token_end < end_offset
+ new_tokens << [
+ [source.line(prev_token_end), source.column(prev_token_end)],
+ :on_sp,
+ source.slice(prev_token_end, end_offset - prev_token_end),
+ prev_token_state
+ ]
end
end
+
+ new_tokens
end
end
- private_constant :LexRipper
+ private_constant :LexCompat
end
diff --git a/lib/prism/node_ext.rb b/lib/prism/node_ext.rb
index 8674544065..8a6624e76d 100644
--- a/lib/prism/node_ext.rb
+++ b/lib/prism/node_ext.rb
@@ -1,13 +1,37 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
+#--
# Here we are reopening the prism module to provide methods on nodes that aren't
# templated and are meant as convenience methods.
+#++
module Prism
+ class Node # reopened here to add a helper that emits deprecation warnings
+ #: (*String replacements) -> void
+ def deprecated(*replacements) # :nodoc:
+ location = caller_locations(1, 1)&.[](0)&.label # label of the frame that called #deprecated
+ suggest = replacements.map { |replacement| "#{self.class}##{replacement}" }
+
+ warn(<<~MSG, uplevel: 1, category: :deprecated)
+ [deprecation]: #{self.class}##{location} is deprecated and will be \
+ removed in the next major version. Use #{suggest.join("/")} instead.
+ #{(caller(1, 3) || []).join("\n")}
+ MSG
+ end
+ end
+
module RegularExpressionOptions # :nodoc:
# Returns a numeric value that represents the flags that were used to create
# the regular expression.
- def options
- o = flags & (RegularExpressionFlags::IGNORE_CASE | RegularExpressionFlags::EXTENDED | RegularExpressionFlags::MULTI_LINE)
+ #--
+ #: (Integer flags) -> Integer
+ def self.options(flags)
+ o = 0
+ o |= Regexp::IGNORECASE if flags.anybits?(RegularExpressionFlags::IGNORE_CASE)
+ o |= Regexp::EXTENDED if flags.anybits?(RegularExpressionFlags::EXTENDED)
+ o |= Regexp::MULTILINE if flags.anybits?(RegularExpressionFlags::MULTI_LINE)
o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8)
o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT)
o
@@ -15,67 +39,121 @@ module Prism
end
class InterpolatedMatchLastLineNode < Node
- include RegularExpressionOptions
+ # Returns a numeric value that represents the flags that were used to create
+ # the regular expression.
+ #--
+ #: () -> Integer
+ def options
+ RegularExpressionOptions.options(flags)
+ end
end
class InterpolatedRegularExpressionNode < Node
- include RegularExpressionOptions
+ # Returns a numeric value that represents the flags that were used to create
+ # the regular expression.
+ #--
+ #: () -> Integer
+ def options
+ RegularExpressionOptions.options(flags)
+ end
end
class MatchLastLineNode < Node
- include RegularExpressionOptions
+ # Returns a numeric value that represents the flags that were used to create
+ # the regular expression.
+ #--
+ #: () -> Integer
+ def options
+ RegularExpressionOptions.options(flags)
+ end
end
class RegularExpressionNode < Node
- include RegularExpressionOptions
+ # Returns a numeric value that represents the flags that were used to create
+ # the regular expression.
+ #--
+ #: () -> Integer
+ def options
+ RegularExpressionOptions.options(flags)
+ end
end
private_constant :RegularExpressionOptions
module HeredocQuery # :nodoc:
# Returns true if this node was represented as a heredoc in the source code.
- def heredoc?
+ #--
+ #: (String? opening) -> bool?
+ def self.heredoc?(opening)
+ # @type self: InterpolatedStringNode | InterpolatedXStringNode | StringNode | XStringNode
opening&.start_with?("<<")
end
end
class InterpolatedStringNode < Node
- include HeredocQuery
+ # Returns true if this node was represented as a heredoc in the source code.
+ #--
+ #: () -> bool?
+ def heredoc?
+ HeredocQuery.heredoc?(opening)
+ end
end
class InterpolatedXStringNode < Node
- include HeredocQuery
+ # Returns true if this node was represented as a heredoc in the source code.
+ #--
+ #: () -> bool?
+ def heredoc?
+ HeredocQuery.heredoc?(opening)
+ end
end
class StringNode < Node
- include HeredocQuery
+ # Returns true if this node was represented as a heredoc in the source code.
+ #--
+ #: () -> bool?
+ def heredoc?
+ HeredocQuery.heredoc?(opening)
+ end
# Occasionally it's helpful to treat a string as if it were interpolated so
# that there's a consistent interface for working with strings.
+ #--
+ #: () -> InterpolatedStringNode
def to_interpolated
InterpolatedStringNode.new(
source,
+ -1,
+ location,
frozen? ? InterpolatedStringNodeFlags::FROZEN : 0,
opening_loc,
- [copy(opening_loc: nil, closing_loc: nil, location: content_loc)],
- closing_loc,
- location
+ [copy(location: content_loc, opening_loc: nil, closing_loc: nil)],
+ closing_loc
)
end
end
class XStringNode < Node
- include HeredocQuery
+ # Returns true if this node was represented as a heredoc in the source code.
+ #--
+ #: () -> bool?
+ def heredoc?
+ HeredocQuery.heredoc?(opening)
+ end
# Occasionally it's helpful to treat a string as if it were interpolated so
# that there's a consistent interface for working with strings.
+ #--
+ #: () -> InterpolatedXStringNode
def to_interpolated
InterpolatedXStringNode.new(
source,
+ -1,
+ location,
+ flags,
opening_loc,
- [StringNode.new(source, 0, nil, content_loc, nil, unescaped, content_loc)],
- closing_loc,
- location
+ [StringNode.new(source, node_id, content_loc, 0, nil, content_loc, nil, unescaped)],
+ closing_loc
)
end
end
@@ -84,6 +162,8 @@ module Prism
class ImaginaryNode < Node
# Returns the value of the node as a Ruby Complex.
+ #--
+ #: () -> Complex
def value
Complex(0, numeric.value)
end
@@ -91,19 +171,25 @@ module Prism
class RationalNode < Node
# Returns the value of the node as a Ruby Rational.
+ #--
+ #: () -> Rational
def value
- Rational(numeric.is_a?(IntegerNode) ? numeric.value : slice.chomp("r"))
+ Rational(numerator, denominator)
end
end
class ConstantReadNode < Node
# Returns the list of parts for the full name of this constant.
# For example: [:Foo]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
[name]
end
# Returns the full name of this constant. For example: "Foo"
+ #--
+ #: () -> String
def full_name
name.to_s
end
@@ -112,11 +198,15 @@ module Prism
class ConstantWriteNode < Node
# Returns the list of parts for the full name of this constant.
# For example: [:Foo]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
[name]
end
# Returns the full name of this constant. For example: "Foo"
+ #--
+ #: () -> String
def full_name
name.to_s
end
@@ -131,23 +221,26 @@ module Prism
# local variable
class DynamicPartsInConstantPathError < StandardError; end
- # An error class raised when missing nodes are found while computing a
+ # An error class raised when error recovery nodes are found while computing a
# constant path's full name. For example:
# Foo:: -> raises because the constant path is missing the last part
- class MissingNodesInConstantPathError < StandardError; end
+ class ErrorRecoveryNodesInConstantPathError < StandardError; end
# Returns the list of parts for the full name of this constant path.
# For example: [:Foo, :Bar]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
parts = [] #: Array[Symbol]
current = self #: node?
while current.is_a?(ConstantPathNode)
- child = current.child
- if child.is_a?(MissingNode)
- raise MissingNodesInConstantPathError, "Constant path contains missing nodes. Cannot compute full name"
+ name = current.name
+ if name.nil?
+ raise ErrorRecoveryNodesInConstantPathError, "Constant path contains error recovery nodes. Cannot compute full name"
end
- parts.unshift(child.name)
+
+ parts.unshift(name)
current = current.parent
end
@@ -159,6 +252,8 @@ module Prism
end
# Returns the full name of this constant path. For example: "Foo::Bar"
+ #--
+ #: () -> String
def full_name
full_name_parts.join("::")
end
@@ -167,9 +262,11 @@ module Prism
class ConstantPathTargetNode < Node
# Returns the list of parts for the full name of this constant path.
# For example: [:Foo, :Bar]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
parts =
- case parent
+ case (parent = self.parent)
when ConstantPathNode, ConstantReadNode
parent.full_name_parts
when nil
@@ -179,14 +276,16 @@ module Prism
raise ConstantPathNode::DynamicPartsInConstantPathError, "Constant target path contains dynamic parts. Cannot compute full name"
end
- if child.is_a?(MissingNode)
- raise ConstantPathNode::MissingNodesInConstantPathError, "Constant target path contains missing nodes. Cannot compute full name"
+ if (name = self.name).nil?
+ raise ConstantPathNode::ErrorRecoveryNodesInConstantPathError, "Constant target path contains error recovery nodes. Cannot compute full name"
end
- parts.push(child.name)
+ parts.push(name)
end
# Returns the full name of this constant path. For example: "Foo::Bar"
+ #--
+ #: () -> String
def full_name
full_name_parts.join("::")
end
@@ -195,11 +294,15 @@ module Prism
class ConstantTargetNode < Node
# Returns the list of parts for the full name of this constant.
# For example: [:Foo]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
[name]
end
# Returns the full name of this constant. For example: "Foo"
+ #--
+ #: () -> String
def full_name
name.to_s
end
@@ -207,6 +310,8 @@ module Prism
class ParametersNode < Node
# Mirrors the Method#parameters method.
+ #--
+ #: () -> Array[[Symbol, Symbol] | [Symbol]]
def signature
names = [] #: Array[[Symbol, Symbol] | [Symbol]]
@@ -216,15 +321,15 @@ module Prism
optionals.each { |param| names << [:opt, param.name] }
- if rest && rest.is_a?(RestParameterNode)
+ if (rest = self.rest).is_a?(RestParameterNode)
names << [:rest, rest.name || :*]
end
posts.each do |param|
- if param.is_a?(MultiTargetNode)
+ case param
+ when MultiTargetNode
names << [:req]
- elsif param.is_a?(NoKeywordsParameterNode)
- # Invalid syntax, e.g. "def f(**nil, ...)" moves the NoKeywordsParameterNode to posts
+ when ErrorRecoveryNode
raise "Invalid syntax"
else
names << [:req, param.name]
@@ -244,7 +349,7 @@ module Prism
keyopt.each { |param| names << [:key, param.name] }
- case keyword_rest
+ case (keyword_rest = self.keyword_rest)
when ForwardingParameterNode
names.concat([[:rest, :*], [:keyrest, :**], [:block, :&]])
when KeywordRestParameterNode
@@ -253,8 +358,31 @@ module Prism
names << [:nokey]
end
- names << [:block, block.name || :&] if block
+ case (block = self.block)
+ when BlockParameterNode
+ names << [:block, block.name || :&]
+ when NoBlockParameterNode
+ names << [:noblock]
+ end
+
names
end
end
+
+ class CallNode < Node
+ # When a call node has the attribute_write flag set, it means that the call
+ # is using the attribute write syntax. This is either a method call to []=
+ # or a method call to a method that ends with =. Either way, the = sign is
+ # present in the source.
+ #
+ # Prism returns the message_loc _without_ the = sign attached, because there
+ # can be any amount of space between the message and the = sign. However,
+ # sometimes you want the location of the full message including the inner
+ # space and the = sign. This method provides that. Returns nil when there is no message_loc.
+ #--
+ #: () -> Location?
+ def full_message_loc
+ attribute_write? ? message_loc&.adjoin("=") : message_loc # adjoin is defined elsewhere; presumably extends the location through the "=" -- confirm
+ end
+ end
end
diff --git a/lib/prism/node_find.rb b/lib/prism/node_find.rb
new file mode 100644
index 0000000000..697ee430e8
--- /dev/null
+++ b/lib/prism/node_find.rb
@@ -0,0 +1,185 @@
+# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
+
+module Prism
+ # Finds the Prism AST node corresponding to a given Method, UnboundMethod,
+ # Proc, or Thread::Backtrace::Location. On CRuby, uses node_id from the
+ # instruction sequence for an exact match. On other implementations, falls
+ # back to best-effort matching by source location line number.
+ #
+ # This module is autoloaded so that programs that don't use Prism.find don't
+ # pay for its definition.
+ module NodeFind # :nodoc:
+ # Find the node for the given callable or backtrace location. When rubyvm is truthy the node_id-based finders are used, otherwise the line-matching ones. Raises ArgumentError for any other kind of object.
+ #--
+ #: (Method | UnboundMethod | Proc | Thread::Backtrace::Location callable, bool rubyvm) -> Node?
+ def self.find(callable, rubyvm)
+ case callable
+ when Proc
+ if rubyvm
+ RubyVMCallableFind.new.find(callable)
+ elsif callable.lambda? # lambdas and plain procs are matched against different node types
+ LineLambdaFind.new.find(callable)
+ else
+ LineProcFind.new.find(callable)
+ end
+ when Method, UnboundMethod
+ if rubyvm
+ RubyVMCallableFind.new.find(callable)
+ else
+ LineMethodFind.new.find(callable)
+ end
+ when Thread::Backtrace::Location
+ if rubyvm
+ RubyVMBacktraceLocationFind.new.find(callable)
+ else
+ LineBacktraceLocationFind.new.find(callable)
+ end
+ else
+ raise ArgumentError, "Expected a Method, UnboundMethod, Proc, or Thread::Backtrace::Location, got #{callable.class}"
+ end
+ end
+
+ # Base class for the finders; it provides the shared file-parsing helper.
+ class Find
+ private
+
+ # Parse the given file path, returning the ParseResult, or nil when the path is nil, is not readable, or does not parse successfully.
+ #--
+ #: (String? file) -> ParseResult?
+ def parse_file(file)
+ return unless file && File.readable?(file)
+ result = Prism.parse_file(file)
+ result if result.success? # a result with errors is treated as no result
+ end
+ end
+
+ # Finds the AST node for a Method, UnboundMethod, or Proc using the node_id
+ # from the instruction sequence.
+ class RubyVMCallableFind < Find
+ # Find the node for the given callable using the ISeq node_id. Returns nil when the source location, a successful parse, or the ISeq is unavailable, or when the ISeq's parser is not :prism.
+ #--
+ #: (Method | UnboundMethod | Proc callable) -> Node?
+ def find(callable)
+ return unless (source_location = callable.source_location)
+ return unless (result = parse_file(source_location[0]))
+ return unless (iseq = RubyVM::InstructionSequence.of(callable))
+
+ header = iseq.to_a[4] # fifth element of the serialized iseq; read below as a hash with :parser and :node_id
+ return unless header[:parser] == :prism # only proceed when the iseq records :prism as its parser
+
+ result.value.find { |node| node.node_id == header[:node_id] }
+ end
+ end
+
+ # Finds the AST node for a Thread::Backtrace::Location using the node_id
+ # from the backtrace location.
+ class RubyVMBacktraceLocationFind < Find
+ # Find the node for the given backtrace location using node_id. Returns nil when the file cannot be parsed or the node_id lookup API is missing.
+ #--
+ #: (Thread::Backtrace::Location location) -> Node?
+ def find(location)
+ file = location.absolute_path || location.path # prefer the absolute path, falling back to the path as recorded
+ return unless (result = parse_file(file))
+ return unless RubyVM::AbstractSyntaxTree.respond_to?(:node_id_for_backtrace_location) # give up when this lookup API is unavailable
+
+ node_id = RubyVM::AbstractSyntaxTree.node_id_for_backtrace_location(location)
+
+ result.value.find { |node| node.node_id == node_id }
+ end
+ end
+
+ # Finds the AST node for a Method or UnboundMethod using best-effort line
+ # matching. Used on non-CRuby implementations.
+ class LineMethodFind < Find
+ # Find the node for the given method by matching a def on name and line, or a call with a block on line alone.
+ #--
+ #: (Method | UnboundMethod callable) -> Node?
+ def find(callable)
+ return unless (source_location = callable.source_location)
+ return unless (result = parse_file(source_location[0]))
+
+ name = callable.name
+ start_line = source_location[1]
+
+ result.value.find do |node|
+ case node
+ when DefNode
+ node.name == name && node.location.start_line == start_line
+ when CallNode
+ node.block.is_a?(BlockNode) && node.location.start_line == start_line # name is not checked here; presumably covers block-defined methods such as define_method -- confirm
+ else
+ false
+ end
+ end
+ end
+ end
+
+ # Finds the AST node for a lambda using best-effort line matching. Used
+ # on non-CRuby implementations.
+ class LineLambdaFind < Find
+ # Find the node for the given lambda by matching a lambda literal, or a call with a block, that starts on the reported line.
+ #--
+ #: (Proc callable) -> Node?
+ def find(callable)
+ return unless (source_location = callable.source_location)
+ return unless (result = parse_file(source_location[0]))
+
+ start_line = source_location[1]
+
+ result.value.find do |node|
+ case node
+ when LambdaNode
+ node.location.start_line == start_line
+ when CallNode
+ node.block.is_a?(BlockNode) && node.location.start_line == start_line # presumably covers lambdas created by a method call with a block -- confirm
+ else
+ false
+ end
+ end
+ end
+ end
+
+ # Finds the AST node for a non-lambda Proc using best-effort line
+ # matching. Used on non-CRuby implementations.
+ class LineProcFind < Find
+ # Find the node for the given proc by matching a for loop, or a call with a block, that starts on the reported line.
+ #--
+ #: (Proc callable) -> Node?
+ def find(callable)
+ return unless (source_location = callable.source_location)
+ return unless (result = parse_file(source_location[0]))
+
+ start_line = source_location[1]
+
+ result.value.find do |node|
+ case node
+ when ForNode
+ node.location.start_line == start_line # NOTE(review): why for loops count as proc sources is not shown here -- confirm
+ when CallNode
+ node.block.is_a?(BlockNode) && node.location.start_line == start_line
+ else
+ false
+ end
+ end
+ end
+ end
+
+ # Finds the AST node for a Thread::Backtrace::Location using best-effort
+ # line matching. Used on non-CRuby implementations.
+ class LineBacktraceLocationFind < Find
+ # Find the node for the given backtrace location by matching on line. Returns nil when the file cannot be parsed or no node starts on that line.
+ #--
+ #: (Thread::Backtrace::Location location) -> Node?
+ def find(location)
+ file = location.absolute_path || location.path # prefer the absolute path, falling back to the path as recorded
+ return unless (result = parse_file(file))
+
+ start_line = location.lineno
+ result.value.find { |node| node.location.start_line == start_line } # any node type qualifies; the first match found is returned
+ end
+ end
+ end
+end
diff --git a/lib/prism/node_inspector.rb b/lib/prism/node_inspector.rb
deleted file mode 100644
index d77af33c3a..0000000000
--- a/lib/prism/node_inspector.rb
+++ /dev/null
@@ -1,68 +0,0 @@
-# frozen_string_literal: true
-
-module Prism
- # This object is responsible for generating the output for the inspect method
- # implementations of child nodes.
- class NodeInspector # :nodoc:
- attr_reader :prefix, :output
-
- def initialize(prefix = "")
- @prefix = prefix
- @output = +""
- end
-
- # Appends a line to the output with the current prefix.
- def <<(line)
- output << "#{prefix}#{line}"
- end
-
- # This generates a string that is used as the header of the inspect output
- # for any given node.
- def header(node)
- output = +"@ #{node.class.name.split("::").last} ("
- output << "location: (#{node.location.start_line},#{node.location.start_column})-(#{node.location.end_line},#{node.location.end_column})"
- output << ", newline: true" if node.newline?
- output << ")\n"
- output
- end
-
- # Generates a string that represents a list of nodes. It handles properly
- # using the box drawing characters to make the output look nice.
- def list(prefix, nodes)
- output = +"(length: #{nodes.length})\n"
- last_index = nodes.length - 1
-
- nodes.each_with_index do |node, index|
- pointer, preadd = (index == last_index) ? ["└── ", " "] : ["├── ", "│ "]
- node_prefix = "#{prefix}#{preadd}"
- output << node.inspect(NodeInspector.new(node_prefix)).sub(node_prefix, "#{prefix}#{pointer}")
- end
-
- output
- end
-
- # Generates a string that represents a location field on a node.
- def location(value)
- if value
- "(#{value.start_line},#{value.start_column})-(#{value.end_line},#{value.end_column}) = #{value.slice.inspect}"
- else
- "∅"
- end
- end
-
- # Generates a string that represents a child node.
- def child_node(node, append)
- node.inspect(child_inspector(append)).delete_prefix(prefix)
- end
-
- # Returns a new inspector that can be used to inspect a child node.
- def child_inspector(append)
- NodeInspector.new("#{prefix}#{append}")
- end
-
- # Returns the output as a string.
- def to_str
- output
- end
- end
-end
diff --git a/lib/prism/pack.rb b/lib/prism/pack.rb
deleted file mode 100644
index c0de8ab8b7..0000000000
--- a/lib/prism/pack.rb
+++ /dev/null
@@ -1,228 +0,0 @@
-# frozen_string_literal: true
-# typed: ignore
-
-module Prism
- # A parser for the pack template language.
- module Pack
- %i[
- SPACE
- COMMENT
- INTEGER
- UTF8
- BER
- FLOAT
- STRING_SPACE_PADDED
- STRING_NULL_PADDED
- STRING_NULL_TERMINATED
- STRING_MSB
- STRING_LSB
- STRING_HEX_HIGH
- STRING_HEX_LOW
- STRING_UU
- STRING_MIME
- STRING_BASE64
- STRING_FIXED
- STRING_POINTER
- MOVE
- BACK
- NULL
-
- UNSIGNED
- SIGNED
- SIGNED_NA
-
- AGNOSTIC_ENDIAN
- LITTLE_ENDIAN
- BIG_ENDIAN
- NATIVE_ENDIAN
- ENDIAN_NA
-
- SIZE_SHORT
- SIZE_INT
- SIZE_LONG
- SIZE_LONG_LONG
- SIZE_8
- SIZE_16
- SIZE_32
- SIZE_64
- SIZE_P
- SIZE_NA
-
- LENGTH_FIXED
- LENGTH_MAX
- LENGTH_RELATIVE
- LENGTH_NA
- ].each do |const|
- const_set(const, const)
- end
-
- # A directive in the pack template language.
- class Directive
- # A symbol representing the version of Ruby.
- attr_reader :version
-
- # A symbol representing whether or not we are packing or unpacking.
- attr_reader :variant
-
- # A byteslice of the source string that this directive represents.
- attr_reader :source
-
- # The type of the directive.
- attr_reader :type
-
- # The type of signedness of the directive.
- attr_reader :signed
-
- # The type of endianness of the directive.
- attr_reader :endian
-
- # The size of the directive.
- attr_reader :size
-
- # The length type of this directive (used for integers).
- attr_reader :length_type
-
- # The length of this directive (used for integers).
- attr_reader :length
-
- # Initialize a new directive with the given values.
- def initialize(version, variant, source, type, signed, endian, size, length_type, length)
- @version = version
- @variant = variant
- @source = source
- @type = type
- @signed = signed
- @endian = endian
- @size = size
- @length_type = length_type
- @length = length
- end
-
- # The descriptions of the various types of endianness.
- ENDIAN_DESCRIPTIONS = {
- AGNOSTIC_ENDIAN: "agnostic",
- LITTLE_ENDIAN: "little-endian (VAX)",
- BIG_ENDIAN: "big-endian (network)",
- NATIVE_ENDIAN: "native-endian",
- ENDIAN_NA: "n/a"
- }
-
- # The descriptions of the various types of signedness.
- SIGNED_DESCRIPTIONS = {
- UNSIGNED: "unsigned",
- SIGNED: "signed",
- SIGNED_NA: "n/a"
- }
-
- # The descriptions of the various types of sizes.
- SIZE_DESCRIPTIONS = {
- SIZE_SHORT: "short",
- SIZE_INT: "int-width",
- SIZE_LONG: "long",
- SIZE_LONG_LONG: "long long",
- SIZE_8: "8-bit",
- SIZE_16: "16-bit",
- SIZE_32: "32-bit",
- SIZE_64: "64-bit",
- SIZE_P: "pointer-width"
- }
-
- # Provide a human-readable description of the directive.
- def describe
- case type
- when SPACE
- "whitespace"
- when COMMENT
- "comment"
- when INTEGER
- if size == SIZE_8
- base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer"
- else
- base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer"
- end
- case length_type
- when LENGTH_FIXED
- if length > 1
- base + ", x#{length}"
- else
- base
- end
- when LENGTH_MAX
- base + ", as many as possible"
- else
- raise
- end
- when UTF8
- "UTF-8 character"
- when BER
- "BER-compressed integer"
- when FLOAT
- "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float"
- when STRING_SPACE_PADDED
- "arbitrary binary string (space padded)"
- when STRING_NULL_PADDED
- "arbitrary binary string (null padded, count is width)"
- when STRING_NULL_TERMINATED
- "arbitrary binary string (null padded, count is width), except that null is added with *"
- when STRING_MSB
- "bit string (MSB first)"
- when STRING_LSB
- "bit string (LSB first)"
- when STRING_HEX_HIGH
- "hex string (high nibble first)"
- when STRING_HEX_LOW
- "hex string (low nibble first)"
- when STRING_UU
- "UU-encoded string"
- when STRING_MIME
- "quoted printable, MIME encoding"
- when STRING_BASE64
- "base64 encoded string"
- when STRING_FIXED
- "pointer to a structure (fixed-length string)"
- when STRING_POINTER
- "pointer to a null-terminated string"
- when MOVE
- "move to absolute position"
- when BACK
- "back up a byte"
- when NULL
- "null byte"
- else
- raise
- end
- end
- end
-
- # The result of parsing a pack template.
- class Format
- # A list of the directives in the template.
- attr_reader :directives
-
- # The encoding of the template.
- attr_reader :encoding
-
- # Create a new Format with the given directives and encoding.
- def initialize(directives, encoding)
- @directives = directives
- @encoding = encoding
- end
-
- # Provide a human-readable description of the format.
- def describe
- source_width = directives.map { |d| d.source.inspect.length }.max
- directive_lines = directives.map do |directive|
- if directive.type == SPACE
- source = directive.source.inspect
- else
- source = directive.source
- end
- # @type var source_width: Integer
- " #{source.ljust(source_width)} #{directive.describe}"
- end
-
- (["Directives:"] + directive_lines + ["Encoding:", " #{encoding}"]).join("\n")
- end
- end
- end
-end
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index b6109b0993..93d3c006b7 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -1,61 +1,175 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
+ # @rbs!
+ # # An internal interface for a cache that can be used to compute code
+ # # units from byte offsets.
+ # interface _CodeUnitsCache
+ # def []: (Integer byte_offset) -> Integer
+ # end
+
# This represents a source of Ruby code that has been parsed. It is used in
# conjunction with locations to allow them to resolve line numbers and source
# ranges.
class Source
+ # Create a new source object with the given source code. This method should
+ # be used instead of `new` and it will return either a `Source` or a
+ # specialized and more performant `ASCIISource` if no multibyte characters
+ # are present in the source code.
+ #
+ # Note that if you are calling this method manually, you will need to supply
+ # the start_line and offsets parameters. start_line is the line number that
+ # the source starts on, which is typically 1 but can be different if this
+ # source is a subset of a larger source or if this is an eval. offsets is an
+ # array of byte offsets for the start of each line in the source code, which
+ # can be calculated by iterating through the source code and recording the
+ # byte offset whenever a newline character is encountered. The first
+ # element is always 0 to mark the first line.
+ #--
+ #: (String source, Integer start_line, Array[Integer] offsets) -> Source
+ def self.for(source, start_line, offsets)
+ if source.ascii_only?
+ ASCIISource.new(source, start_line, offsets)
+ elsif source.encoding == Encoding::BINARY
+ source.force_encoding(Encoding::UTF_8)
+
+ if source.valid_encoding?
+ new(source, start_line, offsets)
+ else
+ # This is an extremely niche use case where the file is marked as
+ # binary, contains multi-byte characters, and those characters are not
+ # valid UTF-8. In this case we'll mark it as binary and fall back to
+ # treating everything as a single-byte character. This _may_ cause
+ # problems when asking for code units, but it appears to be the
+ # cleanest solution at the moment.
+ source.force_encoding(Encoding::BINARY)
+ ASCIISource.new(source, start_line, offsets)
+ end
+ else
+ new(source, start_line, offsets)
+ end
+ end
+
# The source code that this source object represents.
- attr_reader :source
+ attr_reader :source #: String
# The line number where this source starts.
- attr_reader :start_line
+ attr_reader :start_line #: Integer
+
+ # The list of newline byte offsets in the source code. When initialized from
+ # the C extension, this may be a packed binary string of uint32_t values
+ # that is lazily unpacked on first access.
+ #--
+ #: () -> Array[Integer]
+ def offsets
+ offsets = @offsets
+ return offsets if offsets.is_a?(Array)
+ @offsets = offsets.unpack("L*")
+ end
+
+ # Create a new source object with the given source code. The offsets
+ # parameter can be either an Array of Integer byte offsets or a packed
+ # binary string of uint32_t values (from the C extension).
+ #--
+ #: (String source, Integer start_line, Array[Integer] | String offsets) -> void
+ def initialize(source, start_line, offsets)
+ @source = source
+ @start_line = start_line
+ @offsets = offsets
+ end
- # The list of newline byte offsets in the source code.
- attr_reader :offsets
+ # Replace the value of start_line with the given value.
+ #--
+ #: (Integer start_line) -> void
+ def replace_start_line(start_line)
+ @start_line = start_line
+ end
- # Create a new source object with the given source code.
- def initialize(source, start_line = 1, offsets = [])
- @source = source
- @start_line = start_line # set after parsing is done
- @offsets = offsets # set after parsing is done
+ # Replace the value of offsets with the given value.
+ #--
+ #: (Array[Integer] offsets) -> void
+ def replace_offsets(offsets)
+ @offsets = offsets
end
# Returns the encoding of the source code, which is set by parameters to the
# parser or by the encoding magic comment.
+ #--
+ #: () -> Encoding
def encoding
source.encoding
end
+ # Returns the lines of the source code as an array of strings.
+ #--
+ #: () -> Array[String]
+ def lines
+ source.lines
+ end
+
# Perform a byteslice on the source code using the given byte offset and
# byte length.
+ #--
+ #: (Integer byte_offset, Integer length) -> String
def slice(byte_offset, length)
source.byteslice(byte_offset, length) or raise
end
+ # Converts the line number and column in bytes to a byte offset.
+ #--
+ #: (Integer line, Integer column) -> Integer
+ def byte_offset(line, column)
+ normal = line - @start_line
+ raise IndexError if normal < 0
+ offsets.fetch(normal) + column
+ rescue IndexError
+ raise ArgumentError, "line #{line} is out of range"
+ end
+
# Binary search through the offsets to find the line number for the given
# byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def line(byte_offset)
start_line + find_line(byte_offset)
end
# Return the byte offset of the start of the line corresponding to the given
# byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def line_start(byte_offset)
offsets[find_line(byte_offset)]
end
- # Return the column number for the given byte offset.
+ # Returns the byte offset of the end of the line corresponding to the given
+ # byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
+ def line_end(byte_offset)
+ offsets[find_line(byte_offset) + 1] || source.bytesize
+ end
+
+ # Return the column in bytes for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def column(byte_offset)
byte_offset - line_start(byte_offset)
end
# Return the character offset for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def character_offset(byte_offset)
(source.byteslice(0, byte_offset) or raise).length
end
- # Return the column number in characters for the given byte offset.
+ # Return the column in characters for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def character_column(byte_offset)
character_offset(byte_offset) - character_offset(line_start(byte_offset))
end
@@ -66,37 +180,215 @@ module Prism
# This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
# concept of code units that differs from the number of characters in other
# encodings, it is not captured here.
+ #
+ # We purposefully replace invalid and undefined characters with replacement
+ # characters in this conversion. This happens for two reasons. First, it's
+ # possible that the given byte offset will not occur on a character
+ # boundary. Second, it's possible that the source code will contain a
+ # character that has no equivalent in the given encoding.
+ #--
+ #: (Integer byte_offset, Encoding encoding) -> Integer
def code_units_offset(byte_offset, encoding)
- byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
- (encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE) ? (byteslice.bytesize / 2) : byteslice.length
+ return byte_offset if encoding == Encoding::UTF_8
+
+ byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
+
+ if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
+ byteslice.bytesize / 2
+ else
+ byteslice.length
+ end
+ end
+
+ # Generate a cache that targets a specific encoding for calculating code
+ # unit offsets.
+ #--
+ #: (Encoding encoding) -> CodeUnitsCache
+ def code_units_cache(encoding)
+ CodeUnitsCache.new(source, encoding)
end
- # Returns the column number in code units for the given encoding for the
+ # Returns the column in code units for the given encoding for the
# given byte offset.
+ #--
+ #: (Integer byte_offset, Encoding encoding) -> Integer
def code_units_column(byte_offset, encoding)
code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
end
- private
+ # Freeze this object and the objects it contains.
+ #--
+ #: () -> void
+ def deep_freeze
+ source.freeze
+ offsets.freeze
+ freeze
+ end
- # Binary search through the offsets to find the line number for the given
+ # Binary search through the offsets to find the index for the given
# byte offset.
- def find_line(byte_offset)
- left = 0
- right = offsets.length - 1
+ #--
+ #: (Integer byte_offset) -> Integer
+ def find_line(byte_offset) # :nodoc:
+ index = offsets.bsearch_index { |offset| offset > byte_offset } || offsets.length
+ index - 1
+ end
+ end
+
+ # A cache that can be used to quickly compute code unit offsets from byte
+ # offsets. It purposefully provides only a single #[] method to access the
+ # cache in order to minimize surface area.
+ #
+ # Note that there are some known issues here that may or may not be addressed
+ # in the future:
+ #
+ # * The first is that there are issues when the cache computes values that are
+ # not on character boundaries. This can result in subsequent computations
+ # being off by one or more code units.
+ # * The second is that this cache is currently unbounded. In theory we could
+ # introduce some kind of LRU cache to limit the number of entries, but this
+ # has not yet been implemented.
+ #
+ class CodeUnitsCache
+ # Counter used for UTF-8, where one code unit equals one byte.
+ class UTF8Counter # :nodoc:
+ #: (Integer byte_offset, Integer byte_length) -> Integer
+ def count(byte_offset, byte_length)
+ byte_length
+ end
+ end
+
+ class UTF16Counter # :nodoc:
+ # @rbs @source: String
+ # @rbs @encoding: Encoding
+
+ #: (String source, Encoding encoding) -> void
+ def initialize(source, encoding)
+ @source = source
+ @encoding = encoding
+ end
+
+ #: (Integer byte_offset, Integer byte_length) -> Integer
+ def count(byte_offset, byte_length)
+ (@source.byteslice(byte_offset, byte_length) or raise).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
+ end
+ end
- while left <= right
- mid = left + (right - left) / 2
- return mid if (offset = offsets[mid]) == byte_offset
+ # Counter used for UTF-32, where one code unit equals one code point and
+ # matches String#length. Also used as a best-effort fallback for any other
+ # encoding that does not have a dedicated counter.
+ class UTF32Counter # :nodoc:
+ # @rbs @source: String
+ # @rbs @encoding: Encoding
- if offset < byte_offset
- left = mid + 1
+ #: (String source, Encoding encoding) -> void
+ def initialize(source, encoding)
+ @source = source
+ @encoding = encoding
+ end
+
+ #: (Integer byte_offset, Integer byte_length) -> Integer
+ def count(byte_offset, byte_length)
+ (@source.byteslice(byte_offset, byte_length) or raise).encode(@encoding, invalid: :replace, undef: :replace).length
+ end
+ end
+
+ private_constant :UTF8Counter, :UTF16Counter, :UTF32Counter
+
+ # @rbs @source: String
+ # @rbs @counter: UTF8Counter | UTF16Counter | UTF32Counter
+ # @rbs @cache: Hash[Integer, Integer]
+ # @rbs @offsets: Array[Integer]
+
+ # Initialize a new cache with the given source and encoding.
+ #--
+ #: (String source, Encoding encoding) -> void
+ def initialize(source, encoding)
+ @source = source
+ @counter =
+ case encoding
+ when Encoding::UTF_8
+ UTF8Counter.new
+ when Encoding::UTF_16LE, Encoding::UTF_16BE
+ UTF16Counter.new(source, encoding)
else
- right = mid - 1
+ UTF32Counter.new(source, encoding)
end
- end
- left - 1
+ @cache = {} #: Hash[Integer, Integer]
+ @offsets = [] #: Array[Integer]
+ end
+
+ # Retrieve the code units offset from the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
+ def [](byte_offset)
+ @cache[byte_offset] ||=
+ if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
+ @offsets << byte_offset
+ @counter.count(0, byte_offset)
+ elsif index == 0
+ @offsets.unshift(byte_offset)
+ @counter.count(0, byte_offset)
+ else
+ @offsets.insert(index, byte_offset)
+ offset = @offsets[index - 1]
+ @cache[offset] + @counter.count(offset, byte_offset - offset)
+ end
+ end
+ end
+
+ # Specialized version of Prism::Source for source code that includes ASCII
+ # characters only. This class is used to apply performance optimizations that
+ # cannot be applied to sources that include multibyte characters.
+ #
+ # In the extremely rare case that a source includes multi-byte characters but
+ # is marked as binary because of a magic encoding comment and it cannot be
+ # eagerly converted to UTF-8, this class will be used as well. This is because
+ # at that point we will treat everything as single-byte characters.
+ class ASCIISource < Source
+ # Return the character offset for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
+ def character_offset(byte_offset)
+ byte_offset
+ end
+
+ # Return the column in characters for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
+ def character_column(byte_offset)
+ byte_offset - line_start(byte_offset)
+ end
+
+ # Returns the offset from the start of the file for the given byte offset
+ # counting in code units for the given encoding.
+ #
+ # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
+ # concept of code units that differs from the number of characters in other
+ # encodings, it is not captured here.
+ #--
+ #: (Integer byte_offset, Encoding encoding) -> Integer
+ def code_units_offset(byte_offset, encoding)
+ byte_offset
+ end
+
+ # Returns a cache that is the identity function in order to maintain the
+ # same interface. We can do this because code units are always equivalent to
+ # byte offsets for ASCII-only sources.
+ #--
+ #: (Encoding encoding) -> _CodeUnitsCache
+ def code_units_cache(encoding)
+ ->(byte_offset) { byte_offset }
+ end
+
+ # Specialized version of `code_units_column` that does not depend on
+ # `code_units_offset`, which is a more expensive operation. This is
+ # essentially the same as `Prism::Source#column`.
+ #--
+ #: (Integer byte_offset, Encoding encoding) -> Integer
+ def code_units_column(byte_offset, encoding)
+ byte_offset - line_start(byte_offset)
end
end
@@ -104,18 +396,23 @@ module Prism
class Location
# A Source object that is used to determine more information from the given
# offset and length.
- attr_reader :source
+ attr_reader :source #: Source
protected :source
# The byte offset from the beginning of the source where this location
# starts.
- attr_reader :start_offset
+ attr_reader :start_offset #: Integer
# The length of this location in bytes.
- attr_reader :length
+ attr_reader :length #: Integer
+
+ # @rbs @leading_comments: Array[Comment]?
+ # @rbs @trailing_comments: Array[Comment]?
# Create a new location object with the given source, start byte offset, and
# byte length.
+ #--
+ #: (Source source, Integer start_offset, Integer length) -> void
def initialize(source, start_offset, length)
@source = source
@start_offset = start_offset
@@ -130,137 +427,244 @@ module Prism
# These are the comments that are associated with this location that exist
# before the start of this location.
+ #--
+ #: () -> Array[Comment]
def leading_comments
@leading_comments ||= []
end
# Attach a comment to the leading comments of this location.
+ #--
+ #: (Comment comment) -> void
def leading_comment(comment)
leading_comments << comment
end
# These are the comments that are associated with this location that exist
# after the end of this location.
+ #--
+ #: () -> Array[Comment]
def trailing_comments
@trailing_comments ||= []
end
# Attach a comment to the trailing comments of this location.
+ #--
+ #: (Comment comment) -> void
def trailing_comment(comment)
trailing_comments << comment
end
# Returns all comments that are associated with this location (both leading
# and trailing comments).
+ #--
+ #: () -> Array[Comment]
def comments
- [*@leading_comments, *@trailing_comments]
+ [*@leading_comments, *@trailing_comments] #: Array[Comment]
end
# Create a new location object with the given options.
+ #--
+ #: (?source: Source, ?start_offset: Integer, ?length: Integer) -> Location
def copy(source: self.source, start_offset: self.start_offset, length: self.length)
Location.new(source, start_offset, length)
end
+ # Returns a new location that is the result of chopping off the last byte.
+ #--
+ #: () -> Location
+ def chop
+ copy(length: length == 0 ? length : length - 1)
+ end
+
# Returns a string representation of this location.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
end
+ # Returns all of the lines of the source code associated with this location.
+ #--
+ #: () -> Array[String]
+ def source_lines
+ source.lines
+ end
+
# The source code that this location represents.
+ #--
+ #: () -> String
def slice
source.slice(start_offset, length)
end
+ # The source code that this location represents starting from the beginning
+ # of the line that this location starts on to the end of the line that this
+ # location ends on.
+ #--
+ #: () -> String
+ def slice_lines
+ line_start = source.line_start(start_offset)
+ line_end = source.line_end(end_offset)
+ source.slice(line_start, line_end - line_start)
+ end
+
# The character offset from the beginning of the source where this location
# starts.
+ #--
+ #: () -> Integer
def start_character_offset
source.character_offset(start_offset)
end
# The offset from the start of the file in code units of the given encoding.
+ #--
+ #: (?Encoding encoding) -> Integer
def start_code_units_offset(encoding = Encoding::UTF_16LE)
source.code_units_offset(start_offset, encoding)
end
+ # The start offset from the start of the file in code units using the given
+ # cache to fetch or calculate the value.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
+ def cached_start_code_units_offset(cache)
+ cache[start_offset]
+ end
+
# The byte offset from the beginning of the source where this location ends.
+ #--
+ #: () -> Integer
def end_offset
start_offset + length
end
# The character offset from the beginning of the source where this location
# ends.
+ #--
+ #: () -> Integer
def end_character_offset
source.character_offset(end_offset)
end
# The offset from the start of the file in code units of the given encoding.
+ #--
+ #: (?Encoding encoding) -> Integer
def end_code_units_offset(encoding = Encoding::UTF_16LE)
source.code_units_offset(end_offset, encoding)
end
+ # The end offset from the start of the file in code units using the given
+ # cache to fetch or calculate the value.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
+ def cached_end_code_units_offset(cache)
+ cache[end_offset]
+ end
+
# The line number where this location starts.
+ #--
+ #: () -> Integer
def start_line
source.line(start_offset)
end
# The content of the line where this location starts before this location.
+ #--
+ #: () -> String
def start_line_slice
offset = source.line_start(start_offset)
source.slice(offset, start_offset - offset)
end
# The line number where this location ends.
+ #--
+ #: () -> Integer
def end_line
source.line(end_offset)
end
- # The column number in bytes where this location starts from the start of
+ # The column in bytes where this location starts from the start of
# the line.
+ #--
+ #: () -> Integer
def start_column
source.column(start_offset)
end
- # The column number in characters where this location ends from the start of
+ # The column in characters where this location starts from the start of
# the line.
+ #--
+ #: () -> Integer
def start_character_column
source.character_column(start_offset)
end
- # The column number in code units of the given encoding where this location
+ # The column in code units of the given encoding where this location
# starts from the start of the line.
+ #--
+ #: (?Encoding encoding) -> Integer
def start_code_units_column(encoding = Encoding::UTF_16LE)
source.code_units_column(start_offset, encoding)
end
- # The column number in bytes where this location ends from the start of the
+ # The start column in code units using the given cache to fetch or calculate
+ # the value.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
+ def cached_start_code_units_column(cache)
+ cache[start_offset] - cache[source.line_start(start_offset)]
+ end
+
+ # The column in bytes where this location ends from the start of the
# line.
+ #--
+ #: () -> Integer
def end_column
source.column(end_offset)
end
- # The column number in characters where this location ends from the start of
+ # The column in characters where this location ends from the start of
# the line.
+ #--
+ #: () -> Integer
def end_character_column
source.character_column(end_offset)
end
- # The column number in code units of the given encoding where this location
+ # The column in code units of the given encoding where this location
# ends from the start of the line.
+ #--
+ #: (?Encoding encoding) -> Integer
def end_code_units_column(encoding = Encoding::UTF_16LE)
source.code_units_column(end_offset, encoding)
end
+ # The end column in code units using the given cache to fetch or calculate
+ # the value.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
+ def cached_end_code_units_column(cache)
+ cache[end_offset] - cache[source.line_start(end_offset)]
+ end
+
# Implement the hash pattern matching interface for Location.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ start_offset: start_offset, end_offset: end_offset }
end
# Implement the pretty print interface for Location.
- def pretty_print(q)
+ #--
+ #: (PP q) -> void
+ def pretty_print(q) # :nodoc:
q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column})")
end
# Returns true if the given other location is equal to this location.
+ #--
+ #: (untyped other) -> bool
def ==(other)
Location === other &&
other.start_offset == start_offset &&
@@ -270,34 +674,65 @@ module Prism
# Returns a new location that stretches from this location to the given
# other location. Raises an error if this location is not before the other
# location or if they don't share the same source.
+ #--
+ #: (Location other) -> Location
def join(other)
raise "Incompatible sources" if source != other.source
raise "Incompatible locations" if start_offset > other.start_offset
Location.new(source, start_offset, other.end_offset - start_offset)
end
+
+ # Join this location with the first occurrence of the string in the source
+ # that occurs after this location on the same line, and return the new
+ # location. This will raise an error if the string does not exist.
+ #--
+ #: (String string) -> Location
+ def adjoin(string)
+ line_suffix = source.slice(end_offset, source.line_end(end_offset) - end_offset)
+
+ line_suffix_index = line_suffix.byteindex(string)
+ raise "Could not find #{string}" if line_suffix_index.nil?
+
+ Location.new(source, start_offset, length + line_suffix_index + string.bytesize)
+ end
end
# This represents a comment that was encountered during parsing. It is the
# base class for all comment types.
class Comment
- # The location of this comment in the source.
- attr_reader :location
+ # The Location of this comment in the source.
+ attr_reader :location #: Location
# Create a new comment object with the given location.
+ #--
+ #: (Location location) -> void
def initialize(location)
@location = location
end
# Implement the hash pattern matching interface for Comment.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ location: location }
end
# Returns the content of the comment by slicing it from the source code.
+ #--
+ #: () -> String
def slice
location.slice
end
+
+ # Returns true if this comment happens on the same line as other code and
+ # false if the comment is by itself. This can only be true for inline
+ # comments and should be false for block comments.
+ #--
+ #: () -> bool
+ def trailing?
+ raise NotImplementedError, "trailing? is not implemented for #{self.class}"
+ end
end
# InlineComment objects are the most common. They correspond to comments in
@@ -305,12 +740,16 @@ module Prism
class InlineComment < Comment
# Returns true if this comment happens on the same line as other code and
# false if the comment is by itself.
+ #--
+ #: () -> bool
def trailing?
!location.start_line_slice.strip.empty?
end
# Returns a string representation of this comment.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::InlineComment @location=#{location.inspect}>"
end
end
@@ -318,13 +757,17 @@ module Prism
# EmbDocComment objects correspond to comments that are surrounded by =begin
# and =end.
class EmbDocComment < Comment
- # This can only be true for inline comments.
+ # Returns false. This can only be true for inline comments.
+ #--
+ #: () -> bool
def trailing?
false
end
# Returns a string representation of this comment.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::EmbDocComment @location=#{location.inspect}>"
end
end
@@ -332,34 +775,44 @@ module Prism
# This represents a magic comment that was encountered during parsing.
class MagicComment
# A Location object representing the location of the key in the source.
- attr_reader :key_loc
+ attr_reader :key_loc #: Location
# A Location object representing the location of the value in the source.
- attr_reader :value_loc
+ attr_reader :value_loc #: Location
# Create a new magic comment object with the given key and value locations.
+ #--
+ #: (Location key_loc, Location value_loc) -> void
def initialize(key_loc, value_loc)
@key_loc = key_loc
@value_loc = value_loc
end
# Returns the key of the magic comment by slicing it from the source code.
+ #--
+ #: () -> String
def key
key_loc.slice
end
# Returns the value of the magic comment by slicing it from the source code.
+ #--
+ #: () -> String
def value
value_loc.slice
end
# Implement the hash pattern matching interface for MagicComment.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ key_loc: key_loc, value_loc: value_loc }
end
# Returns a string representation of this magic comment.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>"
end
end
@@ -368,18 +821,20 @@ module Prism
class ParseError
# The type of error. This is an _internal_ symbol that is used for
# communicating with translation layers. It is not meant to be public API.
- attr_reader :type
+ attr_reader :type #: Symbol
# The message associated with this error.
- attr_reader :message
+ attr_reader :message #: String
# A Location object representing the location of this error in the source.
- attr_reader :location
+ attr_reader :location #: Location
# The level of this error.
- attr_reader :level
+ attr_reader :level #: Symbol
# Create a new error object with the given message and location.
+ #--
+ #: (Symbol type, String message, Location location, Symbol level) -> void
def initialize(type, message, location, level)
@type = type
@message = message
@@ -388,12 +843,16 @@ module Prism
end
# Implement the hash pattern matching interface for ParseError.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ type: type, message: message, location: location, level: level }
end
# Returns a string representation of this error.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::ParseError @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
end
end
@@ -402,18 +861,20 @@ module Prism
class ParseWarning
# The type of warning. This is an _internal_ symbol that is used for
# communicating with translation layers. It is not meant to be public API.
- attr_reader :type
+ attr_reader :type #: Symbol
# The message associated with this warning.
- attr_reader :message
+ attr_reader :message #: String
# A Location object representing the location of this warning in the source.
- attr_reader :location
+ attr_reader :location #: Location
# The level of this warning.
- attr_reader :level
+ attr_reader :level #: Symbol
# Create a new warning object with the given message and location.
+ #--
+ #: (Symbol type, String message, Location location, Symbol level) -> void
def initialize(type, message, location, level)
@type = type
@message = message
@@ -422,92 +883,233 @@ module Prism
end
# Implement the hash pattern matching interface for ParseWarning.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ type: type, message: message, location: location, level: level }
end
# Returns a string representation of this warning.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::ParseWarning @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
end
end
- # This represents the result of a call to ::parse or ::parse_file. It contains
- # the AST, any comments that were encounters, and any errors that were
- # encountered.
- class ParseResult
- # The value that was generated by parsing. Normally this holds the AST, but
- # it can sometimes how a list of tokens or other results passed back from
- # the parser.
- attr_reader :value
-
+ # This represents the result of a call to Prism.parse or Prism.parse_file.
+ # It contains the requested structure, any comments that were encountered,
+ # and any errors that were encountered.
+ class Result
# The list of comments that were encountered during parsing.
- attr_reader :comments
+ attr_reader :comments #: Array[Comment]
# The list of magic comments that were encountered during parsing.
- attr_reader :magic_comments
+ attr_reader :magic_comments #: Array[MagicComment]
# An optional location that represents the location of the __END__ marker
# and the rest of the content of the file. This content is loaded into the
# DATA constant when the file being parsed is the main file being executed.
- attr_reader :data_loc
+ attr_reader :data_loc #: Location?
# The list of errors that were generated during parsing.
- attr_reader :errors
+ attr_reader :errors #: Array[ParseError]
# The list of warnings that were generated during parsing.
- attr_reader :warnings
+ attr_reader :warnings #: Array[ParseWarning]
# A Source instance that represents the source code that was parsed.
- attr_reader :source
+ attr_reader :source #: Source
- # Create a new parse result object with the given values.
- def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
- @value = value
+ # Create a new result object with the given values.
+ #--
+ #: (Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(comments, magic_comments, data_loc, errors, warnings, continuable, source)
@comments = comments
@magic_comments = magic_comments
@data_loc = data_loc
@errors = errors
@warnings = warnings
+ @continuable = continuable
@source = source
end
- # Implement the hash pattern matching interface for ParseResult.
- def deconstruct_keys(keys)
- { value: value, comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
+ # Implement the hash pattern matching interface for Result.
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
+ { comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
end
# Returns the encoding of the source code that was parsed.
+ #--
+ #: () -> Encoding
def encoding
source.encoding
end
# Returns true if there were no errors during parsing and false if there
# were.
+ #--
+ #: () -> bool
def success?
errors.empty?
end
# Returns true if there were errors during parsing and false if there were
# not.
+ #--
+ #: () -> bool
def failure?
!success?
end
+
+ # Returns true if the parsed source is an incomplete expression that could
+ # become valid with additional input. This is useful for REPL contexts (such
+ # as IRB) where the user may be entering a multi-line expression one line at
+ # a time and the implementation needs to determine whether to wait for more
+ # input or to evaluate what has been entered so far.
+ #
+ # Concretely, this returns true when every error present is caused by the
+ # parser reaching the end of the input before a construct was closed (e.g.
+ # an unclosed string, array, block, or keyword), and returns false when any
+ # error is caused by a token that makes the input structurally invalid
+ # regardless of what might follow (e.g. a stray `end`, `]`, or `)` with no
+ # matching opener).
+ #
+ # Examples:
+ #
+ # Prism.parse("1 + [").continuable? #=> true (unclosed array)
+ # Prism.parse("1 + ]").continuable? #=> false (stray ])
+ # Prism.parse("tap do").continuable? #=> true (unclosed block)
+ # Prism.parse("end.tap do").continuable? #=> false (stray end)
+ #
+ #--
+ #: () -> bool
+ def continuable?
+ @continuable
+ end
+
+ # Create a code units cache for the given encoding.
+ #--
+ #: (Encoding encoding) -> _CodeUnitsCache
+ def code_units_cache(encoding)
+ source.code_units_cache(encoding)
+ end
+ end
+
+ # This is a result specific to the `parse` and `parse_file` methods.
+ class ParseResult < Result
+ autoload :Comments, "prism/parse_result/comments"
+ autoload :Errors, "prism/parse_result/errors"
+ autoload :Newlines, "prism/parse_result/newlines"
+
+ private_constant :Comments
+ private_constant :Errors
+ private_constant :Newlines
+
+ # The syntax tree that was parsed from the source code.
+ attr_reader :value #: ProgramNode
+
+ # Create a new parse result object with the given values.
+ #--
+ #: (ProgramNode value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
+ @value = value
+ super(comments, magic_comments, data_loc, errors, warnings, continuable, source)
+ end
+
+ # Implement the hash pattern matching interface for ParseResult.
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
+ super.merge!(value: value)
+ end
+
+ # Attach the list of comments to their respective locations in the tree.
+ #--
+ #: () -> void
+ def attach_comments!
+ Comments.new(self).attach! # steep:ignore
+ end
+
+ # Walk the tree and mark nodes that are on a new line, loosely emulating
+ # the behavior of CRuby's `:line` tracepoint event.
+ #--
+ #: () -> void
+ def mark_newlines!
+ value.accept(Newlines.new(source.offsets.size)) # steep:ignore
+ end
+
+ # Returns a string representation of the source code with the errors
+ # displayed inline.
+ #--
+ #: () -> String
+ def errors_format
+ Errors.new(self).format
+ end
+ end
+
+ # This is a result specific to the `lex` and `lex_file` methods.
+ class LexResult < Result
+ # The list of tokens that were parsed from the source code.
+ attr_reader :value #: Array[[Token, Integer]]
+
+ # Create a new lex result object with the given values.
+ #--
+ #: (Array[[Token, Integer]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
+ @value = value
+ super(comments, magic_comments, data_loc, errors, warnings, continuable, source)
+ end
+
+ # Implement the hash pattern matching interface for LexResult.
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
+ super.merge!(value: value)
+ end
+ end
+
+ # This is a result specific to the `parse_lex` and `parse_lex_file` methods.
+ class ParseLexResult < Result
+ # A tuple of the syntax tree and the list of tokens that were parsed from
+ # the source code.
+ attr_reader :value #: [ProgramNode, Array[[Token, Integer]]]
+
+ # Create a new parse lex result object with the given values.
+ #--
+ #: ([ProgramNode, Array[[Token, Integer]]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
+ @value = value
+ super(comments, magic_comments, data_loc, errors, warnings, continuable, source)
+ end
+
+ # Implement the hash pattern matching interface for ParseLexResult.
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
+ super.merge!(value: value)
+ end
end
# This represents a token from the Ruby source.
class Token
# The Source object that represents the source this token came from.
- attr_reader :source
+ attr_reader :source #: Source
private :source
# The type of token that this token is.
- attr_reader :type
+ attr_reader :type #: Symbol
# A byteslice of the source that this token represents.
- attr_reader :value
+ attr_reader :value #: String
+
+ # @rbs @location: Location | Integer
# Create a new token object with the given type, value, and location.
+ #--
+ #: (Source source, Symbol type, String value, Location | Integer location) -> void
def initialize(source, type, value, location)
@source = source
@type = type
@@ -516,11 +1118,15 @@ module Prism
end
# Implement the hash pattern matching interface for Token.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ type: type, value: value, location: location }
end
# A Location object representing the location of this token in the source.
+ #--
+ #: () -> Location
def location
location = @location
return location if location.is_a?(Location)
@@ -528,7 +1134,9 @@ module Prism
end
# Implement the pretty print interface for Token.
- def pretty_print(q)
+ #--
+ #: (PP q) -> void
+ def pretty_print(q) # :nodoc:
q.group do
q.text(type.to_s)
self.location.pretty_print(q)
@@ -543,10 +1151,61 @@ module Prism
end
# Returns true if the given other token is equal to this token.
+ #--
+ #: (untyped other) -> bool
def ==(other)
Token === other &&
other.type == type &&
other.value == value
end
+
+ # Returns a string representation of this token.
+ #--
+ #: () -> String
+ def inspect # :nodoc:
+ location
+ super
+ end
+
+ # Freeze this object and the objects it contains.
+ #--
+ #: () -> void
+ def deep_freeze
+ value.freeze
+ location.freeze
+ freeze
+ end
+ end
+
+ # This object is passed to the various Prism.* methods that accept the
+ # `scopes` option as an element of the list. It defines both the local
+ # variables visible at that scope as well as the forwarding parameters
+ # available at that scope.
+ class Scope
+ # The list of local variables that are defined in this scope. This should be
+ # defined as an array of symbols.
+ attr_reader :locals #: Array[Symbol]
+
+ # The list of local variables that are forwarded to the next scope. This
+ # should be defined as an array of symbols containing the specific values of
+ # :*, :**, :&, or :"...".
+ attr_reader :forwarding #: Array[Symbol]
+
+ # Create a new scope object with the given locals and forwarding.
+ #--
+ #: (Array[Symbol] locals, Array[Symbol] forwarding) -> void
+ def initialize(locals, forwarding)
+ @locals = locals
+ @forwarding = forwarding
+ end
+ end
+
+ # Create a new scope with the given locals and forwarding options that is
+ # suitable for passing into one of the Prism.* methods that accepts the
+ # `scopes` option.
+ #--
+ #: (?locals: Array[Symbol], ?forwarding: Array[Symbol]) -> Scope
+ def self.scope(locals: [], forwarding: [])
+ Scope.new(locals, forwarding)
end
end
diff --git a/lib/prism/parse_result/comments.rb b/lib/prism/parse_result/comments.rb
index f8f74d2503..df80792d39 100644
--- a/lib/prism/parse_result/comments.rb
+++ b/lib/prism/parse_result/comments.rb
@@ -1,7 +1,10 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
- class ParseResult
+ class ParseResult < Result
# When we've parsed the source, we have both the syntax tree and the list of
# comments that we found in the source. This class is responsible for
# walking the tree and finding the nearest location to attach each comment.
@@ -17,32 +20,49 @@ module Prism
# the comment. Otherwise it will favor attaching to the nearest location
# that is after the comment.
class Comments
+ # @rbs!
+ # # An internal interface for a target that comments can be attached
+ # # to. This is either going to be a NodeTarget or a LocationTarget.
+ # interface _CommentTarget
+ # def start_offset: () -> Integer
+ # def end_offset: () -> Integer
+ # def encloses?: (Comment) -> bool
+ # def leading_comment: (Comment) -> void
+ # def trailing_comment: (Comment) -> void
+ # end
+
# A target for attaching comments that is based on a specific node's
# location.
class NodeTarget # :nodoc:
- attr_reader :node
+ attr_reader :node #: node
+ #: (node node) -> void
def initialize(node)
@node = node
end
+ #: () -> Integer
def start_offset
node.start_offset
end
+ #: () -> Integer
def end_offset
node.end_offset
end
+ #: (Comment comment) -> bool
def encloses?(comment)
start_offset <= comment.location.start_offset &&
comment.location.end_offset <= end_offset
end
+ #: (Comment comment) -> void
def leading_comment(comment)
node.location.leading_comment(comment)
end
+ #: (Comment comment) -> void
def trailing_comment(comment)
node.location.trailing_comment(comment)
end
@@ -51,44 +71,54 @@ module Prism
# A target for attaching comments that is based on a location field on a
# node. For example, the `end` token of a ClassNode.
class LocationTarget # :nodoc:
- attr_reader :location
+ attr_reader :location #: Location
+ #: (Location location) -> void
def initialize(location)
@location = location
end
+ #: () -> Integer
def start_offset
location.start_offset
end
+ #: () -> Integer
def end_offset
location.end_offset
end
+ #: (Comment comment) -> bool
def encloses?(comment)
false
end
+ #: (Comment comment) -> void
def leading_comment(comment)
location.leading_comment(comment)
end
+ #: (Comment comment) -> void
def trailing_comment(comment)
location.trailing_comment(comment)
end
end
# The parse result that we are attaching comments to.
- attr_reader :parse_result
+ attr_reader :parse_result #: ParseResult
# Create a new Comments object that will attach comments to the given
# parse result.
+ #--
+ #: (ParseResult parse_result) -> void
def initialize(parse_result)
@parse_result = parse_result
end
# Attach the comments to their respective locations in the tree by
# mutating the parse result.
+ #--
+ #: () -> void
def attach!
parse_result.comments.each do |comment|
preceding, enclosing, following = nearest_targets(parse_result.value, comment)
@@ -116,11 +146,13 @@ module Prism
# Responsible for finding the nearest targets to the given comment within
# the context of the given encapsulating node.
+ #--
+ #: (node node, Comment comment) -> [_CommentTarget?, _CommentTarget?, _CommentTarget?]
def nearest_targets(node, comment)
comment_start = comment.location.start_offset
comment_end = comment.location.end_offset
- targets = [] #: Array[_Target]
+ targets = [] #: Array[_CommentTarget]
node.comment_targets.map do |value|
case value
when StatementsNode
@@ -133,8 +165,8 @@ module Prism
end
targets.sort_by!(&:start_offset)
- preceding = nil #: _Target?
- following = nil #: _Target?
+ preceding = nil #: _CommentTarget?
+ following = nil #: _CommentTarget?
left = 0
right = targets.length
@@ -183,12 +215,5 @@ module Prism
[preceding, NodeTarget.new(node), following]
end
end
-
- private_constant :Comments
-
- # Attach the list of comments to their respective locations in the tree.
- def attach_comments!
- Comments.new(self).attach! # steep:ignore
- end
end
end
diff --git a/lib/prism/parse_result/errors.rb b/lib/prism/parse_result/errors.rb
new file mode 100644
index 0000000000..388309d23d
--- /dev/null
+++ b/lib/prism/parse_result/errors.rb
@@ -0,0 +1,72 @@
+# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
+
+require "stringio"
+
+module Prism
+ class ParseResult < Result
+ # An object to represent the set of errors on a parse result. This object
+ # can be used to format the errors in a human-readable way.
+ class Errors
+ # The parse result that contains the errors.
+ attr_reader :parse_result #: ParseResult
+
+ # Initialize a new set of errors from the given parse result.
+ #--
+ #: (ParseResult parse_result) -> void
+ def initialize(parse_result)
+ @parse_result = parse_result
+ end
+
+ # Formats the errors in a human-readable way and returns them as a string.
+ #--
+ #: () -> String
+ def format
+ error_lines = {} #: Hash[Integer, Array[ParseError]]
+ parse_result.errors.each do |error|
+ location = error.location
+ (location.start_line..location.end_line).each do |line|
+ error_lines[line] ||= []
+ error_lines[line] << error
+ end
+ end
+
+ source_lines = parse_result.source.source.lines
+ source_lines << "" if error_lines.key?(source_lines.size + 1)
+
+ io = StringIO.new
+ source_lines.each.with_index(1) do |line, line_number|
+ io.puts(line)
+
+ (error_lines.delete(line_number) || []).each do |error|
+ location = error.location
+
+ case line_number
+ when location.start_line
+ io.print(" " * location.start_column + "^")
+
+ if location.start_line == location.end_line
+ if location.start_column != location.end_column
+ io.print("~" * (location.end_column - location.start_column - 1))
+ end
+
+ io.puts(" " + error.message)
+ else
+ io.puts("~" * (line.bytesize - location.start_column))
+ end
+ when location.end_line
+ io.puts("~" * location.end_column + " " + error.message)
+ else
+ io.puts("~" * line.bytesize)
+ end
+ end
+ end
+
+ io.puts
+ io.string
+ end
+ end
+ end
+end
diff --git a/lib/prism/parse_result/newlines.rb b/lib/prism/parse_result/newlines.rb
index 03acb0b862..450c790226 100644
--- a/lib/prism/parse_result/newlines.rb
+++ b/lib/prism/parse_result/newlines.rb
@@ -1,7 +1,10 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
- class ParseResult
+ class ParseResult < Result
# The :line tracepoint event gets fired whenever the Ruby VM encounters an
# expression on a new line. The types of expressions that can trigger this
# event are:
@@ -17,50 +20,185 @@ module Prism
# Note that the logic in this file should be kept in sync with the Java
# MarkNewlinesVisitor, since that visitor is responsible for marking the
# newlines for JRuby/TruffleRuby.
+ #
+ # This file is autoloaded only when `mark_newlines!` is called, so the
+ # re-opening of the various nodes in this file will only be performed in
+ # that case. We do that to avoid storing the extra `@newline` instance
+ # variable on every node if we don't need it.
class Newlines < Visitor
+ # The map of line indices to whether or not they have been marked as
+ # emitting a newline event.
+ # @rbs @lines: Array[bool]
+
# Create a new Newlines visitor with the given newline offsets.
- def initialize(newline_marked)
- @newline_marked = newline_marked
+ #--
+ #: (Integer lines) -> void
+ def initialize(lines)
+ @lines = Array.new(1 + lines, false)
end
- # Permit block/lambda nodes to mark newlines within themselves.
+ # Permit block nodes to mark newlines within themselves.
+ #--
+ #: (BlockNode node) -> void
def visit_block_node(node)
- old_newline_marked = @newline_marked
- @newline_marked = Array.new(old_newline_marked.size, false)
+ old_lines = @lines
+ @lines = Array.new(old_lines.size, false)
begin
super(node)
ensure
- @newline_marked = old_newline_marked
+ @lines = old_lines
end
end
- alias_method :visit_lambda_node, :visit_block_node
+ # Permit lambda nodes to mark newlines within themselves.
+ #--
+ #: (LambdaNode node) -> void
+ def visit_lambda_node(node)
+ old_lines = @lines
+ @lines = Array.new(old_lines.size, false)
- # Mark if/unless nodes as newlines.
+ begin
+ super(node)
+ ensure
+ @lines = old_lines
+ end
+ end
+
+ # Mark if nodes as newlines.
+ #--
+ #: (IfNode node) -> void
def visit_if_node(node)
- node.set_newline_flag(@newline_marked)
+ node.newline_flag!(@lines)
super(node)
end
- alias_method :visit_unless_node, :visit_if_node
+ # Mark unless nodes as newlines.
+ #--
+ #: (UnlessNode node) -> void
+ def visit_unless_node(node)
+ node.newline_flag!(@lines)
+ super(node)
+ end
# Permit statements lists to mark newlines within themselves.
+ #--
+ #: (StatementsNode node) -> void
def visit_statements_node(node)
node.body.each do |child|
- child.set_newline_flag(@newline_marked)
+ child.newline_flag!(@lines)
end
super(node)
end
end
+ end
+
+ class Node
+ # Tracks whether or not this node should emit a newline event when the
+ # instructions that it represents are executed.
+ # @rbs @newline_flag: bool
+
+ #: () -> bool
+ def newline_flag? # :nodoc:
+ !!defined?(@newline_flag)
+ end
+
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ line = location.start_line
+ unless lines[line]
+ lines[line] = true
+ @newline_flag = true
+ end
+ end
+ end
+
+ class BeginNode < Node
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ # Never mark BeginNode with a newline flag, mark children instead.
+ end
+ end
+
+ class ParenthesesNode < Node
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ # Never mark ParenthesesNode with a newline flag, mark children instead.
+ end
+ end
+
+ class IfNode < Node
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ predicate.newline_flag!(lines)
+ end
+ end
- private_constant :Newlines
+ class UnlessNode < Node
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ predicate.newline_flag!(lines)
+ end
+ end
+
+ class UntilNode < Node
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ predicate.newline_flag!(lines)
+ end
+ end
+
+ class WhileNode < Node
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ predicate.newline_flag!(lines)
+ end
+ end
+
+ class RescueModifierNode < Node
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ expression.newline_flag!(lines)
+ end
+ end
+
+ class InterpolatedMatchLastLineNode < Node
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ first = parts.first
+ first.newline_flag!(lines) if first
+ end
+ end
+
+ class InterpolatedRegularExpressionNode < Node
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ first = parts.first
+ first.newline_flag!(lines) if first
+ end
+ end
+
+ class InterpolatedStringNode < Node
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ first = parts.first
+ first.newline_flag!(lines) if first
+ end
+ end
+
+ class InterpolatedSymbolNode < Node
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ first = parts.first
+ first.newline_flag!(lines) if first
+ end
+ end
- # Walk the tree and mark nodes that are on a new line.
- def mark_newlines!
- value = self.value
- raise "This method should only be called on a parse result that contains a node" unless Node === value
- value.accept(Newlines.new(Array.new(1 + source.offsets.size, false))) # steep:ignore
+ class InterpolatedXStringNode < Node
+ #: (Array[bool] lines) -> void
+ def newline_flag!(lines) # :nodoc:
+ first = parts.first
+ first.newline_flag!(lines) if first
end
end
end
diff --git a/lib/prism/pattern.rb b/lib/prism/pattern.rb
index e12cfd597f..be0493df05 100644
--- a/lib/prism/pattern.rb
+++ b/lib/prism/pattern.rb
@@ -1,4 +1,7 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
# A pattern is an object that wraps a Ruby pattern matching expression. The
@@ -40,7 +43,9 @@ module Prism
class CompilationError < StandardError
# Create a new CompilationError with the given representation of the node
# that caused the error.
- def initialize(repr)
+ #--
+ #: (String repr) -> void
+ def initialize(repr) # :nodoc:
super(<<~ERROR)
prism was unable to compile the pattern you provided into a usable
expression. It failed on to understand the node represented by:
@@ -56,10 +61,13 @@ module Prism
end
# The query that this pattern was initialized with.
- attr_reader :query
+ attr_reader :query #: String
+ # @rbs @compiled: Proc?
# Create a new pattern with the given query. The query should be a string
# containing a Ruby pattern matching expression.
+ #--
+ #: (String query) -> void
def initialize(query)
@query = query
@compiled = nil
@@ -67,6 +75,8 @@ module Prism
# Compile the query into a callable object that can be used to match against
# nodes.
+ #--
+ #: () -> Proc
def compile
result = Prism.parse("case nil\nin #{query}\nend")
@@ -83,7 +93,10 @@ module Prism
# pattern. If a block is given, it will be called with each node that
# matches the pattern. If no block is given, an enumerator will be returned
# that will yield each node that matches the pattern.
- def scan(root)
+ #--
+ #: (node root) -> Enumerator[node, void]
+ #: (node root) { (node) -> void } -> void
+ def scan(root, &blk)
return to_enum(:scan, root) unless block_given?
@compiled ||= compile
@@ -99,23 +112,33 @@ module Prism
# Shortcut for combining two procs into one that returns true if both return
# true.
- def combine_and(left, right)
+ #--
+ #: (Proc left, Proc right) -> Proc
+ def combine_and(left, right) # :nodoc:
->(other) { left.call(other) && right.call(other) }
end
# Shortcut for combining two procs into one that returns true if either
# returns true.
- def combine_or(left, right)
+ #--
+ #: (Proc left, Proc right) -> Proc
+ def combine_or(left, right) # :nodoc:
->(other) { left.call(other) || right.call(other) }
end
- # Raise an error because the given node is not supported.
- def compile_error(node)
+ # Raise an error because the given node is not supported. Note that this
+ # method never returns normally (it always raises), which is why its return
+ # type is annotated as `bot` rather than a concrete type.
+ #--
+ #: (node node) -> bot
+ def compile_error(node) # :nodoc:
raise CompilationError, node.inspect
end
# in [foo, bar, baz]
- def compile_array_pattern_node(node)
+ #--
+ #: (ArrayPatternNode node) -> Proc
+ def compile_array_pattern_node(node) # :nodoc:
compile_error(node) if !node.rest.nil? || node.posts.any?
constant = node.constant
@@ -140,16 +163,23 @@ module Prism
end
# in foo | bar
- def compile_alternation_pattern_node(node)
+ #--
+ #: (AlternationPatternNode node) -> Proc
+ def compile_alternation_pattern_node(node) # :nodoc:
combine_or(compile_node(node.left), compile_node(node.right))
end
# in Prism::ConstantReadNode
- def compile_constant_path_node(node)
+ #--
+ #: (ConstantPathNode node) -> Proc
+ def compile_constant_path_node(node) # :nodoc:
parent = node.parent
if parent.is_a?(ConstantReadNode) && parent.slice == "Prism"
- compile_node(node.child)
+ name = node.name
+ raise CompilationError, node.inspect if name.nil?
+
+ compile_constant_name(node, name)
else
compile_error(node)
end
@@ -157,15 +187,22 @@ module Prism
# in ConstantReadNode
# in String
- def compile_constant_read_node(node)
- value = node.slice
+ #--
+ #: (ConstantReadNode node) -> Proc
+ def compile_constant_read_node(node) # :nodoc:
+ compile_constant_name(node, node.name)
+ end
- if Prism.const_defined?(value, false)
- clazz = Prism.const_get(value)
+ # Compile a name associated with a constant.
+ #--
+ #: ((ConstantPathNode | ConstantReadNode) node, Symbol name) -> Proc
+ def compile_constant_name(node, name) # :nodoc:
+ if Prism.const_defined?(name, false)
+ clazz = Prism.const_get(name)
->(other) { clazz === other }
- elsif Object.const_defined?(value, false)
- clazz = Object.const_get(value)
+ elsif Object.const_defined?(name, false)
+ clazz = Object.const_get(name)
->(other) { clazz === other }
else
@@ -175,9 +212,14 @@ module Prism
# in InstanceVariableReadNode[name: Symbol]
# in { name: Symbol }
- def compile_hash_pattern_node(node)
+ #--
+ #: (HashPatternNode node) -> Proc
+ def compile_hash_pattern_node(node) # :nodoc:
compile_error(node) if node.rest
- compiled_constant = compile_node(node.constant) if node.constant
+
+ if (constant = node.constant)
+ compiled_constant = compile_node(constant)
+ end
preprocessed =
node.elements.to_h do |element|
@@ -205,12 +247,16 @@ module Prism
end
# in nil
- def compile_nil_node(node)
+ #--
+ #: (NilNode node) -> Proc
+ def compile_nil_node(node) # :nodoc:
->(attribute) { attribute.nil? }
end
# in /foo/
- def compile_regular_expression_node(node)
+ #--
+ #: (RegularExpressionNode node) -> Proc
+ def compile_regular_expression_node(node) # :nodoc:
regexp = Regexp.new(node.unescaped, node.closing[1..])
->(attribute) { regexp === attribute }
@@ -218,7 +264,9 @@ module Prism
# in ""
# in "foo"
- def compile_string_node(node)
+ #--
+ #: (StringNode node) -> Proc
+ def compile_string_node(node) # :nodoc:
string = node.unescaped
->(attribute) { string === attribute }
@@ -226,7 +274,9 @@ module Prism
# in :+
# in :foo
- def compile_symbol_node(node)
+ #--
+ #: (SymbolNode node) -> Proc
+ def compile_symbol_node(node) # :nodoc:
symbol = node.unescaped.to_sym
->(attribute) { symbol === attribute }
@@ -234,7 +284,9 @@ module Prism
# Compile any kind of node. Dispatch out to the individual compilation
# methods based on the type of node.
- def compile_node(node)
+ #--
+ #: (node node) -> Proc
+ def compile_node(node) # :nodoc:
case node
when AlternationPatternNode
compile_alternation_pattern_node(node)
diff --git a/lib/prism/polyfill/append_as_bytes.rb b/lib/prism/polyfill/append_as_bytes.rb
new file mode 100644
index 0000000000..24218bd171
--- /dev/null
+++ b/lib/prism/polyfill/append_as_bytes.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+# Polyfill for String#append_as_bytes, which didn't exist until Ruby 3.4.
+if !("".respond_to?(:append_as_bytes))
+ String.include(
+ Module.new {
+ def append_as_bytes(*args)
+ args.each do |arg|
+ arg = Integer === arg ? [arg].pack("C") : arg.b
+ self.<<(arg) # steep:ignore
+ end
+ end
+ }
+ )
+end
diff --git a/lib/prism/polyfill/byteindex.rb b/lib/prism/polyfill/byteindex.rb
new file mode 100644
index 0000000000..98c6089f14
--- /dev/null
+++ b/lib/prism/polyfill/byteindex.rb
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+# Polyfill for String#byteindex, which didn't exist until Ruby 3.2.
+if !("".respond_to?(:byteindex))
+ String.include(
+ Module.new {
+ def byteindex(needle, offset = 0)
+ charindex = index(needle, offset)
+ slice(0...charindex).bytesize if charindex
+ end
+ }
+ )
+end
diff --git a/lib/prism/polyfill/scan_byte.rb b/lib/prism/polyfill/scan_byte.rb
new file mode 100644
index 0000000000..9276e509fc
--- /dev/null
+++ b/lib/prism/polyfill/scan_byte.rb
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+
+require "strscan"
+
+# Polyfill for StringScanner#scan_byte, which didn't exist until Ruby 3.4.
+if !(StringScanner.method_defined?(:scan_byte))
+ StringScanner.include(
+ Module.new {
+ def scan_byte # :nodoc:
+ get_byte&.b&.ord
+ end
+ }
+ )
+end
diff --git a/lib/prism/polyfill/string.rb b/lib/prism/polyfill/string.rb
deleted file mode 100644
index 582266d956..0000000000
--- a/lib/prism/polyfill/string.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-
-# Polyfill for String#unpack1 with the offset parameter.
-if String.instance_method(:unpack1).parameters.none? { |_, name| name == :offset }
- String.prepend(
- Module.new {
- def unpack1(format, offset: 0) # :nodoc:
- offset == 0 ? super(format) : self[offset..].unpack1(format) # steep:ignore
- end
- }
- )
-end
diff --git a/lib/prism/polyfill/unpack1.rb b/lib/prism/polyfill/unpack1.rb
new file mode 100644
index 0000000000..3fa9b5a0c5
--- /dev/null
+++ b/lib/prism/polyfill/unpack1.rb
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+
+# Polyfill for String#unpack1 with the offset parameter. Not all Ruby engines
+# have Method#parameters implemented, so we check the arity instead if
+# necessary.
+if (unpack1 = String.instance_method(:unpack1)).respond_to?(:parameters) ? unpack1.parameters.none? { |_, name| name == :offset } : (unpack1.arity == 1)
+ String.prepend(
+ Module.new {
+ def unpack1(format, offset: 0) # :nodoc:
+ offset == 0 ? super(format) : self[offset..].unpack1(format) # steep:ignore
+ end
+ }
+ )
+end
diff --git a/lib/prism/polyfill/warn.rb b/lib/prism/polyfill/warn.rb
new file mode 100644
index 0000000000..76a4264623
--- /dev/null
+++ b/lib/prism/polyfill/warn.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+# Polyfill for Kernel#warn with the category parameter. Not all Ruby engines
+# have Method#parameters implemented, so we check the arity instead if
+# necessary.
+if (method = Kernel.instance_method(:warn)).respond_to?(:parameters) ? method.parameters.none? { |_, name| name == :category } : (method.arity == -1)
+ Kernel.prepend(
+ Module.new {
+ def warn(*msgs, uplevel: nil, category: nil) # :nodoc:
+ case uplevel
+ when nil
+ super(*msgs)
+ when Integer
+ super(*msgs, uplevel: uplevel + 1)
+ else
+ super(*msgs, uplevel: uplevel.to_int + 1)
+ end
+ end
+ }
+ )
+
+ Object.prepend(
+ Module.new {
+ def warn(*msgs, uplevel: nil, category: nil) # :nodoc:
+ case uplevel
+ when nil
+ super(*msgs)
+ when Integer
+ super(*msgs, uplevel: uplevel + 1)
+ else
+ super(*msgs, uplevel: uplevel.to_int + 1)
+ end
+ end
+ }
+ )
+end
diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec
index d43fcfd36b..aac056b3f8 100644
--- a/lib/prism/prism.gemspec
+++ b/lib/prism/prism.gemspec
@@ -2,7 +2,7 @@
Gem::Specification.new do |spec|
spec.name = "prism"
- spec.version = "0.24.0"
+ spec.version = "1.9.0"
spec.authors = ["Shopify"]
spec.email = ["ruby@shopify.com"]
@@ -35,121 +35,194 @@ Gem::Specification.new do |spec|
"docs/parser_translation.md",
"docs/parsing_rules.md",
"docs/releasing.md",
+ "docs/relocation.md",
"docs/ripper_translation.md",
"docs/ruby_api.md",
"docs/ruby_parser_translation.md",
"docs/serialization.md",
"docs/testing.md",
"ext/prism/api_node.c",
- "ext/prism/api_pack.c",
+ "ext/prism/extconf.rb",
"ext/prism/extension.c",
"ext/prism/extension.h",
"include/prism.h",
+ "include/prism/compiler/accel.h",
+ "include/prism/compiler/align.h",
+ "include/prism/compiler/exported.h",
+ "include/prism/compiler/fallthrough.h",
+ "include/prism/compiler/filesystem.h",
+ "include/prism/compiler/flex_array.h",
+ "include/prism/compiler/force_inline.h",
+ "include/prism/compiler/format.h",
+ "include/prism/compiler/inline.h",
+ "include/prism/compiler/nodiscard.h",
+ "include/prism/compiler/nonnull.h",
+ "include/prism/compiler/unused.h",
+ "include/prism/internal/allocator.h",
+ "include/prism/internal/allocator_debug.h",
+ "include/prism/internal/arena.h",
+ "include/prism/internal/bit.h",
+ "include/prism/internal/buffer.h",
+ "include/prism/internal/char.h",
+ "include/prism/internal/comments.h",
+ "include/prism/internal/constant_pool.h",
+ "include/prism/internal/diagnostic.h",
+ "include/prism/internal/encoding.h",
+ "include/prism/internal/integer.h",
+ "include/prism/internal/isinf.h",
+ "include/prism/internal/line_offset_list.h",
+ "include/prism/internal/list.h",
+ "include/prism/internal/magic_comments.h",
+ "include/prism/internal/memchr.h",
+ "include/prism/internal/node.h",
+ "include/prism/internal/options.h",
+ "include/prism/internal/parser.h",
+ "include/prism/internal/regexp.h",
+ "include/prism/internal/serialize.h",
+ "include/prism/internal/source.h",
+ "include/prism/internal/static_literals.h",
+ "include/prism/internal/strncasecmp.h",
+ "include/prism/internal/stringy.h",
+ "include/prism/internal/strpbrk.h",
+ "include/prism/internal/tokens.h",
+ "include/prism/arena.h",
"include/prism/ast.h",
- "include/prism/defines.h",
+ "include/prism/buffer.h",
+ "include/prism/comments.h",
+ "include/prism/constant_pool.h",
"include/prism/diagnostic.h",
- "include/prism/encoding.h",
+ "include/prism/excludes.h",
+ "include/prism/integer.h",
+ "include/prism/json.h",
+ "include/prism/line_offset_list.h",
+ "include/prism/magic_comments.h",
"include/prism/node.h",
"include/prism/options.h",
- "include/prism/pack.h",
"include/prism/parser.h",
"include/prism/prettyprint.h",
- "include/prism/regexp.h",
- "include/prism/static_literals.h",
- "include/prism/util/pm_buffer.h",
- "include/prism/util/pm_char.h",
- "include/prism/util/pm_constant_pool.h",
- "include/prism/util/pm_integer.h",
- "include/prism/util/pm_list.h",
- "include/prism/util/pm_memchr.h",
- "include/prism/util/pm_newline_list.h",
- "include/prism/util/pm_state_stack.h",
- "include/prism/util/pm_strncasecmp.h",
- "include/prism/util/pm_string.h",
- "include/prism/util/pm_string_list.h",
- "include/prism/util/pm_strpbrk.h",
+ "include/prism/serialize.h",
+ "include/prism/source.h",
+ "include/prism/stream.h",
+ "include/prism/string_query.h",
+ "include/prism/stringy.h",
"include/prism/version.h",
"lib/prism.rb",
"lib/prism/compiler.rb",
- "lib/prism/debug.rb",
"lib/prism/desugar_compiler.rb",
"lib/prism/dispatcher.rb",
"lib/prism/dot_visitor.rb",
"lib/prism/dsl.rb",
"lib/prism/ffi.rb",
+ "lib/prism/inspect_visitor.rb",
"lib/prism/lex_compat.rb",
"lib/prism/mutation_compiler.rb",
"lib/prism/node_ext.rb",
- "lib/prism/node_inspector.rb",
+ "lib/prism/node_find.rb",
"lib/prism/node.rb",
- "lib/prism/pack.rb",
"lib/prism/parse_result.rb",
"lib/prism/parse_result/comments.rb",
+ "lib/prism/parse_result/errors.rb",
"lib/prism/parse_result/newlines.rb",
"lib/prism/pattern.rb",
- "lib/prism/polyfill/string.rb",
+ "lib/prism/polyfill/append_as_bytes.rb",
+ "lib/prism/polyfill/byteindex.rb",
+ "lib/prism/polyfill/scan_byte.rb",
+ "lib/prism/polyfill/unpack1.rb",
+ "lib/prism/polyfill/warn.rb",
+ "lib/prism/reflection.rb",
+ "lib/prism/relocation.rb",
"lib/prism/serialize.rb",
+ "lib/prism/string_query.rb",
"lib/prism/translation.rb",
"lib/prism/translation/parser.rb",
- "lib/prism/translation/parser33.rb",
- "lib/prism/translation/parser34.rb",
+ "lib/prism/translation/parser_current.rb",
+ "lib/prism/translation/parser_versions.rb",
+ "lib/prism/translation/parser/builder.rb",
"lib/prism/translation/parser/compiler.rb",
"lib/prism/translation/parser/lexer.rb",
- "lib/prism/translation/parser/rubocop.rb",
"lib/prism/translation/ripper.rb",
+ "lib/prism/translation/ripper/filter.rb",
+ "lib/prism/translation/ripper/lexer.rb",
"lib/prism/translation/ripper/sexp.rb",
"lib/prism/translation/ripper/shim.rb",
"lib/prism/translation/ruby_parser.rb",
"lib/prism/visitor.rb",
+ "prism.gemspec",
+ "rbi/generated/prism.rbi",
+ "rbi/generated/prism/compiler.rbi",
+ "rbi/generated/prism/desugar_compiler.rbi",
+ "rbi/generated/prism/dispatcher.rbi",
+ "rbi/generated/prism/dot_visitor.rbi",
+ "rbi/generated/prism/dsl.rbi",
+ "rbi/generated/prism/inspect_visitor.rbi",
+ "rbi/generated/prism/lex_compat.rbi",
+ "rbi/generated/prism/mutation_compiler.rbi",
+ "rbi/generated/prism/node.rbi",
+ "rbi/generated/prism/node_ext.rbi",
+ "rbi/generated/prism/node_find.rbi",
+ "rbi/generated/prism/parse_result.rbi",
+ "rbi/generated/prism/pattern.rbi",
+ "rbi/generated/prism/reflection.rbi",
+ "rbi/generated/prism/relocation.rbi",
+ "rbi/generated/prism/serialize.rbi",
+ "rbi/generated/prism/string_query.rbi",
+ "rbi/generated/prism/translation.rbi",
+ "rbi/generated/prism/visitor.rbi",
+ "rbi/generated/prism/parse_result/comments.rbi",
+ "rbi/generated/prism/parse_result/errors.rbi",
+ "rbi/generated/prism/parse_result/newlines.rbi",
+ "rbi/prism/translation/parser.rbi",
+ "rbi/prism/translation/parser_versions.rbi",
+ "rbi/prism/translation/ripper.rbi",
+ "rbi/rubyvm/node_find.rbi",
+ "sig/generated/prism.rbs",
+ "sig/generated/prism/compiler.rbs",
+ "sig/generated/prism/desugar_compiler.rbs",
+ "sig/generated/prism/dispatcher.rbs",
+ "sig/generated/prism/dot_visitor.rbs",
+ "sig/generated/prism/dsl.rbs",
+ "sig/generated/prism/inspect_visitor.rbs",
+ "sig/generated/prism/lex_compat.rbs",
+ "sig/generated/prism/mutation_compiler.rbs",
+ "sig/generated/prism/node.rbs",
+ "sig/generated/prism/node_ext.rbs",
+ "sig/generated/prism/node_find.rbs",
+ "sig/generated/prism/parse_result.rbs",
+ "sig/generated/prism/pattern.rbs",
+ "sig/generated/prism/reflection.rbs",
+ "sig/generated/prism/relocation.rbs",
+ "sig/generated/prism/serialize.rbs",
+ "sig/generated/prism/string_query.rbs",
+ "sig/generated/prism/translation.rbs",
+ "sig/generated/prism/visitor.rbs",
+ "sig/generated/prism/parse_result/comments.rbs",
+ "sig/generated/prism/parse_result/errors.rbs",
+ "sig/generated/prism/parse_result/newlines.rbs",
+ "src/arena.c",
+ "src/buffer.c",
+ "src/char.c",
+ "src/constant_pool.c",
"src/diagnostic.c",
"src/encoding.c",
+ "src/integer.c",
+ "src/json.c",
+ "src/line_offset_list.c",
+ "src/list.c",
+ "src/memchr.c",
"src/node.c",
- "src/pack.c",
+ "src/options.c",
+ "src/parser.c",
"src/prettyprint.c",
+ "src/prism.c",
"src/regexp.c",
"src/serialize.c",
+ "src/source.c",
"src/static_literals.c",
- "src/token_type.c",
- "src/util/pm_buffer.c",
- "src/util/pm_char.c",
- "src/util/pm_constant_pool.c",
- "src/util/pm_integer.c",
- "src/util/pm_list.c",
- "src/util/pm_memchr.c",
- "src/util/pm_newline_list.c",
- "src/util/pm_state_stack.c",
- "src/util/pm_string.c",
- "src/util/pm_string_list.c",
- "src/util/pm_strncasecmp.c",
- "src/util/pm_strpbrk.c",
- "src/options.c",
- "src/prism.c",
- "prism.gemspec",
- "sig/prism.rbs",
- "sig/prism/compiler.rbs",
- "sig/prism/dispatcher.rbs",
- "sig/prism/dot_visitor.rbs",
- "sig/prism/dsl.rbs",
- "sig/prism/mutation_compiler.rbs",
- "sig/prism/node.rbs",
- "sig/prism/node_ext.rbs",
- "sig/prism/pack.rbs",
- "sig/prism/parse_result.rbs",
- "sig/prism/pattern.rbs",
- "sig/prism/serialize.rbs",
- "sig/prism/visitor.rbs",
- "rbi/prism.rbi",
- "rbi/prism/compiler.rbi",
- "rbi/prism/desugar_compiler.rbi",
- "rbi/prism/mutation_compiler.rbi",
- "rbi/prism/node_ext.rbi",
- "rbi/prism/node.rbi",
- "rbi/prism/parse_result.rbi",
- "rbi/prism/translation/parser/compiler.rbi",
- "rbi/prism/translation/ripper.rbi",
- "rbi/prism/translation/ripper/ripper_compiler.rbi",
- "rbi/prism/translation/ruby_parser.rbi",
- "rbi/prism/visitor.rbi"
+ "src/string_query.c",
+ "src/stringy.c",
+ "src/strncasecmp.c",
+ "src/strpbrk.c",
+ "src/tokens.c"
]
spec.extensions = ["ext/prism/extconf.rb"]
diff --git a/lib/prism/relocation.rb b/lib/prism/relocation.rb
new file mode 100644
index 0000000000..af0f792827
--- /dev/null
+++ b/lib/prism/relocation.rb
@@ -0,0 +1,665 @@
+# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
+
+module Prism
+ # Prism parses deterministically for the same input. This provides a nice
+ # property that is exposed through the #node_id API on nodes. Effectively this
+ # means that for the same input, these values will remain consistent every
+ # time the source is parsed. This means we can reparse the same source with a
+ # #node_id value and find the exact same node again.
+ #
+ # The Relocation module provides an API around this property. It allows you to
+ # "save" nodes and locations using a minimal amount of memory (just the
+ # node_id and a field identifier) and then reify them later.
+ module Relocation
+ # @rbs!
+ # type entry_value = untyped
+ # type entry_values = Hash[Symbol, entry_value]
+ #
+ # interface _Value
+ # def start_line: () -> Integer
+ # def end_line: () -> Integer
+ # def start_offset: () -> Integer
+ # def end_offset: () -> Integer
+ # def start_character_offset: () -> Integer
+ # def end_character_offset: () -> Integer
+ # def cached_start_code_units_offset: (_CodeUnitsCache cache) -> Integer
+ # def cached_end_code_units_offset: (_CodeUnitsCache cache) -> Integer
+ # def start_column: () -> Integer
+ # def end_column: () -> Integer
+ # def start_character_column: () -> Integer
+ # def end_character_column: () -> Integer
+ # def cached_start_code_units_column: (_CodeUnitsCache cache) -> Integer
+ # def cached_end_code_units_column: (_CodeUnitsCache cache) -> Integer
+ # def leading_comments: () -> Array[Comment]
+ # def trailing_comments: () -> Array[Comment]
+ # end
+ #
+ # interface _Field
+ # def fields: (_Value value) -> entry_values
+ # end
+
+ # An entry in a repository that will lazily reify its values when they are
+ # first accessed.
+ class Entry
+ # Raised if a value that could potentially be on an entry is missing
+ # because it was either not configured on the repository or it has not yet
+ # been fetched.
+ class MissingValueError < StandardError
+ end
+
+ # @rbs @repository: Repository?
+ # @rbs @values: Hash[Symbol, untyped]?
+
+ # Initialize a new entry with the given repository.
+ #--
+ #: (Repository repository) -> void
+ def initialize(repository)
+ @repository = repository
+ @values = nil
+ end
+
+ # Fetch the filepath of the value.
+ #--
+ #: () -> String
+ def filepath
+ fetch_value(:filepath)
+ end
+
+ # Fetch the start line of the value.
+ #--
+ #: () -> Integer
+ def start_line
+ fetch_value(:start_line)
+ end
+
+ # Fetch the end line of the value.
+ #--
+ #: () -> Integer
+ def end_line
+ fetch_value(:end_line)
+ end
+
+ # Fetch the start byte offset of the value.
+ #--
+ #: () -> Integer
+ def start_offset
+ fetch_value(:start_offset)
+ end
+
+ # Fetch the end byte offset of the value.
+ #--
+ #: () -> Integer
+ def end_offset
+ fetch_value(:end_offset)
+ end
+
+ # Fetch the start character offset of the value.
+ #--
+ #: () -> Integer
+ def start_character_offset
+ fetch_value(:start_character_offset)
+ end
+
+ # Fetch the end character offset of the value.
+ #--
+ #: () -> Integer
+ def end_character_offset
+ fetch_value(:end_character_offset)
+ end
+
+ # Fetch the start code units offset of the value, for the encoding that
+ # was configured on the repository.
+ #--
+ #: () -> Integer
+ def start_code_units_offset
+ fetch_value(:start_code_units_offset)
+ end
+
+ # Fetch the end code units offset of the value, for the encoding that was
+ # configured on the repository.
+ #--
+ #: () -> Integer
+ def end_code_units_offset
+ fetch_value(:end_code_units_offset)
+ end
+
+ # Fetch the start byte column of the value.
+ #--
+ #: () -> Integer
+ def start_column
+ fetch_value(:start_column)
+ end
+
+ # Fetch the end byte column of the value.
+ #--
+ #: () -> Integer
+ def end_column
+ fetch_value(:end_column)
+ end
+
+ # Fetch the start character column of the value.
+ #--
+ #: () -> Integer
+ def start_character_column
+ fetch_value(:start_character_column)
+ end
+
+ # Fetch the end character column of the value.
+ #--
+ #: () -> Integer
+ def end_character_column
+ fetch_value(:end_character_column)
+ end
+
+ # Fetch the start code units column of the value, for the encoding that
+ # was configured on the repository.
+ #--
+ #: () -> Integer
+ def start_code_units_column
+ fetch_value(:start_code_units_column)
+ end
+
+ # Fetch the end code units column of the value, for the encoding that was
+ # configured on the repository.
+ #--
+ #: () -> Integer
+ def end_code_units_column
+ fetch_value(:end_code_units_column)
+ end
+
+ # Fetch the leading comments of the value.
+ #--
+ #: () -> Array[CommentsField::Comment]
+ def leading_comments
+ fetch_value(:leading_comments)
+ end
+
+ # Fetch the trailing comments of the value.
+ #--
+ #: () -> Array[CommentsField::Comment]
+ def trailing_comments
+ fetch_value(:trailing_comments)
+ end
+
+ # Fetch the leading and trailing comments of the value.
+ #--
+ #: () -> Array[CommentsField::Comment]
+ def comments
+ [*leading_comments, *trailing_comments]
+ end
+
+ # Reify the values on this entry with the given values. This is an
+ # internal-only API that is called from the repository when it is time to
+ # reify the values.
+ #--
+ #: (entry_values values) -> void
+ def reify!(values) # :nodoc:
+ @repository = nil
+ @values = values
+ end
+
+ private
+
+ # Fetch a value from the entry, raising an error if it is missing.
+ #--
+ #: (Symbol name) -> entry_value
+ def fetch_value(name)
+ values.fetch(name) do
+ raise MissingValueError, "No value for #{name}, make sure the " \
+ "repository has been properly configured"
+ end
+ end
+
+ # Return the values from the repository, reifying them if necessary.
+ #--
+ #: () -> entry_values
+ def values
+ @values || (@repository&.reify!; @values) #: entry_values
+ end
+ end
+
+ # Represents the source of a repository that will be reparsed.
+ class Source
+ # The value that will need to be reparsed.
+ attr_reader :value #: untyped
+
+ # Initialize the source with the given value.
+ #--
+ #: (untyped value) -> void
+ def initialize(value)
+ @value = value
+ end
+
+ # Reparse the value and return the parse result.
+ #--
+ #: () -> ParseResult
+ def result
+ raise NotImplementedError, "Subclasses must implement #result"
+ end
+
+ # Create a code units cache for the given encoding.
+ #--
+ #: (Encoding encoding) -> _CodeUnitsCache
+ def code_units_cache(encoding)
+ result.code_units_cache(encoding)
+ end
+ end
+
+ # A source that is represented by a file path.
+ class SourceFilepath < Source
+ # Reparse the file and return the parse result.
+ #--
+ #: () -> ParseResult
+ def result
+ Prism.parse_file(value)
+ end
+ end
+
+ # A source that is represented by a string.
+ class SourceString < Source
+ # Reparse the string and return the parse result.
+ #--
+ #: () -> ParseResult
+ def result
+ Prism.parse(value)
+ end
+ end
+
+ # A field that represents the file path.
+ class FilepathField
+ # The file path that this field represents.
+ attr_reader :value #: String
+
+ # Initialize a new field with the given file path.
+ #--
+ #: (String value) -> void
+ def initialize(value)
+ @value = value
+ end
+
+ # Fetch the file path.
+ #--
+ #: (_Value _value) -> entry_values
+ def fields(_value)
+ { filepath: value }
+ end
+ end
+
+ # A field representing the start and end lines.
+ class LinesField
+ # Fetches the start and end line of a value.
+ #--
+ #: (_Value value) -> entry_values
+ def fields(value)
+ { start_line: value.start_line, end_line: value.end_line }
+ end
+ end
+
+ # A field representing the start and end byte offsets.
+ class OffsetsField
+ # Fetches the start and end byte offset of a value.
+ #--
+ #: (_Value value) -> entry_values
+ def fields(value)
+ { start_offset: value.start_offset, end_offset: value.end_offset }
+ end
+ end
+
+ # A field representing the start and end character offsets.
+ class CharacterOffsetsField
+ # Fetches the start and end character offset of a value.
+ #--
+ #: (_Value value) -> entry_values
+ def fields(value)
+ {
+ start_character_offset: value.start_character_offset,
+ end_character_offset: value.end_character_offset
+ }
+ end
+ end
+
+ # A field representing the start and end code unit offsets.
+ class CodeUnitOffsetsField
+ # A pointer to the repository object that is used for lazily creating a
+ # code units cache.
+ attr_reader :repository #: Repository
+
+ # The associated encoding for the code units.
+ attr_reader :encoding #: Encoding
+
+ # @rbs @cache: _CodeUnitsCache?
+
+ # Initialize a new field with the associated repository and encoding.
+ #--
+ #: (Repository repository, Encoding encoding) -> void
+ def initialize(repository, encoding)
+ @repository = repository
+ @encoding = encoding
+ @cache = nil
+ end
+
+ # Fetches the start and end code units offset of a value for a particular
+ # encoding.
+ #--
+ #: (_Value value) -> entry_values
+ def fields(value)
+ {
+ start_code_units_offset: value.cached_start_code_units_offset(cache),
+ end_code_units_offset: value.cached_end_code_units_offset(cache)
+ }
+ end
+
+ private
+
+ # Lazily create a code units cache for the associated encoding.
+ #--
+ #: () -> _CodeUnitsCache
+ def cache
+ @cache ||= repository.code_units_cache(encoding)
+ end
+ end
+
+ # A field representing the start and end byte columns.
+ class ColumnsField
+ # Fetches the start and end byte column of a value.
+ #--
+ #: (_Value value) -> entry_values
+ def fields(value)
+ { start_column: value.start_column, end_column: value.end_column }
+ end
+ end
+
+ # A field representing the start and end character columns.
+ class CharacterColumnsField
+ # Fetches the start and end character column of a value.
+ #--
+ #: (_Value value) -> entry_values
+ def fields(value)
+ {
+ start_character_column: value.start_character_column,
+ end_character_column: value.end_character_column
+ }
+ end
+ end
+
+ # A field representing the start and end code unit columns for a specific
+ # encoding.
+ class CodeUnitColumnsField
+ # The repository object that is used for lazily creating a code units
+ # cache.
+ attr_reader :repository #: Repository
+
+ # The associated encoding for the code units.
+ attr_reader :encoding #: Encoding
+
+ # @rbs @cache: _CodeUnitsCache?
+
+ # Initialize a new field with the associated repository and encoding.
+ #--
+ #: (Repository repository, Encoding encoding) -> void
+ def initialize(repository, encoding)
+ @repository = repository
+ @encoding = encoding
+ @cache = nil
+ end
+
+ # Fetches the start and end code units column of a value for a particular
+ # encoding.
+ #--
+ #: (_Value value) -> entry_values
+ def fields(value)
+ {
+ start_code_units_column: value.cached_start_code_units_column(cache),
+ end_code_units_column: value.cached_end_code_units_column(cache)
+ }
+ end
+
+ private
+
+ # Lazily create a code units cache for the associated encoding.
+ #--
+ #: () -> _CodeUnitsCache
+ def cache
+ @cache ||= repository.code_units_cache(encoding)
+ end
+ end
+
+ # An abstract field used as the parent class of the two comments fields.
+ class CommentsField
+ # An object that represents a slice of a comment.
+ class Comment
+ # The slice of the comment.
+ attr_reader :slice #: String
+
+ # Initialize a new comment with the given slice.
+ #--
+ #: (String slice) -> void
+ def initialize(slice)
+ @slice = slice
+ end
+ end
+
+ private
+
+ # Create comment objects from the given values.
+ #--
+ #: (entry_value values) -> Array[Comment]
+ def comments(values)
+ values.map { |value| Comment.new(value.slice) }
+ end
+ end
+
+ # A field representing the leading comments.
+ class LeadingCommentsField < CommentsField
+ # Fetches the leading comments of a value.
+ #--
+ #: (_Value value) -> entry_values
+ def fields(value)
+ { leading_comments: comments(value.leading_comments) }
+ end
+ end
+
+ # A field representing the trailing comments.
+ class TrailingCommentsField < CommentsField
+ # Fetches the trailing comments of a value.
+ #--
+ #: (_Value value) -> entry_values
+ def fields(value)
+ { trailing_comments: comments(value.trailing_comments) }
+ end
+ end
+
+ # A repository is a configured collection of fields and a set of entries
+ # that knows how to reparse a source and reify the values.
+ class Repository
+ # Raised when multiple fields of the same type are configured on the same
+ # repository.
+ class ConfigurationError < StandardError
+ end
+
+ # The source associated with this repository. This will be either a
+ # SourceFilepath (the most common use case) or a SourceString.
+ attr_reader :source #: Source
+
+ # The fields that have been configured on this repository.
+ attr_reader :fields #: Hash[Symbol, _Field]
+
+ # The entries that have been saved on this repository.
+ attr_reader :entries #: Hash[Integer, Hash[Symbol, Entry]]
+
+ # Initialize a new repository with the given source.
+ #--
+ #: (Source source) -> void
+ def initialize(source)
+ @source = source
+ @fields = {}
+ @entries = Hash.new { |hash, node_id| hash[node_id] = {} }
+ end
+
+ # Create a code units cache for the given encoding from the source.
+ #--
+ #: (Encoding encoding) -> _CodeUnitsCache
+ def code_units_cache(encoding)
+ source.code_units_cache(encoding)
+ end
+
+ # Configure the filepath field for this repository and return self.
+ #--
+ #: () -> self
+ def filepath
+ raise ConfigurationError, "Can only specify filepath for a filepath source" unless source.is_a?(SourceFilepath)
+ field(:filepath, FilepathField.new(source.value))
+ end
+
+ # Configure the lines field for this repository and return self.
+ #--
+ #: () -> self
+ def lines
+ field(:lines, LinesField.new)
+ end
+
+ # Configure the offsets field for this repository and return self.
+ #--
+ #: () -> self
+ def offsets
+ field(:offsets, OffsetsField.new)
+ end
+
+ # Configure the character offsets field for this repository and return
+ # self.
+ #--
+ #: () -> self
+ def character_offsets
+ field(:character_offsets, CharacterOffsetsField.new)
+ end
+
+ # Configure the code unit offsets field for this repository for a specific
+ # encoding and return self.
+ #--
+ #: (Encoding encoding) -> self
+ def code_unit_offsets(encoding)
+ field(:code_unit_offsets, CodeUnitOffsetsField.new(self, encoding))
+ end
+
+ # Configure the columns field for this repository and return self.
+ #--
+ #: () -> self
+ def columns
+ field(:columns, ColumnsField.new)
+ end
+
+ # Configure the character columns field for this repository and return
+ # self.
+ #--
+ #: () -> self
+ def character_columns
+ field(:character_columns, CharacterColumnsField.new)
+ end
+
+ # Configure the code unit columns field for this repository for a specific
+ # encoding and return self.
+ #--
+ #: (Encoding encoding) -> self
+ def code_unit_columns(encoding)
+ field(:code_unit_columns, CodeUnitColumnsField.new(self, encoding))
+ end
+
+ # Configure the leading comments field for this repository and return
+ # self.
+ #--
+ #: () -> self
+ def leading_comments
+ field(:leading_comments, LeadingCommentsField.new)
+ end
+
+ # Configure the trailing comments field for this repository and return
+ # self.
+ #--
+ #: () -> self
+ def trailing_comments
+ field(:trailing_comments, TrailingCommentsField.new)
+ end
+
+ # Configure both the leading and trailing comment fields for this
+ # repository and return self.
+ #--
+ #: () -> self
+ def comments
+ leading_comments.trailing_comments
+ end
+
+ # This method is called from nodes and locations when they want to enter
+ # themselves into the repository. It is internal-only and meant to be
+ # called from the #save* APIs.
+ #--
+ #: (Integer node_id, Symbol field_name) -> Entry
+ def enter(node_id, field_name) # :nodoc:
+ entry = Entry.new(self)
+ @entries[node_id][field_name] = entry
+ entry
+ end
+
+ # This method is called from the entries in the repository when they need
+ # to reify their values. It is internal-only and meant to be called from
+ # the various value APIs.
+ #--
+ #: () -> void
+ def reify! # :nodoc:
+ result = source.result
+
+ # Attach the comments if they have been requested as part of the
+ # configuration of this repository.
+ if fields.key?(:leading_comments) || fields.key?(:trailing_comments)
+ result.attach_comments!
+ end
+
+ queue = [result.value] #: Array[Prism::node]
+ while (node = queue.shift)
+ @entries[node.node_id].each do |field_name, entry|
+ value = node.public_send(field_name)
+ values = {} #: entry_values
+
+ fields.each_value do |field|
+ values.merge!(field.fields(value))
+ end
+
+ entry.reify!(values)
+ end
+
+ queue.concat(node.compact_child_nodes)
+ end
+
+ @entries.clear
+ end
+
+ private
+
+ # Append the given field to the repository and return the repository so
+ # that these calls can be chained.
+ #--
+ #: (Symbol name, _Field) -> self
+ def field(name, value)
+ raise ConfigurationError, "Cannot specify multiple #{name} fields" if @fields.key?(name)
+ @fields[name] = value
+ self
+ end
+ end
+
+ # Create a new repository for the given filepath.
+ #--
+ #: (String value) -> Repository
+ def self.filepath(value)
+ Repository.new(SourceFilepath.new(value))
+ end
+
+ # Create a new repository for the given string.
+ #--
+ #: (String value) -> Repository
+ def self.string(value)
+ Repository.new(SourceString.new(value))
+ end
+ end
+end
diff --git a/lib/prism/string_query.rb b/lib/prism/string_query.rb
new file mode 100644
index 0000000000..99ce57e5fe
--- /dev/null
+++ b/lib/prism/string_query.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
+
+module Prism
+ # Query methods that allow categorizing strings based on their context for
+ # where they could be valid in a Ruby syntax tree.
+ class StringQuery
+ # @rbs!
+ # def self.local?: (String string) -> bool
+ # def self.constant?: (String string) -> bool
+ # def self.method_name?: (String string) -> bool
+
+ # The string that this query is wrapping.
+ attr_reader :string #: String
+
+ # Initialize a new query with the given string.
+ #--
+ #: (String string) -> void
+ def initialize(string)
+ @string = string
+ end
+
+ # Whether or not this string is a valid local variable name.
+ #--
+ #: () -> bool
+ def local?
+ StringQuery.local?(string)
+ end
+
+ # Whether or not this string is a valid constant name.
+ #--
+ #: () -> bool
+ def constant?
+ StringQuery.constant?(string)
+ end
+
+ # Whether or not this string is a valid method name.
+ #--
+ #: () -> bool
+ def method_name?
+ StringQuery.method_name?(string)
+ end
+ end
+end
diff --git a/lib/prism/translation.rb b/lib/prism/translation.rb
index 8b75e8a3ab..5a086a7542 100644
--- a/lib/prism/translation.rb
+++ b/lib/prism/translation.rb
@@ -1,12 +1,19 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
# This module is responsible for converting the prism syntax tree into other
# syntax trees.
module Translation # steep:ignore
autoload :Parser, "prism/translation/parser"
- autoload :Parser33, "prism/translation/parser33"
- autoload :Parser34, "prism/translation/parser34"
+ autoload :ParserCurrent, "prism/translation/parser_current"
+ autoload :Parser33, "prism/translation/parser_versions"
+ autoload :Parser34, "prism/translation/parser_versions"
+ autoload :Parser35, "prism/translation/parser_versions"
+ autoload :Parser40, "prism/translation/parser_versions"
+ autoload :Parser41, "prism/translation/parser_versions"
autoload :Ripper, "prism/translation/ripper"
autoload :RubyParser, "prism/translation/ruby_parser"
end
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb
index 0d11b8f566..70031f133a 100644
--- a/lib/prism/translation/parser.rb
+++ b/lib/prism/translation/parser.rb
@@ -1,6 +1,17 @@
# frozen_string_literal: true
-
-require "parser"
+# :markup: markdown
+
+begin
+ required_version = ">= 3.3.7.2"
+ gem "parser", required_version
+ require "parser"
+rescue LoadError
+ warn(<<~MSG)
+ Error: Unable to load parser #{required_version}. \
+ Add `gem "parser"` to your Gemfile or run `bundle update parser`.
+ MSG
+ exit(1)
+end
module Prism
module Translation
@@ -8,6 +19,13 @@ module Prism
# whitequark/parser gem's syntax tree. It inherits from the base parser for
# the parser gem, and overrides the parse* methods to parse with prism and
# then translate.
+ #
+ # Note that this version of the parser always parses using the latest
+ # version of Ruby syntax supported by Prism. If you want specific version
+ # support, use one of the version-specific subclasses, such as
+ # `Prism::Translation::Parser34`. If you want to parse using the same
+ # version of Ruby syntax as the currently running version of Ruby, use
+ # `Prism::Translation::ParserCurrent`.
class Parser < ::Parser::Base
Diagnostic = ::Parser::Diagnostic # :nodoc:
private_constant :Diagnostic
@@ -15,7 +33,7 @@ module Prism
# The parser gem has a list of diagnostics with a hard-coded set of error
# messages. We create our own diagnostic class in order to set our own
# error messages.
- class PrismDiagnostic < Diagnostic
+ class PrismDiagnostic < Diagnostic # :nodoc:
# This is the cached message coming from prism.
attr_reader :message
@@ -28,8 +46,45 @@ module Prism
Racc_debug_parser = false # :nodoc:
+ # The `builder` argument is used to create the parser using our custom builder class by default.
+ #
+ # By using the `:parser` keyword argument, you can translate in a way that is compatible with
+ # the Parser gem using any parser.
+ #
+ # For example, in RuboCop for Ruby LSP, the following approach can be used to improve performance
+ # by reusing a pre-parsed `Prism::ParseLexResult`:
+ #
+ # class PrismPreparsed
+ # def initialize(prism_result)
+ # @prism_result = prism_result
+ # end
+ #
+ # def parse_lex(source, **options)
+ # @prism_result
+ # end
+ # end
+ #
+ # prism_preparsed = PrismPreparsed.new(prism_result)
+ #
+ # Prism::Translation::Parser34.new(builder, parser: prism_preparsed)
+ #
+ # In an object passed to the `:parser` keyword argument, the `parse` and `parse_lex` methods
+ # should be implemented as needed.
+ #
+ def initialize(builder = Prism::Translation::Parser::Builder.new, parser: Prism)
+ if !builder.is_a?(Prism::Translation::Parser::Builder)
+ warn(<<~MSG, uplevel: 1, category: :deprecated)
+ [deprecation]: The builder passed to `Prism::Translation::Parser.new` is not a \
+ `Prism::Translation::Parser::Builder` subclass. This will raise in the next major version.
+ MSG
+ end
+ @parser = parser
+
+ super(builder)
+ end
+
def version # :nodoc:
- 34
+ 41
end
# The default encoding for Ruby files is UTF-8.
@@ -46,7 +101,7 @@ module Prism
source = source_buffer.source
offset_cache = build_offset_cache(source)
- result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version)), offset_cache)
+ result = unwrap(@parser.parse(source, **prism_options), offset_cache)
build_ast(result.value, offset_cache)
ensure
@@ -59,7 +114,7 @@ module Prism
source = source_buffer.source
offset_cache = build_offset_cache(source)
- result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version)), offset_cache)
+ result = unwrap(@parser.parse(source, **prism_options), offset_cache)
[
build_ast(result.value, offset_cache),
@@ -78,7 +133,7 @@ module Prism
offset_cache = build_offset_cache(source)
result =
begin
- unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version)), offset_cache)
+ unwrap(@parser.parse_lex(source, **prism_options), offset_cache)
rescue ::Parser::SyntaxError
raise if !recover
end
@@ -149,17 +204,17 @@ module Prism
Diagnostic.new(:error, :endless_setter, {}, diagnostic_location, [])
when :embdoc_term
Diagnostic.new(:error, :embedded_document, {}, diagnostic_location, [])
- when :incomplete_variable_class, :incomplete_variable_class_3_3_0
+ when :incomplete_variable_class, :incomplete_variable_class_3_3
location = location.copy(length: location.length + 1)
diagnostic_location = build_range(location, offset_cache)
Diagnostic.new(:error, :cvar_name, { name: location.slice }, diagnostic_location, [])
- when :incomplete_variable_instance, :incomplete_variable_instance_3_3_0
+ when :incomplete_variable_instance, :incomplete_variable_instance_3_3
location = location.copy(length: location.length + 1)
diagnostic_location = build_range(location, offset_cache)
Diagnostic.new(:error, :ivar_name, { name: location.slice }, diagnostic_location, [])
- when :invalid_variable_global, :invalid_variable_global_3_3_0
+ when :invalid_variable_global, :invalid_variable_global_3_3
Diagnostic.new(:error, :gvar_name, { name: location.slice }, diagnostic_location, [])
when :module_in_method
Diagnostic.new(:error, :module_in_def, {}, diagnostic_location, [])
@@ -280,18 +335,37 @@ module Prism
)
end
+ # Options for how prism should parse/lex the source.
+ def prism_options
+   # Keyword options shared by every `parse` / `parse_lex` call on the
+   # configured parser object.
+   buffer = @source_buffer
+   options = { filepath: buffer.name, version: convert_for_prism(version), partial_script: true }
+
+   # The parser gem always encodes to UTF-8, unless it is binary.
+   # https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/source/buffer.rb#L80-L107
+   options[:encoding] = false unless buffer.source.encoding == Encoding::BINARY
+
+   options
+ end
+
# Converts the version format handled by Parser to the format handled by Prism.
def convert_for_prism(version)
case version
when 33
- "3.3.0"
+ "3.3.1"
when 34
"3.4.0"
+ when 35, 40
+ "4.0.0"
+ when 41
+ "4.1.0"
else
"latest"
end
end
+ require_relative "parser/builder"
require_relative "parser/compiler"
require_relative "parser/lexer"
diff --git a/lib/prism/translation/parser/builder.rb b/lib/prism/translation/parser/builder.rb
new file mode 100644
index 0000000000..7fc3bba6b7
--- /dev/null
+++ b/lib/prism/translation/parser/builder.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+module Prism
+ module Translation
+ class Parser
+ # A builder that knows how to convert more modern Ruby syntax
+ # into whitequark/parser gem's syntax tree.
+ class Builder < ::Parser::Builders::Default
+ # Builds the `:itarg` node that stands for the implicit `it` block
+ # parameter, which is not yet implemented in the Parser gem. The node has
+ # `:it` as its only child; `#block` below recognizes it by its type.
+ def itarg
+ n(:itarg, [:it], nil)
+ end
+
+ # Builds the node for a method call with a block attached.
+ #
+ # This mirrors the parser gem implementation linked below. The following
+ # three lines have been added to it to support the `it` block parameter
+ # syntax:
+ #
+ # if args.type == :itarg
+ # block_type = :itblock
+ # args = :it
+ #
+ # https://github.com/whitequark/parser/blob/v3.3.7.1/lib/parser/builders/default.rb#L1122-L1155
+ #
+ # The resulting node type is `:itblock` for an `:itarg` argument node,
+ # `:numblock` for a `:numargs` argument node and `:block` otherwise.
+ def block(method_call, begin_t, args, body, end_t)
+ _receiver, _selector, *call_args = *method_call
+
+ # A block cannot be attached to `yield`.
+ if method_call.type == :yield
+ diagnostic :error, :block_given_to_yield, nil, method_call.loc.keyword, [loc(begin_t)]
+ end
+
+ # A literal block cannot be combined with a block-pass or forwarded
+ # arguments as the last call argument.
+ last_arg = call_args.last
+ if last_arg && (last_arg.type == :block_pass || last_arg.type == :forwarded_args)
+ diagnostic :error, :block_and_blockarg, nil, last_arg.loc.expression, [loc(begin_t)]
+ end
+
+ # Implicit-parameter blocks get their own node type, and their args child
+ # is replaced: `:it` for `it` blocks, the first child of the `:numargs`
+ # node for numbered-parameter blocks.
+ if args.type == :itarg
+ block_type = :itblock
+ args = :it
+ elsif args.type == :numargs
+ block_type = :numblock
+ args = args.children[0]
+ else
+ block_type = :block
+ end
+
+ if [:send, :csend, :index, :super, :zsuper, :lambda].include?(method_call.type)
+ n(block_type, [ method_call, args, body ],
+ block_map(method_call.loc.expression, begin_t, end_t))
+ else
+ # Code like "return foo 1 do end" is reduced in a weird sequence.
+ # Here, method_call is actually (return).
+ # The block is attached to the wrapped call, and the outer node is
+ # rebuilt around it with its expression range extended over the block.
+ actual_send, = *method_call
+ block =
+ n(block_type, [ actual_send, args, body ],
+ block_map(actual_send.loc.expression, begin_t, end_t))
+
+ n(method_call.type, [ block ],
+ method_call.loc.with_expression(join_exprs(method_call, block)))
+ end
+ end
+
+ # def foo(&nil); end
+ # ^^^^
+ #
+ # Builds a childless `:blocknilarg` node located from the `&` token and
+ # the `nil` token.
+ def blocknilarg(amper_t, nil_t)
+ n0(:blocknilarg, arg_prefix_map(amper_t, nil_t))
+ end
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
index 0503d003f5..d11db12ae6 100644
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
@@ -1,13 +1,14 @@
# frozen_string_literal: true
+# :markup: markdown
module Prism
module Translation
class Parser
# A visitor that knows how to convert a prism syntax tree into the
# whitequark/parser gem's syntax tree.
- class Compiler < ::Prism::Compiler
+ class Compiler < ::Prism::Compiler # :nodoc:
# Raised when the tree is malformed or there is a bug in the compiler.
- class CompilationError < StandardError
+ class CompilationError < StandardError # :nodoc:
end
# The Parser::Base instance that is being used to build the AST.
@@ -74,7 +75,29 @@ module Prism
# []
# ^^
def visit_array_node(node)
- builder.array(token(node.opening_loc), visit_all(node.elements), token(node.closing_loc))
+ if node.opening&.start_with?("%w", "%W", "%i", "%I")
+ elements = node.elements.flat_map do |element|
+ if element.is_a?(StringNode)
+ if element.content.include?("\n")
+ string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([element.unescaped, srange(element.content_loc)])]
+ end
+ elsif element.is_a?(InterpolatedStringNode)
+ builder.string_compose(
+ token(element.opening_loc),
+ string_nodes_from_interpolation(element, node.opening),
+ token(element.closing_loc)
+ )
+ else
+ [visit(element)]
+ end
+ end
+ else
+ elements = visit_all(node.elements)
+ end
+
+ builder.array(token(node.opening_loc), elements, token(node.closing_loc))
end
# foo => [bar]
@@ -90,7 +113,11 @@ module Prism
end
if node.constant
- builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.array_pattern(nil, visited, nil), token(node.closing_loc))
+ if visited.empty?
+ builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.array_pattern(token(node.opening_loc), visited, token(node.closing_loc)), token(node.closing_loc))
+ else
+ builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.array_pattern(nil, visited, nil), token(node.closing_loc))
+ end
else
builder.array_pattern(token(node.opening_loc), visited, token(node.closing_loc))
end
@@ -105,38 +132,45 @@ module Prism
# { a: 1 }
# ^^^^
def visit_assoc_node(node)
- if in_pattern
- if node.value.is_a?(ImplicitNode)
- if node.key.is_a?(SymbolNode)
- builder.match_hash_var([node.key.unescaped, srange(node.key.location)])
+ key = node.key
+
+ if node.value.is_a?(ImplicitNode)
+ if in_pattern
+ if key.is_a?(SymbolNode)
+ if key.opening.nil?
+ builder.match_hash_var([key.unescaped, srange(key.location)])
+ else
+ builder.match_hash_var_from_str(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc))
+ end
else
- builder.match_hash_var_from_str(token(node.key.opening_loc), visit_all(node.key.parts), token(node.key.closing_loc))
+ builder.match_hash_var_from_str(token(key.opening_loc), visit_all(key.parts), token(key.closing_loc))
end
else
- builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
- end
- elsif node.value.is_a?(ImplicitNode)
- if (value = node.value.value).is_a?(LocalVariableReadNode)
- builder.pair_keyword(
- [node.key.unescaped, srange(node.key)],
- builder.ident([value.name, srange(node.key.value_loc)]).updated(:lvar)
- )
- else
- builder.pair_label([node.key.unescaped, srange(node.key.location)])
+ value = node.value.value
+
+ implicit_value = if value.is_a?(CallNode)
+ builder.call_method(nil, nil, [value.name, srange(value.message_loc)])
+ elsif value.is_a?(ConstantReadNode)
+ builder.const([value.name, srange(key.value_loc)])
+ else
+ builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
+ end
+
+ builder.pair_keyword([key.unescaped, srange(key)], implicit_value)
end
elsif node.operator_loc
- builder.pair(visit(node.key), token(node.operator_loc), visit(node.value))
- elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil?
- builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
+ builder.pair(visit(key), token(node.operator_loc), visit(node.value))
+ elsif key.is_a?(SymbolNode) && key.opening_loc.nil?
+ builder.pair_keyword([key.unescaped, srange(key.location)], visit(node.value))
else
parts =
- if node.key.is_a?(SymbolNode)
- [builder.string_internal([node.key.unescaped, srange(node.key.value_loc)])]
+ if key.is_a?(SymbolNode)
+ [builder.string_internal([key.unescaped, srange(key.value_loc)])]
else
- visit_all(node.key.parts)
+ visit_all(key.parts)
end
- builder.pair_quoted(token(node.key.opening_loc), parts, token(node.key.closing_loc), visit(node.value))
+ builder.pair_quoted(token(key.opening_loc), parts, token(key.closing_loc), visit(node.value))
end
end
@@ -146,7 +180,9 @@ module Prism
# { **foo }
# ^^^^^
def visit_assoc_splat_node(node)
- if node.value.nil? && forwarding.include?(:**)
+ if in_pattern
+ builder.match_rest(token(node.operator_loc), token(node.value&.location))
+ elsif node.value.nil? && forwarding.include?(:**)
builder.forwarded_kwrestarg(token(node.operator_loc))
else
builder.kwsplat(token(node.operator_loc), visit(node.value))
@@ -167,17 +203,24 @@ module Prism
if (rescue_clause = node.rescue_clause)
begin
find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset
- find_end_offset = (rescue_clause.statements&.location&.start_offset || rescue_clause.consequent&.location&.start_offset || (find_start_offset + 1))
+ find_end_offset = (
+ rescue_clause.statements&.location&.start_offset ||
+ rescue_clause.subsequent&.location&.start_offset ||
+ node.else_clause&.location&.start_offset ||
+ node.ensure_clause&.location&.start_offset ||
+ node.end_keyword_loc&.start_offset ||
+ find_start_offset + 1
+ )
rescue_bodies << builder.rescue_body(
token(rescue_clause.keyword_loc),
rescue_clause.exceptions.any? ? builder.array(nil, visit_all(rescue_clause.exceptions), nil) : nil,
token(rescue_clause.operator_loc),
visit(rescue_clause.reference),
- srange_find(find_start_offset, find_end_offset, [";"]),
+ srange_semicolon(find_start_offset, find_end_offset),
visit(rescue_clause.statements)
)
- end until (rescue_clause = rescue_clause.consequent).nil?
+ end until (rescue_clause = rescue_clause.subsequent).nil?
end
begin_body =
@@ -254,11 +297,6 @@ module Prism
if node.call_operator_loc.nil?
case name
- when :-@
- case (receiver = node.receiver).type
- when :integer_node, :float_node, :rational_node, :imaginary_node
- return visit(numeric_negate(node.message_loc, receiver))
- end
when :!
return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
when :=~
@@ -280,7 +318,7 @@ module Prism
visit_all(arguments),
token(node.closing_loc),
),
- srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
+ token(node.equal_loc),
visit(node.arguments.arguments.last)
),
block
@@ -297,7 +335,7 @@ module Prism
if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
builder.assign(
builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
- srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]),
+ token(node.equal_loc),
visit(node.arguments.arguments.last)
)
else
@@ -328,18 +366,48 @@ module Prism
[],
nil
),
- [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
visit(node.value)
)
end
# foo.bar &&= baz
# ^^^^^^^^^^^^^^^
- alias visit_call_and_write_node visit_call_operator_write_node
+ def visit_call_and_write_node(node)
+   call_operator_loc = node.call_operator_loc
+
+   # Left-hand side: the attribute read, built as a call without arguments.
+   receiver = visit(node.receiver)
+   dot =
+     if call_operator_loc
+       [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)]
+     end
+   selector = ([node.read_name, srange(node.message_loc)] if node.message_loc)
+   target = builder.call_method(receiver, dot, selector, nil, [], nil)
+
+   # The operator token carries the operator text without its trailing "=".
+   operator_loc = node.operator_loc
+   builder.op_assign(target, [operator_loc.slice.chomp("="), srange(operator_loc)], visit(node.value))
+ end
# foo.bar ||= baz
# ^^^^^^^^^^^^^^^
- alias visit_call_or_write_node visit_call_operator_write_node
+ def visit_call_or_write_node(node)
+   call_operator_loc = node.call_operator_loc
+
+   # Left-hand side: the attribute read, built as a call without arguments.
+   receiver = visit(node.receiver)
+   dot =
+     if call_operator_loc
+       [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)]
+     end
+   selector = ([node.read_name, srange(node.message_loc)] if node.message_loc)
+   target = builder.call_method(receiver, dot, selector, nil, [], nil)
+
+   # The operator token carries the operator text without its trailing "=".
+   operator_loc = node.operator_loc
+   builder.op_assign(target, [operator_loc.slice.chomp("="), srange(operator_loc)], visit(node.value))
+ end
# foo.bar, = 1
# ^^^^^^^
@@ -366,8 +434,8 @@ module Prism
token(node.case_keyword_loc),
visit(node.predicate),
visit_all(node.conditions),
- token(node.consequent&.else_keyword_loc),
- visit(node.consequent),
+ token(node.else_clause&.else_keyword_loc),
+ visit(node.else_clause),
token(node.end_keyword_loc)
)
end
@@ -379,8 +447,8 @@ module Prism
token(node.case_keyword_loc),
visit(node.predicate),
visit_all(node.conditions),
- token(node.consequent&.else_keyword_loc),
- visit(node.consequent),
+ token(node.else_clause&.else_keyword_loc),
+ visit(node.else_clause),
token(node.end_keyword_loc)
)
end
@@ -406,9 +474,6 @@ module Prism
# @@foo = 1
# ^^^^^^^^^
- #
- # @@foo, @@bar = 1
- # ^^^^^ ^^^^^
def visit_class_variable_write_node(node)
builder.assign(
builder.assignable(builder.cvar(token(node.name_loc))),
@@ -422,18 +487,30 @@ module Prism
def visit_class_variable_operator_write_node(node)
builder.op_assign(
builder.assignable(builder.cvar(token(node.name_loc))),
- [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
visit(node.value)
)
end
# @@foo &&= bar
# ^^^^^^^^^^^^^
- alias visit_class_variable_and_write_node visit_class_variable_operator_write_node
+ def visit_class_variable_and_write_node(node)
+   # Assignable class variable on the left, operator text without its
+   # trailing "=" in the middle, compiled value on the right.
+   target = builder.assignable(builder.cvar(token(node.name_loc)))
+   operator_loc = node.operator_loc
+   operator = [operator_loc.slice.chomp("="), srange(operator_loc)]
+   builder.op_assign(target, operator, visit(node.value))
+ end
# @@foo ||= bar
# ^^^^^^^^^^^^^
- alias visit_class_variable_or_write_node visit_class_variable_operator_write_node
+ def visit_class_variable_or_write_node(node)
+   # Assignable class variable on the left, operator text without its
+   # trailing "=" in the middle, compiled value on the right.
+   target = builder.assignable(builder.cvar(token(node.name_loc)))
+   operator_loc = node.operator_loc
+   operator = [operator_loc.slice.chomp("="), srange(operator_loc)]
+   builder.op_assign(target, operator, visit(node.value))
+ end
# @@foo, = bar
# ^^^^^
@@ -461,18 +538,30 @@ module Prism
def visit_constant_operator_write_node(node)
builder.op_assign(
builder.assignable(builder.const([node.name, srange(node.name_loc)])),
- [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
visit(node.value)
)
end
# Foo &&= bar
# ^^^^^^^^^^^^
- alias visit_constant_and_write_node visit_constant_operator_write_node
+ def visit_constant_and_write_node(node)
+   # Assignable constant on the left, operator text without its trailing
+   # "=" in the middle, compiled value on the right.
+   constant = builder.const([node.name, srange(node.name_loc)])
+   target = builder.assignable(constant)
+   operator_loc = node.operator_loc
+   operator = [operator_loc.slice.chomp("="), srange(operator_loc)]
+   builder.op_assign(target, operator, visit(node.value))
+ end
# Foo ||= bar
# ^^^^^^^^^^^^
- alias visit_constant_or_write_node visit_constant_operator_write_node
+ def visit_constant_or_write_node(node)
+   # Assignable constant on the left, operator text without its trailing
+   # "=" in the middle, compiled value on the right.
+   constant = builder.const([node.name, srange(node.name_loc)])
+   target = builder.assignable(constant)
+   operator_loc = node.operator_loc
+   operator = [operator_loc.slice.chomp("="), srange(operator_loc)]
+   builder.op_assign(target, operator, visit(node.value))
+ end
# Foo, = bar
# ^^^
@@ -486,13 +575,13 @@ module Prism
if node.parent.nil?
builder.const_global(
token(node.delimiter_loc),
- [node.child.name, srange(node.child.location)]
+ [node.name, srange(node.name_loc)]
)
else
builder.const_fetch(
visit(node.parent),
token(node.delimiter_loc),
- [node.child.name, srange(node.child.location)]
+ [node.name, srange(node.name_loc)]
)
end
end
@@ -515,18 +604,30 @@ module Prism
def visit_constant_path_operator_write_node(node)
builder.op_assign(
builder.assignable(visit(node.target)),
- [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
visit(node.value)
)
end
# Foo::Bar &&= baz
# ^^^^^^^^^^^^^^^^
- alias visit_constant_path_and_write_node visit_constant_path_operator_write_node
+ def visit_constant_path_and_write_node(node)
+   # The compiled constant path becomes the assignable target; the operator
+   # token is the operator text without its trailing "=".
+   target = builder.assignable(visit(node.target))
+   operator_loc = node.operator_loc
+   operator = [operator_loc.slice.chomp("="), srange(operator_loc)]
+   builder.op_assign(target, operator, visit(node.value))
+ end
# Foo::Bar ||= baz
# ^^^^^^^^^^^^^^^^
- alias visit_constant_path_or_write_node visit_constant_path_operator_write_node
+ def visit_constant_path_or_write_node(node)
+   # The compiled constant path becomes the assignable target; the operator
+   # token is the operator text without its trailing "=".
+   target = builder.assignable(visit(node.target))
+   operator_loc = node.operator_loc
+   operator = [operator_loc.slice.chomp("="), srange(operator_loc)]
+   builder.op_assign(target, operator, visit(node.value))
+ end
# Foo::Bar, = baz
# ^^^^^^^^
@@ -587,13 +688,37 @@ module Prism
# defined?(a)
# ^^^^^^^^^^^
def visit_defined_node(node)
- builder.keyword_cmd(
- :defined?,
- token(node.keyword_loc),
- token(node.lparen_loc),
- [visit(node.value)],
- token(node.rparen_loc)
- )
+ # Very weird circumstances here where something like:
+ #
+ # defined?
+ # (1)
+ #
+ # gets parsed in Ruby as having only the `1` expression but in parser
+ # it gets parsed as having a begin. In this case we need to synthesize
+ # that begin to match parser's behavior.
+ if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n")
+ builder.keyword_cmd(
+ :defined?,
+ token(node.keyword_loc),
+ nil,
+ [
+ builder.begin(
+ token(node.lparen_loc),
+ visit(node.value),
+ token(node.rparen_loc)
+ )
+ ],
+ nil
+ )
+ else
+ builder.keyword_cmd(
+ :defined?,
+ token(node.keyword_loc),
+ token(node.lparen_loc),
+ [visit(node.value)],
+ token(node.rparen_loc)
+ )
+ end
end
# if foo then bar else baz end
@@ -656,10 +781,10 @@ module Prism
visit(node.index),
token(node.in_keyword_loc),
visit(node.collection),
- if node.do_keyword_loc
- token(node.do_keyword_loc)
+ if (do_keyword_loc = node.do_keyword_loc)
+ token(do_keyword_loc)
else
- srange_find(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset, [";"])
+ srange_semicolon(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset)
end,
visit(node.statements),
token(node.end_keyword_loc)
@@ -701,9 +826,6 @@ module Prism
# $foo = 1
# ^^^^^^^^
- #
- # $foo, $bar = 1
- # ^^^^ ^^^^
def visit_global_variable_write_node(node)
builder.assign(
builder.assignable(builder.gvar(token(node.name_loc))),
@@ -717,18 +839,30 @@ module Prism
def visit_global_variable_operator_write_node(node)
builder.op_assign(
builder.assignable(builder.gvar(token(node.name_loc))),
- [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
visit(node.value)
)
end
# $foo &&= bar
# ^^^^^^^^^^^^
- alias visit_global_variable_and_write_node visit_global_variable_operator_write_node
+ def visit_global_variable_and_write_node(node)
+   # Assignable global variable on the left, operator text without its
+   # trailing "=" in the middle, compiled value on the right.
+   target = builder.assignable(builder.gvar(token(node.name_loc)))
+   operator_loc = node.operator_loc
+   operator = [operator_loc.slice.chomp("="), srange(operator_loc)]
+   builder.op_assign(target, operator, visit(node.value))
+ end
# $foo ||= bar
# ^^^^^^^^^^^^
- alias visit_global_variable_or_write_node visit_global_variable_operator_write_node
+ def visit_global_variable_or_write_node(node)
+   # Assignable global variable on the left, operator text without its
+   # trailing "=" in the middle, compiled value on the right.
+   target = builder.assignable(builder.gvar(token(node.name_loc)))
+   operator_loc = node.operator_loc
+   operator = [operator_loc.slice.chomp("="), srange(operator_loc)]
+   builder.op_assign(target, operator, visit(node.value))
+ end
# $foo, = bar
# ^^^^
@@ -772,26 +906,26 @@ module Prism
visit(node.predicate),
token(node.then_keyword_loc),
visit(node.statements),
- token(node.consequent.else_keyword_loc),
- visit(node.consequent)
+ token(node.subsequent.else_keyword_loc),
+ visit(node.subsequent)
)
elsif node.if_keyword_loc.start_offset == node.location.start_offset
builder.condition(
token(node.if_keyword_loc),
visit(node.predicate),
- if node.then_keyword_loc
- token(node.then_keyword_loc)
+ if (then_keyword_loc = node.then_keyword_loc)
+ token(then_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.consequent&.location || node.end_keyword_loc).start_offset, [";"])
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset)
end,
visit(node.statements),
- case node.consequent
+ case node.subsequent
when IfNode
- token(node.consequent.if_keyword_loc)
+ token(node.subsequent.if_keyword_loc)
when ElseNode
- token(node.consequent.else_keyword_loc)
+ token(node.subsequent.else_keyword_loc)
end,
- visit(node.consequent),
+ visit(node.subsequent),
if node.if_keyword != "elsif"
token(node.end_keyword_loc)
end
@@ -799,7 +933,7 @@ module Prism
else
builder.condition_mod(
visit(node.statements),
- visit(node.consequent),
+ visit(node.subsequent),
token(node.if_keyword_loc),
visit(node.predicate)
)
@@ -809,7 +943,7 @@ module Prism
# 1i
# ^^
def visit_imaginary_node(node)
- visit_numeric(node, builder.complex([imaginary_value(node), srange(node.location)]))
+ visit_numeric(node, builder.complex([Complex(0, node.numeric.value), srange(node.location)]))
end
# { foo: }
@@ -845,7 +979,11 @@ module Prism
token(node.in_loc),
pattern,
guard,
- srange_find(node.pattern.location.end_offset, node.statements&.location&.start_offset || node.location.end_offset, [";", "then"]),
+ if (then_loc = node.then_loc)
+ token(then_loc)
+ else
+ srange_semicolon(node.pattern.location.end_offset, node.statements&.location&.start_offset)
+ end,
visit(node.statements)
)
end
@@ -863,18 +1001,46 @@ module Prism
visit_all(arguments),
token(node.closing_loc)
),
- [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
visit(node.value)
)
end
# foo[bar] &&= baz
# ^^^^^^^^^^^^^^^^
- alias visit_index_and_write_node visit_index_operator_write_node
+ def visit_index_and_write_node(node)
+   # Work on a copy of the argument list. Appending the block argument to
+   # `node.arguments.arguments` itself would mutate the Prism tree, so
+   # compiling the same tree again (for example a reused pre-parsed result)
+   # would see the block argument twice.
+   arguments = [*node.arguments&.arguments]
+   arguments << node.block if node.block
+
+   # `foo[bar]` is the target of the assignment; the operator token carries
+   # the operator text without its trailing "=".
+   builder.op_assign(
+     builder.index(
+       visit(node.receiver),
+       token(node.opening_loc),
+       visit_all(arguments),
+       token(node.closing_loc)
+     ),
+     [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+     visit(node.value)
+   )
+ end
# foo[bar] ||= baz
# ^^^^^^^^^^^^^^^^
- alias visit_index_or_write_node visit_index_operator_write_node
+ def visit_index_or_write_node(node)
+   # Work on a copy of the argument list. Appending the block argument to
+   # `node.arguments.arguments` itself would mutate the Prism tree, so
+   # compiling the same tree again (for example a reused pre-parsed result)
+   # would see the block argument twice.
+   arguments = [*node.arguments&.arguments]
+   arguments << node.block if node.block
+
+   # `foo[bar]` is the target of the assignment; the operator token carries
+   # the operator text without its trailing "=".
+   builder.op_assign(
+     builder.index(
+       visit(node.receiver),
+       token(node.opening_loc),
+       visit_all(arguments),
+       token(node.closing_loc)
+     ),
+     [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+     visit(node.value)
+   )
+ end
# foo[bar], = 1
# ^^^^^^^^
@@ -882,7 +1048,7 @@ module Prism
builder.index_asgn(
visit(node.receiver),
token(node.opening_loc),
- visit_all(node.arguments.arguments),
+ visit_all(node.arguments&.arguments || []),
token(node.closing_loc),
)
end
@@ -908,18 +1074,30 @@ module Prism
def visit_instance_variable_operator_write_node(node)
builder.op_assign(
builder.assignable(builder.ivar(token(node.name_loc))),
- [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
visit(node.value)
)
end
# @foo &&= bar
# ^^^^^^^^^^^^
- alias visit_instance_variable_and_write_node visit_instance_variable_operator_write_node
+ def visit_instance_variable_and_write_node(node)
+   # Assignable instance variable on the left, operator text without its
+   # trailing "=" in the middle, compiled value on the right.
+   target = builder.assignable(builder.ivar(token(node.name_loc)))
+   operator_loc = node.operator_loc
+   operator = [operator_loc.slice.chomp("="), srange(operator_loc)]
+   builder.op_assign(target, operator, visit(node.value))
+ end
# @foo ||= bar
# ^^^^^^^^^^^^
- alias visit_instance_variable_or_write_node visit_instance_variable_operator_write_node
+ def visit_instance_variable_or_write_node(node)
+   # Assignable instance variable on the left, operator text without its
+   # trailing "=" in the middle, compiled value on the right.
+   target = builder.assignable(builder.ivar(token(node.name_loc)))
+   operator_loc = node.operator_loc
+   operator = [operator_loc.slice.chomp("="), srange(operator_loc)]
+   builder.op_assign(target, operator, visit(node.value))
+ end
# @foo, = bar
# ^^^^
@@ -938,7 +1116,7 @@ module Prism
def visit_interpolated_regular_expression_node(node)
builder.regexp_compose(
token(node.opening_loc),
- visit_all(node.parts),
+ string_nodes_from_interpolation(node, node.opening),
[node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
)
@@ -952,34 +1130,12 @@ module Prism
# ^^^^^^^^^^^^
def visit_interpolated_string_node(node)
if node.heredoc?
- children, closing = visit_heredoc(node)
-
- return builder.string_compose(token(node.opening_loc), children, closing)
- end
-
- parts = if node.parts.one? { |part| part.type == :string_node }
- node.parts.flat_map do |node|
- if node.type == :string_node && node.unescaped.lines.count >= 2
- start_offset = node.content_loc.start_offset
-
- node.unescaped.lines.map do |line|
- end_offset = start_offset + line.length
- offsets = srange_offsets(start_offset, end_offset)
- start_offset = end_offset
-
- builder.string_internal([line, offsets])
- end
- else
- visit(node)
- end
- end
- else
- visit_all(node.parts)
+ return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
end
builder.string_compose(
token(node.opening_loc),
- parts,
+ string_nodes_from_interpolation(node, node.opening),
token(node.closing_loc)
)
end
@@ -989,7 +1145,7 @@ module Prism
def visit_interpolated_symbol_node(node)
builder.symbol_compose(
token(node.opening_loc),
- visit_all(node.parts),
+ string_nodes_from_interpolation(node, node.opening),
token(node.closing_loc)
)
end
@@ -998,14 +1154,35 @@ module Prism
# ^^^^^^^^^^^^
def visit_interpolated_x_string_node(node)
if node.heredoc?
- children, closing = visit_heredoc(node)
- builder.xstring_compose(token(node.opening_loc), children, closing)
+ return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
+ end
+
+ builder.xstring_compose(
+ token(node.opening_loc),
+ string_nodes_from_interpolation(node, node.opening),
+ token(node.closing_loc)
+ )
+ end
+
+ # -> { it }
+ # ^^
+ def visit_it_local_variable_read_node(node)
+   # Build a plain identifier named `it` spanning the node, then retag it
+   # as a local variable read.
+   identifier = builder.ident([:it, srange(node.location)])
+   identifier.updated(:lvar)
+ end
+
+ # -> { it }
+ # ^^^^^^^^^
+ def visit_it_parameters_node(node)
+ # FIXME: The builder _should_ always be a subclass of the prism builder.
+ # Currently RuboCop passes in its own builder that always inherits from the
+ # parser builder (which is lacking the `itarg` method). Once rubocop-ast
+ # opts in to use the custom prism builder a warning can be emitted when
+ # it is not the expected class, and eventually raise.
+ # https://github.com/rubocop/rubocop-ast/pull/354
+ if builder.is_a?(Translation::Parser::Builder)
+ builder.itarg
else
- builder.xstring_compose(
- token(node.opening_loc),
- visit_all(node.parts),
- token(node.closing_loc)
- )
+ builder.args(nil, [], nil, false)
end
end
@@ -1028,15 +1205,17 @@ module Prism
end
# -> {}
+ # ^^^^^
def visit_lambda_node(node)
parameters = node.parameters
+ implicit_parameters = parameters.is_a?(NumberedParametersNode) || parameters.is_a?(ItParametersNode)
builder.block(
builder.call_lambda(token(node.operator_loc)),
[node.opening, srange(node.opening_loc)],
if parameters.nil?
builder.args(nil, [], nil, false)
- elsif node.parameters.is_a?(NumberedParametersNode)
+ elsif implicit_parameters
visit(node.parameters)
else
builder.args(
@@ -1046,7 +1225,7 @@ module Prism
false
)
end,
- node.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))),
+ visit(node.body),
[node.closing, srange(node.closing_loc)]
)
end
@@ -1072,18 +1251,30 @@ module Prism
def visit_local_variable_operator_write_node(node)
builder.op_assign(
builder.assignable(builder.ident(token(node.name_loc))),
- [node.operator_loc.slice.chomp("="), srange(node.operator_loc)],
+ [node.binary_operator_loc.slice.chomp("="), srange(node.binary_operator_loc)],
visit(node.value)
)
end
# foo &&= bar
# ^^^^^^^^^^^
- alias visit_local_variable_and_write_node visit_local_variable_operator_write_node
+ def visit_local_variable_and_write_node(node)
+   # Assignable identifier on the left, operator text without its trailing
+   # "=" in the middle, compiled value on the right.
+   target = builder.assignable(builder.ident(token(node.name_loc)))
+   operator_loc = node.operator_loc
+   operator = [operator_loc.slice.chomp("="), srange(operator_loc)]
+   builder.op_assign(target, operator, visit(node.value))
+ end
# foo ||= bar
# ^^^^^^^^^^^
- alias visit_local_variable_or_write_node visit_local_variable_operator_write_node
+ def visit_local_variable_or_write_node(node)
+   # Assignable identifier on the left, operator text without its trailing
+   # "=" in the middle, compiled value on the right.
+   target = builder.assignable(builder.ident(token(node.name_loc)))
+   operator_loc = node.operator_loc
+   operator = [operator_loc.slice.chomp("="), srange(operator_loc)]
+   builder.op_assign(target, operator, visit(node.value))
+ end
# foo, = bar
# ^^^
@@ -1128,7 +1319,7 @@ module Prism
# A node that is missing from the syntax tree. This is only used in the
# case of a syntax error. The parser gem doesn't have such a concept, so
# we invent our own here.
- def visit_missing_node(node)
+ def visit_error_recovery_node(node)
::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location)))
end
@@ -1146,13 +1337,9 @@ module Prism
# foo, bar = baz
# ^^^^^^^^
def visit_multi_target_node(node)
- elements = [*node.lefts]
- elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
- elements.concat(node.rights)
-
builder.multi_lhs(
token(node.lparen_loc),
- visit_all(elements),
+ visit_all(multi_target_elements(node)),
token(node.rparen_loc)
)
end
@@ -1160,9 +1347,11 @@ module Prism
# foo, bar = baz
# ^^^^^^^^^^^^^^
def visit_multi_write_node(node)
- elements = [*node.lefts]
- elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
- elements.concat(node.rights)
+ elements = multi_target_elements(node)
+
+ if elements.length == 1 && elements.first.is_a?(MultiTargetNode) && !node.rest
+ elements = multi_target_elements(elements.first)
+ end
builder.multi_assign(
builder.multi_lhs(
@@ -1196,6 +1385,12 @@ module Prism
builder.nil(token(node.location))
end
+ # def foo(&nil); end
+ # ^^^^
+ def visit_no_block_parameter_node(node)
+   # Pass the `&` token and the `nil` keyword token to the builder.
+   ampersand = token(node.operator_loc)
+   nil_keyword = token(node.keyword_loc)
+   builder.blocknilarg(ampersand, nil_keyword)
+ end
+
# def foo(**nil); end
# ^^^^^
def visit_no_keywords_parameter_node(node)
@@ -1243,12 +1438,12 @@ module Prism
if node.requireds.any?
node.requireds.each do |required|
- if required.is_a?(RequiredParameterNode)
- params << visit(required)
- else
- compiler = copy_compiler(in_destructure: true)
- params << required.accept(compiler)
- end
+ params <<
+ if required.is_a?(RequiredParameterNode)
+ visit(required)
+ else
+ required.accept(copy_compiler(in_destructure: true))
+ end
end
end
@@ -1257,12 +1452,12 @@ module Prism
if node.posts.any?
node.posts.each do |post|
- if post.is_a?(RequiredParameterNode)
- params << visit(post)
- else
- compiler = copy_compiler(in_destructure: true)
- params << post.accept(compiler)
- end
+ params <<
+ if post.is_a?(RequiredParameterNode)
+ visit(post)
+ else
+ post.accept(copy_compiler(in_destructure: true))
+ end
end
end
@@ -1288,7 +1483,8 @@ module Prism
# foo => ^(bar)
# ^^^^^^
def visit_pinned_expression_node(node)
- expression = builder.begin(token(node.lparen_loc), visit(node.expression), token(node.rparen_loc))
+ parts = node.expression.accept(copy_compiler(in_pattern: false)) # Don't treat * and similar as match_rest
+ expression = builder.begin(token(node.lparen_loc), parts, token(node.rparen_loc))
builder.pin(token(node.operator_loc), expression)
end
@@ -1348,7 +1544,7 @@ module Prism
# 1r
# ^^
def visit_rational_node(node)
- visit_numeric(node, builder.rational([rational_value(node), srange(node.location)]))
+ visit_numeric(node, builder.rational([node.value, srange(node.location)]))
end
# redo
@@ -1360,9 +1556,18 @@ module Prism
# /foo/
# ^^^^^
def visit_regular_expression_node(node)
+ parts =
+ if node.content == ""
+ []
+ elsif node.content.include?("\n")
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
+ end
+
builder.regexp_compose(
token(node.opening_loc),
- [builder.string_internal(token(node.content_loc))],
+ parts,
[node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
)
@@ -1508,24 +1713,18 @@ module Prism
# ^^^^^
def visit_string_node(node)
if node.heredoc?
- children, closing = visit_heredoc(node.to_interpolated)
- builder.string_compose(token(node.opening_loc), children, closing)
+ visit_heredoc(node.to_interpolated) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
elsif node.opening == "?"
builder.character([node.unescaped, srange(node.location)])
+ elsif node.opening&.start_with?("%") && node.unescaped.empty?
+ builder.string_compose(token(node.opening_loc), [], token(node.closing_loc))
else
- parts = if node.content.lines.count <= 1 || node.unescaped.lines.count <= 1
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
- else
- start_offset = node.content_loc.start_offset
-
- [node.content.lines, node.unescaped.lines].transpose.map do |content_line, unescaped_line|
- end_offset = start_offset + content_line.length
- offsets = srange_offsets(start_offset, end_offset)
- start_offset = end_offset
-
- builder.string_internal([unescaped_line, offsets])
+ parts =
+ if node.content.include?("\n")
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
end
- end
builder.string_compose(
token(node.opening_loc),
@@ -1568,19 +1767,14 @@ module Prism
builder.symbol([node.unescaped, srange(node.location)])
end
else
- parts = if node.value.lines.one?
- [builder.string_internal([node.unescaped, srange(node.value_loc)])]
- else
- start_offset = node.value_loc.start_offset
-
- node.value.lines.map do |line|
- end_offset = start_offset + line.length
- offsets = srange_offsets(start_offset, end_offset)
- start_offset = end_offset
-
- builder.string_internal([line, offsets])
+ parts =
+ if node.value_loc.nil?
+ []
+ elsif node.value.include?("\n")
+ string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([node.unescaped, srange(node.value_loc)])]
end
- end
builder.symbol_compose(
token(node.opening_loc),
@@ -1612,19 +1806,19 @@ module Prism
builder.condition(
token(node.keyword_loc),
visit(node.predicate),
- if node.then_keyword_loc
- token(node.then_keyword_loc)
+ if (then_keyword_loc = node.then_keyword_loc)
+ token(then_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.consequent&.location || node.end_keyword_loc).start_offset, [";"])
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset)
end,
- visit(node.consequent),
- token(node.consequent&.else_keyword_loc),
+ visit(node.else_clause),
+ token(node.else_clause&.else_keyword_loc),
visit(node.statements),
token(node.end_keyword_loc)
)
else
builder.condition_mod(
- visit(node.consequent),
+ visit(node.else_clause),
visit(node.statements),
token(node.keyword_loc),
visit(node.predicate)
@@ -1633,7 +1827,7 @@ module Prism
end
# until foo; bar end
- # ^^^^^^^^^^^^^^^^^
+ # ^^^^^^^^^^^^^^^^^^
#
# bar until foo
# ^^^^^^^^^^^^^
@@ -1643,7 +1837,11 @@ module Prism
:until,
token(node.keyword_loc),
visit(node.predicate),
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, [";", "do"]),
+ if (do_keyword_loc = node.do_keyword_loc)
+ token(do_keyword_loc)
+ else
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset)
+ end,
visit(node.statements),
token(node.closing_loc)
)
@@ -1663,10 +1861,10 @@ module Prism
builder.when(
token(node.keyword_loc),
visit_all(node.conditions),
- if node.then_keyword_loc
- token(node.then_keyword_loc)
+ if (then_keyword_loc = node.then_keyword_loc)
+ token(then_keyword_loc)
else
- srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset || (node.conditions.last.location.end_offset + 1), [";"])
+ srange_semicolon(node.conditions.last.location.end_offset, node.statements&.location&.start_offset)
end,
visit(node.statements)
)
@@ -1683,7 +1881,11 @@ module Prism
:while,
token(node.keyword_loc),
visit(node.predicate),
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, [";", "do"]),
+ if (do_keyword_loc = node.do_keyword_loc)
+ token(do_keyword_loc)
+ else
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset)
+ end,
visit(node.statements),
token(node.closing_loc)
)
@@ -1701,29 +1903,23 @@ module Prism
# ^^^^^
def visit_x_string_node(node)
if node.heredoc?
- children, closing = visit_heredoc(node.to_interpolated)
- builder.xstring_compose(token(node.opening_loc), children, closing)
- else
- parts = if node.unescaped.lines.one?
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
- else
- start_offset = node.content_loc.start_offset
-
- node.unescaped.lines.map do |line|
- end_offset = start_offset + line.length
- offsets = srange_offsets(start_offset, end_offset)
- start_offset = end_offset
+ return visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
+ end
- builder.string_internal([line, offsets])
- end
+ parts =
+ if node.content == ""
+ []
+ elsif node.content.include?("\n")
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
end
- builder.xstring_compose(
- token(node.opening_loc),
- parts,
- token(node.closing_loc)
- )
- end
+ builder.xstring_compose(
+ token(node.opening_loc),
+ parts,
+ token(node.closing_loc)
+ )
end
# yield
@@ -1764,24 +1960,12 @@ module Prism
forwarding
end
- # Because we have mutated the AST to allow for newlines in the middle of
- # a rational, we need to manually handle the value here.
- def imaginary_value(node)
- Complex(0, node.numeric.is_a?(RationalNode) ? rational_value(node.numeric) : node.numeric.value)
- end
-
- # Negate the value of a numeric node. This is a special case where you
- # have a negative sign on one line and then a number on the next line.
- # In normal Ruby, this will always be a method call. The parser gem,
- # however, marks this as a numeric literal. We have to massage the tree
- # here to get it into the correct form.
- def numeric_negate(message_loc, receiver)
- case receiver.type
- when :integer_node, :float_node
- receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location))
- when :rational_node, :imaginary_node
- receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location))
- end
+ # Returns the set of targets for a MultiTargetNode or a MultiWriteNode.
+ def multi_target_elements(node)
+ elements = [*node.lefts]
+ elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
+ elements.concat(node.rights)
+ elements
end
# Blocks can have a special set of parameters that automatically expand
@@ -1798,16 +1982,6 @@ module Prism
parameters.block.nil?
end
- # Because we have mutated the AST to allow for newlines in the middle of
- # a rational, we need to manually handle the value here.
- def rational_value(node)
- if node.numeric.is_a?(IntegerNode)
- Rational(node.numeric.value)
- else
- Rational(node.slice.gsub(/\s/, "").chomp("r"))
- end
- end
-
# Locations in the parser gem AST are generated using this class. We
# store a reference to its constant to make it slightly faster to look
# up.
@@ -1823,14 +1997,16 @@ module Prism
Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
end
- # Constructs a new source range by finding the given tokens between the
- # given start offset and end offset. If the needle is not found, it
- # returns nil.
- def srange_find(start_offset, end_offset, tokens)
- tokens.find do |token|
- next unless (index = source_buffer.source.byteslice(start_offset...end_offset).index(token))
- offset = start_offset + index
- return [token, Range.new(source_buffer, offset_cache[offset], offset_cache[offset + token.length])]
+ # Constructs a new source range by finding a semicolon between the given
+ # start offset and end offset. If the semicolon is not found, it returns
+ # nil. Importantly it does not search past newlines or comments.
+ #
+ # Note that end_offset is allowed to be nil, in which case this will
+ # search until the end of the string.
+ def srange_semicolon(start_offset, end_offset)
+ if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*;/])
+ final_offset = start_offset + match.bytesize
+ [";", Range.new(source_buffer, offset_cache[final_offset - 1], offset_cache[final_offset])]
end
end
@@ -1843,20 +2019,22 @@ module Prism
def visit_block(call, block)
if block
parameters = block.parameters
+ implicit_parameters = parameters.is_a?(NumberedParametersNode) || parameters.is_a?(ItParametersNode)
builder.block(
call,
token(block.opening_loc),
if parameters.nil?
builder.args(nil, [], nil, false)
- elsif parameters.is_a?(NumberedParametersNode)
+ elsif implicit_parameters
visit(parameters)
else
builder.args(
token(parameters.opening_loc),
if procarg0?(parameters.parameters)
parameter = parameters.parameters.requireds.first
- [builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals))
+ visited = parameter.is_a?(RequiredParameterNode) ? visit(parameter) : parameter.accept(copy_compiler(in_destructure: true))
+ [builder.procarg0(visited)].concat(visit_all(parameters.locals))
else
visit(parameters)
end,
@@ -1864,7 +2042,7 @@ module Prism
false
)
end,
- block.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))),
+ visit(block.body),
token(block.closing_loc)
)
else
@@ -1875,28 +2053,21 @@ module Prism
# Visit a heredoc that can be either a string or an xstring.
def visit_heredoc(node)
children = Array.new
+ indented = false
+
+ # If this is a dedenting heredoc, then we need to insert the opening
+ # content into the children as well.
+ if node.opening.start_with?("<<~") && node.parts.length > 0 && !node.parts.first.is_a?(StringNode)
+ location = node.parts.first.location
+ location = location.copy(start_offset: location.start_offset - location.start_line_slice.bytesize)
+ children << builder.string_internal(token(location))
+ indented = true
+ end
+
node.parts.each do |part|
pushing =
- if part.is_a?(StringNode) && part.unescaped.include?("\n")
- unescaped = part.unescaped.lines(chomp: true)
- escaped = part.content.lines(chomp: true)
-
- escaped_lengths =
- if node.opening.end_with?("'")
- escaped.map { |line| line.bytesize + 1 }
- else
- escaped.chunk_while { |before, after| before.match?(/(?<!\\)\\$/) }.map { |line| line.join.bytesize + line.length }
- end
-
- start_offset = part.location.start_offset
- end_offset = nil
-
- unescaped.zip(escaped_lengths).map do |unescaped_line, escaped_length|
- end_offset = start_offset + (escaped_length || 0)
- inner_part = builder.string_internal(["#{unescaped_line}\n", srange_offsets(start_offset, end_offset)])
- start_offset = end_offset
- inner_part
- end
+ if part.is_a?(StringNode) && part.content.include?("\n")
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.location.start_offset, node.opening)
else
[visit(part)]
end
@@ -1905,7 +2076,12 @@ module Prism
if child.type == :str && child.children.last == ""
# nothing
elsif child.type == :str && children.last && children.last.type == :str && !children.last.children.first.end_with?("\n")
- children.last.children.first << child.children.first
+ appendee = children[-1]
+
+ location = appendee.loc
+ location = location.with_expression(location.expression.join(child.loc.expression))
+
+ children[-1] = appendee.updated(:str, ["#{appendee.children.first}#{child.children.first}"], location: location)
else
children << child
end
@@ -1914,8 +2090,10 @@ module Prism
closing = node.closing
closing_t = [closing.chomp, srange_offsets(node.closing_loc.start_offset, node.closing_loc.end_offset - (closing[/\s+$/]&.length || 0))]
+ composed = yield children, closing_t
- [children, closing_t]
+ composed = composed.updated(nil, children[1..-1]) if indented
+ composed
end
# Visit a numeric node and account for the optional sign.
@@ -1939,6 +2117,102 @@ module Prism
parser.pattern_variables.pop
end
end
+
+ # When the content of a string node is split across multiple lines, the
+ # parser gem creates individual string nodes for each line the content is part of.
+ def string_nodes_from_interpolation(node, opening)
+ node.parts.flat_map do |part|
+ if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening)
+ else
+ visit(part)
+ end
+ end
+ end
+
+ # Create parser string nodes from a single prism node. The parser gem
+ # "glues" strings together when a line continuation is encountered.
+ def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
+ unescaped = unescaped.lines
+ escaped = escaped.lines
+ percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
+ regex = opening == "/" || opening&.start_with?("%r")
+
+ # Non-interpolating strings
+ if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
+ current_length = 0
+ current_line = +""
+
+ escaped.filter_map.with_index do |escaped_line, index|
+ unescaped_line = unescaped.fetch(index, "")
+ current_length += escaped_line.bytesize
+ current_line << unescaped_line
+
+ # Glue line continuations together. Only %w and %i arrays can contain these.
+ if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd?
+ next unless index == escaped.count - 1
+ end
+ s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)])
+ start_offset += escaped_line.bytesize
+ current_line = +""
+ current_length = 0
+ s
+ end
+ else
+ escaped_lengths = []
+ normalized_lengths = []
+ # Keeps track of where an unescaped line should start a new token. An unescaped
+ # \n would otherwise be indistinguishable from the actual newline at the end of
+ # the line. The parser gem only emits a new string node at "real" newlines;
+ # line continuations don't start a new node either.
+ do_next_tokens = []
+
+ escaped
+ .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
+ .each do |lines|
+ escaped_lengths << lines.sum(&:bytesize)
+
+ unescaped_lines_count =
+ if regex
+ 0 # Will always be preserved as is
+ else
+ lines.sum do |line|
+ count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
+ count -= 1 if line.match?(/(?:\A|[^\\])(?:\\\\)*\\n\z/) && count > 0
+ count
+ end
+ end
+
+ extra = 1
+ extra = lines.count if percent_array # Account for line continuations in percent arrays
+
+ normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0))
+ normalized_lengths[-1] = lines.sum { |line| line.bytesize }
+ do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false))
+ do_next_tokens[-1] = true
+ end
+
+ current_line = +""
+ current_normalized_length = 0
+
+ emitted_count = 0
+ unescaped.filter_map.with_index do |unescaped_line, index|
+ current_line << unescaped_line
+ current_normalized_length += normalized_lengths.fetch(index, 0)
+
+ if do_next_tokens[index]
+ inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
+ start_offset += escaped_lengths.fetch(emitted_count, 0)
+ current_line = +""
+ current_normalized_length = 0
+ emitted_count += 1
+ inner_part
+ else
+ nil
+ end
+ end
+ end
+ end
end
end
end
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index 9d7caae0ba..e82042867f 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -1,21 +1,25 @@
# frozen_string_literal: true
+# :markup: markdown
+
+require "strscan"
+require_relative "../../polyfill/append_as_bytes"
+require_relative "../../polyfill/scan_byte"
module Prism
module Translation
class Parser
# Accepts a list of prism tokens and converts them into the expected
# format for the parser gem.
- class Lexer
+ class Lexer # :nodoc:
+ # These tokens are always skipped
+ TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF])
+ private_constant :TYPES_ALWAYS_SKIP
+
# The direct translating of types between the two lexers.
TYPES = {
# These tokens should never appear in the output of the lexer.
- EOF: nil,
- MISSING: nil,
- NOT_PROVIDED: nil,
- IGNORED_NEWLINE: nil,
EMBDOC_END: nil,
EMBDOC_LINE: nil,
- __END__: nil,
# These tokens have more or less direct mappings.
AMPERSAND: :tAMPER2,
@@ -83,6 +87,7 @@ module Prism
KEYWORD_DEF: :kDEF,
KEYWORD_DEFINED: :kDEFINED,
KEYWORD_DO: :kDO,
+ KEYWORD_DO_BLOCK: :kDO_BLOCK,
KEYWORD_DO_LOOP: :kDO_COND,
KEYWORD_END: :kEND,
KEYWORD_END_UPCASE: :klEND,
@@ -134,7 +139,7 @@ module Prism
MINUS_GREATER: :tLAMBDA,
NEWLINE: :tNL,
NUMBERED_REFERENCE: :tNTH_REF,
- PARENTHESIS_LEFT: :tLPAREN,
+ PARENTHESIS_LEFT: :tLPAREN2,
PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
PARENTHESIS_RIGHT: :tRPAREN,
PERCENT: :tPERCENT,
@@ -173,7 +178,7 @@ module Prism
UMINUS_NUM: :tUNARY_NUM,
UPLUS: :tUPLUS,
USTAR: :tSTAR,
- USTAR_STAR: :tPOW,
+ USTAR_STAR: :tDSTAR,
WORDS_SEP: :tSPACE
}
@@ -184,10 +189,31 @@ module Prism
# without them. We should find another way to do this, but in the
# meantime we'll hide them from the documentation and mark them as
# private constants.
- EXPR_BEG = 0x1 # :nodoc:
- EXPR_LABEL = 0x400 # :nodoc:
+ EXPR_BEG = 0x1
+ EXPR_LABEL = 0x400
+
+ # Used to determine whether `do` is of the token type `kDO` or `kDO_LAMBDA`.
+ #
+ # NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, `kDO` is still returned
+ # instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
+ LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG])
+
+ # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
+ # The following token types are listed as those classified as `tLPAREN`.
+ LPAREN_CONVERSION_TOKEN_TYPES = Set.new([
+ :kBREAK, :tCARET, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
+ :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS, :tLCURLY
+ ])
- private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL
+ # Types of tokens that are allowed to continue a method call with comments in-between.
+ # For these, the parser gem doesn't emit a newline token after the last comment.
+ COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT])
+ private_constant :COMMENT_CONTINUATION_TYPES
+
+ # Heredocs are complex and require us to keep track of a bit of info to refer to later
+ HeredocData = Struct.new(:identifier, :common_whitespace, keyword_init: true)
+
+ private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL, :LAMBDA_TOKEN_TYPES, :LPAREN_CONVERSION_TOKEN_TYPES, :HeredocData
# The Parser::Source::Buffer that the tokens were lexed from.
attr_reader :source_buffer
@@ -207,7 +233,7 @@ module Prism
@offset_cache = offset_cache
end
- Range = ::Parser::Source::Range # :nodoc:
+ Range = ::Parser::Source::Range
private_constant :Range
# Convert the prism tokens into the expected format for the parser gem.
@@ -217,39 +243,78 @@ module Prism
index = 0
length = lexed.length
- heredoc_identifier_stack = []
+ heredoc_stack = []
+ quote_stack = []
+
+ # The parser gem emits the newline tokens for comments out of order. This saves
+ # that token location to emit at a later time to properly line everything up.
+ # https://github.com/whitequark/parser/issues/1025
+ comment_newline_location = nil
while index < length
token, state = lexed[index]
index += 1
- next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
+ next if TYPES_ALWAYS_SKIP.include?(token.type)
type = TYPES.fetch(token.type)
value = token.value
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset])
+ location = range(token.location.start_offset, token.location.end_offset)
case type
+ when :kDO
+ nearest_lambda_token = tokens.reverse_each.find do |token|
+ LAMBDA_TOKEN_TYPES.include?(token.first)
+ end
+
+ if nearest_lambda_token&.first == :tLAMBDA
+ type = :kDO_LAMBDA
+ end
when :tCHARACTER
value.delete_prefix!("?")
+ # Character literals behave similarly to double-quoted strings. We can use the same escaping mechanism.
+ value = unescape_string(value, "?")
when :tCOMMENT
if token.type == :EMBDOC_BEGIN
- start_index = index
- while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
+ while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1)
value += next_token.value
index += 1
end
- if start_index != index
- value += next_token.value
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
- index += 1
- end
+ value += next_token.value
+ location = range(token.location.start_offset, next_token.location.end_offset)
+ index += 1
else
- value.chomp!
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
+ is_at_eol = value.chomp!.nil?
+ location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1))
+
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ next_token, _ = lexed[index]
+
+ is_inline_comment = prev_token&.location&.start_line == token.location.start_line
+ if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
+ tokens << [:tCOMMENT, [value, location]]
+
+ nl_location = range(token.location.end_offset - 1, token.location.end_offset)
+ tokens << [:tNL, [nil, nl_location]]
+ next
+ elsif is_inline_comment && next_token&.type == :COMMENT
+ comment_newline_location = range(token.location.end_offset - 1, token.location.end_offset)
+ elsif comment_newline_location && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
+ tokens << [:tCOMMENT, [value, location]]
+ tokens << [:tNL, [nil, comment_newline_location]]
+ comment_newline_location = nil
+ next
+ end
end
when :tNL
+ next_token, _ = lexed[index]
+ # Newlines after comments are emitted out of order.
+ if next_token&.type == :COMMENT
+ comment_newline_location = location
+ next
+ end
+
value = nil
when :tFLOAT
value = parse_float(value)
@@ -257,8 +322,8 @@ module Prism
value = parse_complex(value)
when :tINTEGER
if value.start_with?("+")
- tokens << [:tUNARY_NUM, ["+", Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])]]
- location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
+ tokens << [:tUNARY_NUM, ["+", range(token.location.start_offset, token.location.start_offset + 1)]]
+ location = range(token.location.start_offset + 1, token.location.end_offset)
end
value = parse_integer(value)
@@ -268,6 +333,8 @@ module Prism
value.chomp!(":")
when :tLCURLY
type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
+ when :tLPAREN2
+ type = :tLPAREN if tokens.empty? || LPAREN_CONVERSION_TOKEN_TYPES.include?(tokens.dig(-1, 0))
when :tNTH_REF
value = parse_integer(value.delete_prefix("$"))
when :tOP_ASGN
@@ -275,92 +342,196 @@ module Prism
when :tRATIONAL
value = parse_rational(value)
when :tSPACE
+ location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
value = nil
when :tSTRING_BEG
- if token.type == :HEREDOC_START
- heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
- end
- if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
+ next_token, _ = lexed[index]
+ next_next_token, _ = lexed[index + 1]
+ basic_quotes = value == '"' || value == "'"
+
+ if basic_quotes && next_token&.type == :STRING_END
next_location = token.location.join(next_token.location)
type = :tSTRING
value = ""
- location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
+ location = range(next_location.start_offset, next_location.end_offset)
index += 1
- elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
- next_location = token.location.join(next_next_token.location)
- type = :tSTRING
- value = next_token.value.gsub("\\\\", "\\")
- location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
- index += 2
- elsif value.start_with?("<<")
+ elsif value.start_with?("'", '"', "%")
+ if next_token&.type == :STRING_CONTENT && next_next_token&.type == :STRING_END
+ string_value = next_token.value
+ if simplify_string?(string_value, value)
+ next_location = token.location.join(next_next_token.location)
+ if percent_array?(value)
+ value = percent_array_unescape(string_value)
+ else
+ value = unescape_string(string_value, value)
+ end
+ type = :tSTRING
+ location = range(next_location.start_offset, next_location.end_offset)
+ index += 2
+ tokens << [type, [value, location]]
+
+ next
+ end
+ end
+
+ quote_stack.push(value)
+ elsif token.type == :HEREDOC_START
quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
+ heredoc_type = value[2] == "-" || value[2] == "~" ? value[2] : ""
+ heredoc = HeredocData.new(
+ identifier: value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier],
+ common_whitespace: 0,
+ )
+
if quote == "`"
type = :tXSTRING_BEG
- value = "<<`"
+ end
+
+ # The parser gem trims whitespace from squiggly heredocs. We must record
+ # the most common whitespace to later remove.
+ if heredoc_type == "~" || heredoc_type == "`"
+ heredoc.common_whitespace = calculate_heredoc_whitespace(index)
+ end
+
+ if quote == "'" || quote == '"' || quote == "`"
+ value = "<<#{quote}"
else
- value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
+ value = '<<"'
end
+
+ heredoc_stack.push(heredoc)
+ quote_stack.push(value)
end
when :tSTRING_CONTENT
- unless (lines = token.value.lines).one?
- start_offset = offset_cache[token.location.start_offset]
- lines.map do |line|
- newline = line.end_with?("\r\n") ? "\r\n" : "\n"
+ is_percent_array = percent_array?(quote_stack.last)
+
+ if (lines = token.value.lines).one?
+ # Prism usually emits a single token for strings with line continuations.
+ # For squiggly heredocs they are not joined so we do that manually here.
+ current_string = +""
+ current_length = 0
+ start_offset = token.location.start_offset
+ while token.type == :STRING_CONTENT
+ current_length += token.value.bytesize
+ # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line
+ # The parser gem only removes indentation when the heredoc is not nested
+ not_nested = heredoc_stack.size == 1
+ if is_percent_array
+ value = percent_array_unescape(token.value)
+ elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
+ value = trim_heredoc_whitespace(token.value, current_heredoc)
+ end
+
+ current_string << unescape_string(value, quote_stack.last)
+ relevant_backslash_count = if quote_stack.last.start_with?("%W", "%I")
+ 0 # the last backslash escapes the newline
+ else
+ token.value[/(\\{1,})\n/, 1]&.length || 0
+ end
+ if relevant_backslash_count.even? || !interpolation?(quote_stack.last)
+ tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
+ break
+ end
+ token, _ = lexed[index]
+ index += 1
+ end
+ else
+ # When the parser gem encounters a line continuation inside of a multiline string,
+ # it emits a single string node. The backslash (and remaining newline) is removed.
+ current_line = +""
+ adjustment = 0
+ start_offset = token.location.start_offset
+ emit = false
+
+ lines.each.with_index do |line, index|
chomped_line = line.chomp
- if match = chomped_line.match(/(?<backslashes>\\+)\z/)
- adjustment = match[:backslashes].size / 2
- adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
- if match[:backslashes].size.odd?
- adjusted_line.delete_suffix!("\\")
- adjustment += 2
+ backslash_count = chomped_line[/\\{1,}\z/]&.length || 0
+ is_interpolation = interpolation?(quote_stack.last)
+
+ if backslash_count.odd? && (is_interpolation || is_percent_array)
+ if is_percent_array
+ current_line << percent_array_unescape(line)
+ adjustment += 1
else
- adjusted_line << newline
+ chomped_line.delete_suffix!("\\")
+ current_line << chomped_line
+ adjustment += 2
end
+ # If the string ends with a line continuation emit the remainder
+ emit = index == lines.count - 1
else
- adjusted_line = line
- adjustment = 0
+ current_line << line
+ emit = true
end
- end_offset = start_offset + adjusted_line.length + adjustment
- tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
- start_offset = end_offset
+ if emit
+ end_offset = start_offset + current_line.bytesize + adjustment
+ tokens << [:tSTRING_CONTENT, [unescape_string(current_line, quote_stack.last), range(start_offset, end_offset)]]
+ start_offset = end_offset
+ current_line = +""
+ adjustment = 0
+ end
end
- next
end
+ next
when :tSTRING_DVAR
value = nil
when :tSTRING_END
if token.type == :HEREDOC_END && value.end_with?("\n")
newline_length = value.end_with?("\r\n") ? 2 : 1
- value = heredoc_identifier_stack.pop
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
+ value = heredoc_stack.pop.identifier
+ location = range(token.location.start_offset, token.location.end_offset - newline_length)
elsif token.type == :REGEXP_END
value = value[0]
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
+ location = range(token.location.start_offset, token.location.start_offset + 1)
+ end
+
+ if percent_array?(quote_stack.pop)
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type)
+ ends_with_whitespace = prev_token&.type == :WORDS_SEP
+ # parser always emits a space token after content in a percent array, even if no actual whitespace is present.
+ if !empty && !ends_with_whitespace
+ tokens << [:tSPACE, [nil, range(token.location.start_offset, token.location.start_offset)]]
+ end
end
when :tSYMBEG
- if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
+ if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
next_location = token.location.join(next_token.location)
type = :tSYMBOL
value = next_token.value
value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
- location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
+ location = range(next_location.start_offset, next_location.end_offset)
index += 1
+ else
+ quote_stack.push(value)
end
when :tFID
if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
type = :tIDENTIFIER
end
when :tXSTRING_BEG
- if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
+ if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
+ # self.`()
type = :tBACK_REF2
end
+ quote_stack.push(value)
+ when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG
+ if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP
+ index += 1
+ end
+
+ quote_stack.push(value)
+ when :tREGEXP_BEG
+ quote_stack.push(value)
end
tokens << [type, [value, location]]
if token.type == :REGEXP_END
- tokens << [:tREGEXP_OPT, [token.value[1..], Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])]]
+ tokens << [:tREGEXP_OPT, [token.value[1..], range(token.location.start_offset + 1, token.location.end_offset)]]
end
end
@@ -369,6 +540,11 @@ module Prism
private
+ # Creates a new parser range, taking prism's byte offsets into account
+ def range(start_offset, end_offset)
+ Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
+ end
+
# Parse an integer from the string representation.
def parse_integer(value)
Integer(value)
@@ -410,6 +586,233 @@ module Prism
rescue ArgumentError
0r
end
+
+ # Wonky heredoc tab/spaces rules.
+ # https://github.com/ruby/prism/blob/v1.3.0/src/prism.c#L10548-L10558
+ def calculate_heredoc_whitespace(heredoc_token_index)
+ next_token_index = heredoc_token_index
+ nesting_level = 0
+ previous_line = -1
+ result = Float::MAX
+
+ while (next_token = lexed[next_token_index]&.first)
+ next_token_index += 1
+ next_next_token, _ = lexed[next_token_index]
+ first_token_on_line = next_token.location.start_column == 0
+
+ # String content inside nested heredocs and interpolation is ignored
+ if next_token.type == :HEREDOC_START || next_token.type == :EMBEXPR_BEGIN
+ # When interpolation is the first token of a line there is no string
+ # content to check against. There will be no common whitespace.
+ if nesting_level == 0 && first_token_on_line
+ result = 0
+ end
+ nesting_level += 1
+ elsif next_token.type == :HEREDOC_END || next_token.type == :EMBEXPR_END
+ nesting_level -= 1
+ # When we encountered the matching heredoc end, we can exit
+ break if nesting_level == -1
+ elsif next_token.type == :STRING_CONTENT && nesting_level == 0 && first_token_on_line
+ common_whitespace = 0
+ next_token.value[/^\s*/].each_char do |char|
+ if char == "\t"
+ common_whitespace = (common_whitespace / 8 + 1) * 8;
+ else
+ common_whitespace += 1
+ end
+ end
+
+ is_first_token_on_line = next_token.location.start_line != previous_line
+ # Whitespace is significant if followed by interpolation
+ whitespace_only = common_whitespace == next_token.value.length && next_next_token&.location&.start_line != next_token.location.start_line
+ if is_first_token_on_line && !whitespace_only && common_whitespace < result
+ result = common_whitespace
+ previous_line = next_token.location.start_line
+ end
+ end
+ end
+ result
+ end
+
+ # Wonky heredoc tab/spaces rules.
+ # https://github.com/ruby/prism/blob/v1.3.0/src/prism.c#L16528-L16545
+ def trim_heredoc_whitespace(string, heredoc)
+ trimmed_whitespace = 0
+ trimmed_characters = 0
+ while (string[trimmed_characters] == "\t" || string[trimmed_characters] == " ") && trimmed_whitespace < heredoc.common_whitespace
+ if string[trimmed_characters] == "\t"
+ trimmed_whitespace = (trimmed_whitespace / 8 + 1) * 8;
+ break if trimmed_whitespace > heredoc.common_whitespace
+ else
+ trimmed_whitespace += 1
+ end
+ trimmed_characters += 1
+ end
+
+ string[trimmed_characters..]
+ end
+
+ # Escape sequences that have special meaning and should appear unescaped in the resulting string.
+ ESCAPES = {
+ "a" => "\a", "b" => "\b", "e" => "\e", "f" => "\f",
+ "n" => "\n", "r" => "\r", "s" => "\s", "t" => "\t",
+ "v" => "\v", "\\" => "\\"
+ }.freeze
+ private_constant :ESCAPES
+
+ # When one of these delimiters is encountered, then the other
+ # one is allowed to be escaped as well.
+ DELIMITER_SYMETRY = { "[" => "]", "(" => ")", "{" => "}", "<" => ">" }.freeze
+ private_constant :DELIMITER_SYMETRY
+
+
+ # https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/lexer-strings.rl#L14
+ REGEXP_META_CHARACTERS = ["\\", "$", "(", ")", "*", "+", ".", "<", ">", "?", "[", "]", "^", "{", "|", "}"].freeze
+ private_constant :REGEXP_META_CHARACTERS
+
+ # Apply Ruby string escaping rules
+ def unescape_string(string, quote)
+ # In single-quoted heredocs, everything is taken literally.
+ return string if quote == "<<'"
+
+ # OPTIMIZATION: Assume that few strings need escaping to speed up the common case.
+ return string unless string.include?("\\")
+
+ # Enclosing character for the string. `"` for `"foo"`, `{` for `%w{foo}`, etc.
+ delimiter = quote[-1]
+
+ if regexp?(quote)
+ # Should be handled similarly to single-quoted heredocs. The only character that is
+ # allowed to be escaped is the delimiter, except when that also has special meaning
+ # in the regexp. Since all the symmetry delimiters have special meaning, they don't need
+ # to be considered separately.
+ if REGEXP_META_CHARACTERS.include?(delimiter)
+ string
+ else
+ # There can never be an even amount of backslashes. It would be a syntax error.
+ string.gsub(/\\(#{Regexp.escape(delimiter)})/, '\1')
+ end
+ elsif interpolation?(quote)
+ # Appending individual escape sequences may force the string out of its intended
+ # encoding. Start out with binary and force it back later.
+ result = "".b
+
+ scanner = StringScanner.new(string)
+ while (skipped = scanner.skip_until(/\\/))
+ # Append what was just skipped over, excluding the found backslash.
+ result.append_as_bytes(string.byteslice(scanner.pos - skipped, skipped - 1))
+ escape_read(result, scanner, false, false)
+ end
+
+ # Add remaining chars
+ result.append_as_bytes(string.byteslice(scanner.pos..))
+ result.force_encoding(source_buffer.source.encoding)
+ else
+ delimiters = Regexp.escape("#{delimiter}#{DELIMITER_SYMETRY[delimiter]}")
+ string.gsub(/\\([\\#{delimiters}])/, '\1')
+ end
+ end
+
+ # Certain strings are merged into a single string token.
+ def simplify_string?(value, quote)
+ case quote
+ when "'"
+ # Only simplify 'foo'
+ !value.include?("\n")
+ when '"'
+ # Simplify when every line ends with a line continuation, or it is the last line
+ value.lines.all? do |line|
+ !line.end_with?("\n") || line[/(\\*)$/, 1]&.length&.odd?
+ end
+ else
+ # %q and similar are never simplified
+ false
+ end
+ end
+
+ # Escape a byte value, given the control and meta flags.
+ def escape_build(value, control, meta)
+ value &= 0x9f if control
+ value |= 0x80 if meta
+ value
+ end
+
+ # Read an escape out of the string scanner, given the control and meta
+ # flags, and push the unescaped value into the result.
+ def escape_read(result, scanner, control, meta)
+ if scanner.skip("\n")
+ # Line continuation
+ elsif (value = ESCAPES[scanner.peek(1)])
+ # Simple single-character escape sequences like \n
+ result.append_as_bytes(value)
+ scanner.pos += 1
+ elsif (value = scanner.scan(/[0-7]{1,3}/))
+ # \nnn
+ result.append_as_bytes(escape_build(value.to_i(8), control, meta))
+ elsif (value = scanner.scan(/x[0-9a-fA-F]{1,2}/))
+ # \xnn
+ result.append_as_bytes(escape_build(value[1..].to_i(16), control, meta))
+ elsif (value = scanner.scan(/u[0-9a-fA-F]{4}/))
+ # \unnnn
+ result.append_as_bytes(value[1..].hex.chr(Encoding::UTF_8))
+ elsif scanner.skip("u{}")
+ # https://github.com/whitequark/parser/issues/856
+ elsif (value = scanner.scan(/u{.*?}/))
+ # \u{nnnn ...}
+ value[2..-2].split.each do |unicode|
+ result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8))
+ end
+ elsif (value = scanner.scan(/c\\?(?=[[:print:]])|C-\\?(?=[[:print:]])/))
+ # \cx or \C-x where x is an ASCII printable character
+ escape_read(result, scanner, true, meta)
+ elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/))
+ # \M-x where x is an ASCII printable character
+ escape_read(result, scanner, control, true)
+ elsif (byte = scanner.scan_byte)
+ # Something else after an escape.
+ if control && byte == 0x3f # ASCII '?'
+ result.append_as_bytes(escape_build(0x7f, false, meta))
+ else
+ result.append_as_bytes(escape_build(byte, control, meta))
+ end
+ end
+ end
+
+ # In a percent array, certain whitespace can be preceeded with a backslash,
+ # causing the following characters to be part of the previous element.
+ def percent_array_unescape(string)
+ string.gsub(/(\\)+[ \f\n\r\t\v]/) do |full_match|
+ full_match.delete_prefix!("\\") if Regexp.last_match[1].length.odd?
+ full_match
+ end
+ end
+
+ # For %-arrays whitespace, the parser gem only considers whitespace before the newline.
+ def percent_array_leading_whitespace(string)
+ return 1 if string.start_with?("\n")
+
+ leading_whitespace = 0
+ string.each_char do |c|
+ break if c == "\n"
+ leading_whitespace += 1
+ end
+ leading_whitespace
+ end
+
+ # Determine if characters preceded by a backslash should be escaped or not
+ def interpolation?(quote)
+ !quote.end_with?("'") && !quote.start_with?("%q", "%w", "%i", "%s")
+ end
+
+ # Regexps allow interpolation but are handled differently during unescaping
+ def regexp?(quote)
+ quote == "/" || quote.start_with?("%r")
+ end
+
+ # Determine if the string is part of a %-style array.
+ def percent_array?(quote)
+ quote.start_with?("%w", "%W", "%i", "%I")
+ end
end
end
end
diff --git a/lib/prism/translation/parser/rubocop.rb b/lib/prism/translation/parser/rubocop.rb
deleted file mode 100644
index 6c9687a5cc..0000000000
--- a/lib/prism/translation/parser/rubocop.rb
+++ /dev/null
@@ -1,73 +0,0 @@
-# frozen_string_literal: true
-# typed: ignore
-
-warn "WARN: Prism is directly supported since RuboCop 1.62. The `prism/translation/parser/rubocop` file is deprecated."
-
-require "parser"
-require "rubocop"
-
-require_relative "../../prism"
-require_relative "../parser"
-
-module Prism
- module Translation
- class Parser
- # This is the special version numbers that should be used in RuboCop
- # configuration files to trigger using prism.
-
- # For Ruby 3.3
- VERSION_3_3 = 80_82_73_83_77.33
-
- # For Ruby 3.4
- VERSION_3_4 = 80_82_73_83_77.34
-
- # This module gets prepended into RuboCop::AST::ProcessedSource.
- module ProcessedSource
- # This condition is compatible with rubocop-ast versions up to 1.30.0.
- if RuboCop::AST::ProcessedSource.instance_method(:parser_class).arity == 1
- # Redefine parser_class so that we can inject the prism parser into the
- # list of known parsers.
- def parser_class(ruby_version)
- if ruby_version == Prism::Translation::Parser::VERSION_3_3
- warn "WARN: Setting `TargetRubyVersion: 80_82_73_83_77.33` is deprecated. " \
- "Set to `ParserEngine: parser_prism` and `TargetRubyVersion: 3.3` instead."
- require_relative "../parser33"
- Prism::Translation::Parser33
- elsif ruby_version == Prism::Translation::Parser::VERSION_3_4
- warn "WARN: Setting `TargetRubyVersion: 80_82_73_83_77.34` is deprecated. " \
- "Set to `ParserEngine: parser_prism` and `TargetRubyVersion: 3.4` instead."
- require_relative "../parser34"
- Prism::Translation::Parser34
- else
- super
- end
- end
- else
- # Redefine parser_class so that we can inject the prism parser into the
- # list of known parsers.
- def parser_class(ruby_version, _parser_engine)
- if ruby_version == Prism::Translation::Parser::VERSION_3_3
- warn "WARN: Setting `TargetRubyVersion: 80_82_73_83_77.33` is deprecated. " \
- "Set to `ParserEngine: parser_prism` and `TargetRubyVersion: 3.3` instead."
- require_relative "../parser33"
- Prism::Translation::Parser33
- elsif ruby_version == Prism::Translation::Parser::VERSION_3_4
- warn "WARN: Setting `TargetRubyVersion: 80_82_73_83_77.34` is deprecated. " \
- "Set to `ParserEngine: parser_prism` and `TargetRubyVersion: 3.4` instead."
- require_relative "../parser34"
- Prism::Translation::Parser34
- else
- super
- end
- end
- end
- end
- end
- end
-end
-
-# :stopdoc:
-RuboCop::AST::ProcessedSource.prepend(Prism::Translation::Parser::ProcessedSource)
-known_rubies = RuboCop::TargetRuby.const_get(:KNOWN_RUBIES)
-RuboCop::TargetRuby.send(:remove_const, :KNOWN_RUBIES)
-RuboCop::TargetRuby::KNOWN_RUBIES = [*known_rubies, Prism::Translation::Parser::VERSION_3_3].freeze
diff --git a/lib/prism/translation/parser33.rb b/lib/prism/translation/parser33.rb
deleted file mode 100644
index b09266e06a..0000000000
--- a/lib/prism/translation/parser33.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-
-module Prism
- module Translation
- # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`.
- class Parser33 < Parser
- def version # :nodoc:
- 33
- end
- end
- end
-end
diff --git a/lib/prism/translation/parser34.rb b/lib/prism/translation/parser34.rb
deleted file mode 100644
index 0ead70ad3c..0000000000
--- a/lib/prism/translation/parser34.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-
-module Prism
- module Translation
- # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`.
- class Parser34 < Parser
- def version # :nodoc:
- 34
- end
- end
- end
-end
diff --git a/lib/prism/translation/parser_current.rb b/lib/prism/translation/parser_current.rb
new file mode 100644
index 0000000000..f7c1070e30
--- /dev/null
+++ b/lib/prism/translation/parser_current.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+# :markup: markdown
+#--
+# typed: ignore
+
+module Prism
+ module Translation
+ case RUBY_VERSION
+ when /^3\.3\./
+ ParserCurrent = Parser33
+ when /^3\.4\./
+ ParserCurrent = Parser34
+ when /^3\.5\./, /^4\.0\./
+ ParserCurrent = Parser40
+ when /^4\.1\./
+ ParserCurrent = Parser41
+ else
+ # Keep this in sync with released Ruby.
+ parser = Parser40
+ major, minor, _patch = Gem::Version.new(RUBY_VERSION).segments
+ warn "warning: `Prism::Translation::Current` is loading #{parser.name}, " \
+ "but you are running #{major}.#{minor}."
+ ParserCurrent = parser
+ end
+ end
+end
diff --git a/lib/prism/translation/parser_versions.rb b/lib/prism/translation/parser_versions.rb
new file mode 100644
index 0000000000..720c7d548c
--- /dev/null
+++ b/lib/prism/translation/parser_versions.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+module Prism
+ module Translation
+ # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`.
+ class Parser33 < Parser
+ def version # :nodoc:
+ 33
+ end
+ end
+
+ # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`.
+ class Parser34 < Parser
+ def version # :nodoc:
+ 34
+ end
+ end
+
+ # This class is the entry-point for Ruby 4.0 of `Prism::Translation::Parser`.
+ class Parser40 < Parser
+ def version # :nodoc:
+ 40
+ end
+ end
+
+ Parser35 = Parser40 # :nodoc:
+
+ # This class is the entry-point for Ruby 4.1 of `Prism::Translation::Parser`.
+ class Parser41 < Parser
+ def version # :nodoc:
+ 41
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index 3c06f6a40d..f179a149a1 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -1,6 +1,5 @@
# frozen_string_literal: true
-
-require "ripper"
+# :markup: markdown
module Prism
module Translation
@@ -19,31 +18,19 @@ module Prism
# The main known difference is that we may omit dispatching some events in
# some cases. This impacts the following events:
#
- # * on_assign_error
- # * on_comma
- # * on_ignored_nl
- # * on_ignored_sp
- # * on_kw
- # * on_label_end
- # * on_lbrace
- # * on_lbracket
- # * on_lparen
- # * on_nl
- # * on_op
- # * on_operator_ambiguous
- # * on_rbrace
- # * on_rbracket
- # * on_rparen
- # * on_semicolon
- # * on_sp
- # * on_symbeg
- # * on_tstring_beg
- # * on_tstring_end
+ # - on_assign_error
+ # - on_comma
+ # - on_ignored_nl
+ # - on_ignored_sp
+ # - on_nl
+ # - on_operator_ambiguous
+ # - on_semicolon
+ # - on_sp
#
class Ripper < Compiler
# Parses the given Ruby program read from +src+.
# +src+ must be a String or an IO or a object with a #gets method.
- def Ripper.parse(src, filename = "(ripper)", lineno = 1)
+ def self.parse(src, filename = "(ripper)", lineno = 1)
new(src, filename, lineno).parse
end
@@ -54,23 +41,24 @@ module Prism
# By default, this method does not handle syntax errors in +src+,
# use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+.
#
- # require 'ripper'
- # require 'pp'
+ # require "ripper"
+ # require "pp"
#
- # pp Ripper.lex("def m(a) nil end")
- # #=> [[[1, 0], :on_kw, "def", FNAME ],
- # [[1, 3], :on_sp, " ", FNAME ],
- # [[1, 4], :on_ident, "m", ENDFN ],
- # [[1, 5], :on_lparen, "(", BEG|LABEL],
- # [[1, 6], :on_ident, "a", ARG ],
- # [[1, 7], :on_rparen, ")", ENDFN ],
- # [[1, 8], :on_sp, " ", BEG ],
- # [[1, 9], :on_kw, "nil", END ],
- # [[1, 12], :on_sp, " ", END ],
- # [[1, 13], :on_kw, "end", END ]]
+ # pp Ripper.lex("def m(a) nil end")
+ # #=> [[[1, 0], :on_kw, "def", FNAME ],
+ # [[1, 3], :on_sp, " ", FNAME ],
+ # [[1, 4], :on_ident, "m", ENDFN ],
+ # [[1, 5], :on_lparen, "(", BEG|LABEL],
+ # [[1, 6], :on_ident, "a", ARG ],
+ # [[1, 7], :on_rparen, ")", ENDFN ],
+ # [[1, 8], :on_sp, " ", BEG ],
+ # [[1, 9], :on_kw, "nil", END ],
+ # [[1, 12], :on_sp, " ", END ],
+ # [[1, 13], :on_kw, "end", END ]]
#
- def Ripper.lex(src, filename = "-", lineno = 1, raise_errors: false)
- result = Prism.lex_compat(src, filepath: filename, line: lineno)
+ def self.lex(src, filename = "-", lineno = 1, raise_errors: false)
+ coerced = coerce_source(src)
+ result = Prism.lex_compat(coerced, filepath: filename, line: lineno, version: "current", encoding: coerced.encoding)
if result.failure? && raise_errors
raise SyntaxError, result.errors.first.message
@@ -79,6 +67,34 @@ module Prism
end
end
+ # Tokenizes the Ruby program and returns an array of strings.
+ # The +filename+ and +lineno+ arguments are mostly ignored, since the
+ # return value is just the tokenized input.
+ # By default, this method does not handle syntax errors in +src+,
+ # use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+.
+ #
+ # p Ripper.tokenize("def m(a) nil end")
+ # # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]
+ #
+ def self.tokenize(...)
+ lex(...).map { |token| token[2] }
+ end
+
+ # Mirrors the various lex_types that ripper supports
+ def self.coerce_source(source) # :nodoc:
+ if source.is_a?(IO)
+ source.read
+ elsif source.respond_to?(:gets)
+ src = +""
+ while line = source.gets
+ src << line
+ end
+ src
+ else
+ source.to_str
+ end
+ end
+
# This contains a table of all of the parser events and their
# corresponding arity.
PARSER_EVENT_TABLE = {
@@ -331,7 +347,7 @@ module Prism
"__ENCODING__",
"__FILE__",
"__LINE__"
- ]
+ ].to_set
# A list of all of the Ruby binary operators.
BINARY_OPERATORS = [
@@ -356,7 +372,7 @@ module Prism
:/,
:*,
:**
- ]
+ ].to_set
private_constant :KEYWORDS, :BINARY_OPERATORS
@@ -368,17 +384,17 @@ module Prism
# returning +nil+ in such cases. Use the +raise_errors+ keyword
# to raise a SyntaxError for an error in +src+.
#
- # require "ripper"
- # require "pp"
+ # require "ripper"
+ # require "pp"
#
- # pp Ripper.sexp("def m(a) nil end")
- # #=> [:program,
- # [[:def,
- # [:@ident, "m", [1, 4]],
- # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil, nil, nil, nil]],
- # [:bodystmt, [[:var_ref, [:@kw, "nil", [1, 9]]]], nil, nil, nil]]]]
+ # pp Ripper.sexp("def m(a) nil end")
+ # #=> [:program,
+ # [[:def,
+ # [:@ident, "m", [1, 4]],
+ # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil, nil, nil, nil]],
+ # [:bodystmt, [[:var_ref, [:@kw, "nil", [1, 9]]]], nil, nil, nil]]]]
#
- def Ripper.sexp(src, filename = "-", lineno = 1, raise_errors: false)
+ def self.sexp(src, filename = "-", lineno = 1, raise_errors: false)
builder = SexpBuilderPP.new(src, filename, lineno)
sexp = builder.parse
if builder.error?
@@ -397,23 +413,23 @@ module Prism
# returning +nil+ in such cases. Use the +raise_errors+ keyword
# to raise a SyntaxError for an error in +src+.
#
- # require 'ripper'
- # require 'pp'
+ # require "ripper"
+ # require "pp"
#
- # pp Ripper.sexp_raw("def m(a) nil end")
- # #=> [:program,
- # [:stmts_add,
- # [:stmts_new],
- # [:def,
- # [:@ident, "m", [1, 4]],
- # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil]],
- # [:bodystmt,
- # [:stmts_add, [:stmts_new], [:var_ref, [:@kw, "nil", [1, 9]]]],
- # nil,
- # nil,
- # nil]]]]
+ # pp Ripper.sexp_raw("def m(a) nil end")
+ # #=> [:program,
+ # [:stmts_add,
+ # [:stmts_new],
+ # [:def,
+ # [:@ident, "m", [1, 4]],
+ # [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil]],
+ # [:bodystmt,
+ # [:stmts_add, [:stmts_new], [:var_ref, [:@kw, "nil", [1, 9]]]],
+ # nil,
+ # nil,
+ # nil]]]]
#
- def Ripper.sexp_raw(src, filename = "-", lineno = 1, raise_errors: false)
+ def self.sexp_raw(src, filename = "-", lineno = 1, raise_errors: false)
builder = SexpBuilder.new(src, filename, lineno)
sexp = builder.parse
if builder.error?
@@ -425,9 +441,93 @@ module Prism
end
end
+ autoload :Filter, "prism/translation/ripper/filter"
+ autoload :Lexer, "prism/translation/ripper/lexer"
autoload :SexpBuilder, "prism/translation/ripper/sexp"
autoload :SexpBuilderPP, "prism/translation/ripper/sexp"
+ # Provides optimized access to line and column information.
+ # Ripper bounds are mostly accessed in a linear fashion, so
+ # we can try a linear scan first and fall back to binary search.
+ class LineAndColumnCache # :nodoc:
+ # How many lines to look ahead/behind before falling back to binary search.
+ WINDOW = 8
+ private_constant :WINDOW
+
+ #: (Source source) -> void
+ def initialize(source)
+ @source = source
+ @offsets = source.offsets
+ @hint = 0
+ end
+
+ #: (Integer byte_offset) -> [Integer, Integer]
+ def line_and_column(byte_offset)
+ @hint = new_hint(byte_offset) || @source.find_line(byte_offset)
+ return [@hint + @source.start_line, byte_offset - @offsets[@hint]]
+ end
+
+ private
+
+ def new_hint(byte_offset)
+ if @offsets[@hint] <= byte_offset
+ # Same line?
+ if (@hint + 1 >= @offsets.size || @offsets[@hint + 1] > byte_offset)
+ return @hint
+ end
+
+ # Scan forwards
+ limit = [@hint + WINDOW + 1, @offsets.size].min
+ idx = @hint + 1
+ while idx < limit
+ if @offsets[idx] > byte_offset
+ return idx - 1
+ end
+ if @offsets[idx] == byte_offset
+ return idx
+ end
+ idx += 1
+ end
+ else
+ # Scan backwards
+ limit = @hint > WINDOW ? @hint - WINDOW : 0
+ idx = @hint
+ while idx >= limit + 1
+ if @offsets[idx - 1] <= byte_offset
+ return idx - 1
+ end
+ idx -= 1
+ end
+ end
+
+ nil
+ end
+ end
+
+ # :stopdoc:
+ # This is not part of the public API but used by some gems.
+
+ # Ripper-internal bitflags.
+ LEX_STATE_NAMES = %i[
+ BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM
+ ].map.with_index.to_h { |name, i| [2 ** i, name] }.freeze
+ private_constant :LEX_STATE_NAMES
+
+ LEX_STATE_NAMES.each do |value, key|
+ const_set("EXPR_#{key}", value)
+ end
+ EXPR_NONE = 0
+ EXPR_VALUE = EXPR_BEG
+ EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
+ EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
+ EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
+
+ def self.lex_state_name(state)
+ LEX_STATE_NAMES.filter_map { |flag, name| name if state & flag != 0 }.join("|")
+ end
+
+ # :startdoc:
+
# The source that is being parsed.
attr_reader :source
@@ -437,16 +537,17 @@ module Prism
# The current line number of the parser.
attr_reader :lineno
- # The current column number of the parser.
+ # The current column in bytes of the parser.
attr_reader :column
# Create a new Translation::Ripper object with the given source.
def initialize(source, filename = "(ripper)", lineno = 1)
- @source = source
+ @source = Ripper.coerce_source(source)
@filename = filename
@lineno = lineno
@column = 0
@result = nil
+ @line_and_column_cache = nil
end
##########################################################################
@@ -465,7 +566,12 @@ module Prism
bounds(location)
if comment.is_a?(InlineComment)
- on_comment(comment.slice)
+ # Inline comments always contain a newline if the line itself contains it
+ if result.source.source.bytesize > comment.location.end_offset
+ on_comment("#{comment.slice}\n")
+ else
+ on_comment(comment.slice)
+ end
else
offset = location.start_offset
lines = comment.slice.lines
@@ -546,9 +652,14 @@ module Prism
# Visitor methods
##########################################################################
+ # :stopdoc:
+
# alias foo bar
# ^^^^^^^^^^^^^
def visit_alias_method_node(node)
+ bounds(node.keyword_loc)
+ on_kw("alias")
+
new_name = visit(node.new_name)
old_name = visit(node.old_name)
@@ -559,6 +670,9 @@ module Prism
# alias $foo $bar
# ^^^^^^^^^^^^^^^
def visit_alias_global_variable_node(node)
+ bounds(node.keyword_loc)
+ on_kw("alias")
+
new_name = visit_alias_global_variable_node_value(node.new_name)
old_name = visit_alias_global_variable_node_value(node.old_name)
@@ -584,6 +698,10 @@ module Prism
# ^^^^^^^^^
def visit_alternation_pattern_node(node)
left = visit_pattern_node(node.left)
+
+ bounds(node.operator_loc)
+ on_op("|")
+
right = visit_pattern_node(node.right)
bounds(node.location)
@@ -594,7 +712,13 @@ module Prism
# parenthesis node that can be used to wrap patterns.
private def visit_pattern_node(node)
if node.is_a?(ParenthesesNode)
- visit(node.body)
+ bounds(node.opening_loc)
+ on_lparen("(")
+ result = visit(node.body)
+ bounds(node.closing_loc)
+ on_rparen(")")
+
+ result
else
visit(node)
end
@@ -604,6 +728,14 @@ module Prism
# ^^^^^^^
def visit_and_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ if node.operator == "and"
+ on_kw("and")
+ else
+ on_op("&&")
+ end
+
right = visit(node.right)
bounds(node.location)
@@ -631,6 +763,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
when /^%i/
@@ -650,6 +784,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
when /^%W/
@@ -687,6 +823,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
when /^%I/
@@ -724,6 +862,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
else
@@ -740,15 +880,21 @@ module Prism
on_array(elements)
end
- # Dispatch a words_sep event that contains the space between the elements
+ # Dispatch words_sep events that contain the whitespace between the elements
# of list literals.
private def visit_words_sep(opening_loc, previous, current)
- end_offset = (previous.nil? ? opening_loc : previous.location).end_offset
- start_offset = current.location.start_offset
-
- if end_offset != start_offset
- bounds(current.location.copy(start_offset: end_offset))
- on_words_sep(source.byteslice(end_offset...start_offset))
+ start_offset = (previous.nil? ? opening_loc : previous.location).end_offset
+ end_offset = current.start_offset
+ length = end_offset - start_offset
+
+ if length > 0
+ whitespace = source.byteslice(start_offset, length)
+ current_offset = start_offset
+ whitespace.each_line do |part|
+ bounds(opening_loc.copy(start_offset: current_offset, length: part.bytesize))
+ on_words_sep(part)
+ current_offset += part.bytesize
+ end
end
end
@@ -774,9 +920,18 @@ module Prism
# ^^^^^
def visit_array_pattern_node(node)
constant = visit(node.constant)
+
+ if node.opening_loc
+ bounds(node.opening_loc)
+ node.opening == "[" ? on_lbracket("[") : on_lparen("(")
+ end
+
requireds = visit_all(node.requireds) if node.requireds.any?
rest =
if (rest_node = node.rest).is_a?(SplatNode)
+ bounds(rest_node.operator_loc)
+ on_op("*")
+
if rest_node.expression.nil?
bounds(rest_node.location)
on_var_field(nil)
@@ -787,6 +942,10 @@ module Prism
posts = visit_all(node.posts) if node.posts.any?
+ if node.closing_loc
+ bounds(node.closing_loc)
+ node.closing == "]" ? on_rbracket("]") : on_rparen(")")
+ end
bounds(node.location)
on_aryptn(constant, requireds, rest, posts)
end
@@ -802,6 +961,12 @@ module Prism
# ^^^^
def visit_assoc_node(node)
key = visit(node.key)
+
+ if node.operator_loc
+ bounds(node.operator_loc)
+ on_op("=>")
+ end
+
value = visit(node.value)
bounds(node.location)
@@ -814,6 +979,9 @@ module Prism
# { **foo }
# ^^^^^
def visit_assoc_splat_node(node)
+ bounds(node.operator_loc)
+ on_op("**")
+
value = visit(node.value)
bounds(node.location)
@@ -830,8 +998,18 @@ module Prism
# begin end
# ^^^^^^^^^
def visit_begin_node(node)
+ if node.begin_keyword_loc
+ bounds(node.begin_keyword_loc)
+ on_kw("begin")
+ end
+
clauses = visit_begin_node_clauses(node.begin_keyword_loc, node, false)
+ if node.end_keyword_loc
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
on_begin(clauses)
end
@@ -843,7 +1021,7 @@ module Prism
on_stmts_add(on_stmts_new, on_void_stmt)
else
body = node.statements.body
- body.unshift(nil) if void_stmt?(location, node.statements.body[0].location, allow_newline)
+ body = [nil, *body] if void_stmt?(location, node.statements.body[0].location, allow_newline)
bounds(node.statements.location)
visit_statements_node_body(body)
@@ -852,12 +1030,15 @@ module Prism
rescue_clause = visit(node.rescue_clause)
else_clause =
unless (else_clause_node = node.else_clause).nil?
+ bounds(else_clause_node.else_keyword_loc)
+ on_kw("else")
+
else_statements =
if else_clause_node.statements.nil?
[nil]
else
body = else_clause_node.statements.body
- body.unshift(nil) if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline)
+ body = [nil, *body] if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline)
body
end
@@ -879,7 +1060,7 @@ module Prism
on_bodystmt(visit_statements_node_body([nil]), nil, nil, nil)
when StatementsNode
body = [*node.body]
- body.unshift(nil) if void_stmt?(location, body[0].location, allow_newline)
+ body = [nil, *body] if void_stmt?(location, body[0].location, allow_newline)
stmts = visit_statements_node_body(body)
bounds(node.body.first.location)
@@ -894,6 +1075,8 @@ module Prism
# foo(&bar)
# ^^^^
def visit_block_argument_node(node)
+ bounds(node.operator_loc)
+ on_op("&")
visit(node.expression)
end
@@ -907,6 +1090,13 @@ module Prism
# Visit a BlockNode.
def visit_block_node(node)
braces = node.opening == "{"
+ bounds(node.opening_loc)
+ if braces
+ on_lbrace("{")
+ else
+ on_kw("do")
+ end
+
parameters = visit(node.parameters)
body =
@@ -919,7 +1109,7 @@ module Prism
braces ? stmts : on_bodystmt(stmts, nil, nil, nil)
when StatementsNode
stmts = node.body.body
- stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
+ stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
stmts = visit_statements_node_body(stmts)
bounds(node.body.location)
@@ -931,6 +1121,14 @@ module Prism
end
if braces
+ bounds(node.closing_loc)
+ on_rbrace("}")
+ else
+ bounds(node.closing_loc)
+ on_kw("end")
+ end
+
+ if braces
bounds(node.location)
on_brace_block(parameters, body)
else
@@ -942,12 +1140,15 @@ module Prism
# def foo(&bar); end
# ^^^^
def visit_block_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("&")
+
if node.name_loc.nil?
bounds(node.location)
on_blockarg(nil)
else
bounds(node.name_loc)
- name = visit_token(node.name.to_s)
+ name = on_ident(node.name.to_s)
bounds(node.location)
on_blockarg(name)
@@ -956,6 +1157,9 @@ module Prism
# A block's parameters.
def visit_block_parameters_node(node)
+ bounds(node.opening_loc)
+ on_op("|")
+
parameters =
if node.parameters.nil?
on_params(nil, nil, nil, nil, nil, nil, nil)
@@ -970,6 +1174,9 @@ module Prism
false
end
+ bounds(node.closing_loc)
+ on_op("|")
+
bounds(node.location)
on_block_var(parameters, locals)
end
@@ -980,6 +1187,9 @@ module Prism
# break foo
# ^^^^^^^^^
def visit_break_node(node)
+ bounds(node.keyword_loc)
+ on_kw("break")
+
if node.arguments.nil?
bounds(node.location)
on_break(on_args_new)
@@ -1004,20 +1214,32 @@ module Prism
case node.name
when :[]
receiver = visit(node.receiver)
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
+ block = visit(block_node)
bounds(node.location)
call = on_aref(receiver, arguments)
- if block.nil?
- call
- else
+ if block_node
bounds(node.location)
on_method_add_block(call, block)
+ else
+ call
end
when :[]=
receiver = visit(node.receiver)
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
*arguments, last_argument = node.arguments.arguments
arguments << node.block if !node.block.nil?
@@ -1033,6 +1255,11 @@ module Prism
end
end
+ bounds(node.closing_loc)
+ on_rbracket("]")
+ bounds(node.equal_loc)
+ on_op("=")
+
bounds(node.location)
call = on_aref_field(receiver, arguments)
value = visit_write_value(last_argument)
@@ -1040,17 +1267,54 @@ module Prism
bounds(last_argument.location)
on_assign(call, value)
when :-@, :+@, :~
- receiver = visit(node.receiver)
+ bounds(node.message_loc)
+ on_op(node.message)
+ receiver = visit(node.receiver)
bounds(node.location)
on_unary(node.name, receiver)
when :!
- receiver = visit(node.receiver)
+ bounds(node.message_loc)
+ if node.message == "not"
+ on_kw("not")
- bounds(node.location)
- on_unary(node.message == "not" ? :not : :!, receiver)
- when *BINARY_OPERATORS
+ if node.opening_loc
+ bounds(node.opening_loc)
+ on_lparen("(")
+ end
+
+ receiver =
+ if node.receiver.is_a?(ParenthesesNode) && node.receiver.body.nil?
+ # The parens in `not()` just emit parens and nothing else.
+ bounds(node.receiver.opening_loc)
+ on_lparen("(")
+ bounds(node.receiver.closing_loc)
+ on_rparen(")")
+ nil
+ else
+ visit(node.receiver)
+ end
+
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_rparen(")")
+ end
+ bounds(node.location)
+ on_unary(:not, receiver)
+ else
+ on_op("!")
+
+ receiver = visit(node.receiver)
+
+ bounds(node.location)
+ on_unary(:!, receiver)
+ end
+ when BINARY_OPERATORS
receiver = visit(node.receiver)
+
+ bounds(node.message_loc)
+ on_op(node.message)
+
value = visit(node.arguments.arguments.first)
bounds(node.location)
@@ -1062,9 +1326,21 @@ module Prism
if node.variable_call?
on_vcall(message)
else
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+ if node.opening_loc
+ bounds(node.opening_loc)
+ on_lparen("(")
+ end
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_rparen(")")
+ end
+
+ block = visit(block_node)
call =
- if node.opening_loc.nil? && arguments&.any?
+ if node.opening_loc.nil? && get_arguments_and_block(node.arguments, node.block).first.any?
bounds(node.location)
on_command(message, arguments)
elsif !node.opening_loc.nil?
@@ -1075,11 +1351,11 @@ module Prism
on_method_add_arg(on_fcall(message), on_args_new)
end
- if block.nil?
- call
- else
+ if block_node
bounds(node.block.location)
on_method_add_block(call, block)
+ else
+ call
end
end
end
@@ -1087,7 +1363,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
message =
if node.message_loc.nil?
@@ -1097,13 +1373,30 @@ module Prism
visit_token(node.message, false)
end
+ if node.equal_loc
+ bounds(node.equal_loc)
+ on_op("=")
+ end
+
if node.name.end_with?("=") && !node.message.end_with?("=") && !node.arguments.nil? && node.block.nil?
value = visit_write_value(node.arguments.arguments.first)
bounds(node.location)
on_assign(on_field(receiver, call_operator, message), value)
else
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+ if node.opening_loc
+ bounds(node.opening_loc)
+ on_lparen("(")
+ end
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_rparen(")")
+ end
+
+ block = visit(block_node)
call =
if node.opening_loc.nil?
bounds(node.location)
@@ -1121,27 +1414,35 @@ module Prism
on_method_add_arg(on_call(receiver, call_operator, message), arguments)
end
- if block.nil?
- call
- else
+ if block_node
bounds(node.block.location)
on_method_add_block(call, block)
+ else
+ call
end
end
end
end
- # Visit the arguments and block of a call node and return the arguments
- # and block as they should be used.
- private def visit_call_node_arguments(arguments_node, block_node, trailing_comma)
+ # Extract the arguments and block Ripper-style, which means if the block
+ # is like `&b` then it's moved to arguments.
+ private def get_arguments_and_block(arguments_node, block_node)
arguments = arguments_node&.arguments || []
block = block_node
if block.is_a?(BlockArgumentNode)
- arguments << block
+ arguments += [block]
block = nil
end
+ [arguments, block]
+ end
+
+ # Visit the arguments and block of a call node and return the arguments
+ # and block as they should be used.
+ private def visit_call_node_arguments(arguments_node, block_node, trailing_comma)
+ arguments, block = get_arguments_and_block(arguments_node, block_node)
+
[
if arguments.length == 1 && arguments.first.is_a?(ForwardingArgumentsNode)
visit(arguments.first)
@@ -1155,7 +1456,7 @@ module Prism
on_args_add_block(args, false)
end
end,
- visit(block)
+ block,
]
end
@@ -1173,7 +1474,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1181,8 +1482,8 @@ module Prism
bounds(node.location)
target = on_field(receiver, call_operator, message)
- bounds(node.operator_loc)
- operator = on_op("#{node.operator}=")
+ bounds(node.binary_operator_loc)
+ operator = on_op("#{node.binary_operator}=")
value = visit_write_value(node.value)
bounds(node.location)
@@ -1195,7 +1496,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1217,7 +1518,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1239,6 +1540,9 @@ module Prism
if node.call_operator == "::"
receiver = visit(node.receiver)
+ bounds(node.call_operator_loc)
+ on_op("::")
+
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1248,7 +1552,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1262,6 +1566,10 @@ module Prism
# ^^^^^^^^^^
def visit_capture_pattern_node(node)
value = visit(node.value)
+
+ bounds(node.operator_loc)
+ on_op("=>")
+
target = visit(node.target)
bounds(node.location)
@@ -1271,10 +1579,21 @@ module Prism
# case foo; when bar; end
# ^^^^^^^^^^^^^^^^^^^^^^^
def visit_case_node(node)
+ bounds(node.case_keyword_loc)
+ on_kw("case")
+
predicate = visit(node.predicate)
+ visited_conditions = node.conditions.map { |condition| visit(condition) }
+ visited_else_clause = visit(node.else_clause)
+
+ if !node.else_clause
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
clauses =
- node.conditions.reverse_each.inject(visit(node.consequent)) do |consequent, condition|
- on_when(*visit(condition), consequent)
+ visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition|
+ on_when(*condition, current)
end
bounds(node.location)
@@ -1284,10 +1603,23 @@ module Prism
# case foo; in bar; end
# ^^^^^^^^^^^^^^^^^^^^^
def visit_case_match_node(node)
+ bounds(node.case_keyword_loc)
+ on_kw("case")
+
predicate = visit(node.predicate)
+ visited_conditions = node.conditions.map do | condition|
+ visit(condition)
+ end
+ visited_else_clause = visit(node.else_clause)
+
+ if !node.else_clause
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
clauses =
- node.conditions.reverse_each.inject(visit(node.consequent)) do |consequent, condition|
- on_in(*visit(condition), consequent)
+ visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition|
+ on_in(*condition, current)
end
bounds(node.location)
@@ -1297,6 +1629,9 @@ module Prism
# class Foo; end
# ^^^^^^^^^^^^^^
def visit_class_node(node)
+ bounds(node.class_keyword_loc)
+ on_kw("class")
+
constant_path =
if node.constant_path.is_a?(ConstantReadNode)
bounds(node.constant_path.location)
@@ -1305,9 +1640,17 @@ module Prism
visit(node.constant_path)
end
+ if node.inheritance_operator_loc
+ bounds(node.inheritance_operator_loc)
+ on_op("<")
+ end
+
superclass = visit(node.superclass)
bodystmt = visit_body_node(node.superclass&.location || node.constant_path.location, node.body, node.superclass.nil?)
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_class(constant_path, superclass, bodystmt)
end
@@ -1321,12 +1664,13 @@ module Prism
# @@foo = 1
# ^^^^^^^^^
- #
- # @@foo, @@bar = 1
- # ^^^^^ ^^^^^
def visit_class_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_cvar(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1339,8 +1683,8 @@ module Prism
bounds(node.name_loc)
target = on_var_field(on_cvar(node.name.to_s))
- bounds(node.operator_loc)
- operator = on_op("#{node.operator}=")
+ bounds(node.binary_operator_loc)
+ operator = on_op("#{node.binary_operator}=")
value = visit_write_value(node.value)
bounds(node.location)
@@ -1391,12 +1735,13 @@ module Prism
# Foo = 1
# ^^^^^^^
- #
- # Foo, Bar = 1
- # ^^^ ^^^
def visit_constant_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_const(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1409,8 +1754,8 @@ module Prism
bounds(node.name_loc)
target = on_var_field(on_const(node.name.to_s))
- bounds(node.operator_loc)
- operator = on_op("#{node.operator}=")
+ bounds(node.binary_operator_loc)
+ operator = on_op("#{node.binary_operator}=")
value = visit_write_value(node.value)
bounds(node.location)
@@ -1456,16 +1801,24 @@ module Prism
# ^^^^^^^^
def visit_constant_path_node(node)
if node.parent.nil?
- bounds(node.child.location)
- child = on_const(node.child.name.to_s)
+ if node.delimiter_loc
+ bounds(node.delimiter_loc)
+ on_op("::")
+ end
+
+ bounds(node.name_loc)
+ child = on_const(node.name.to_s)
bounds(node.location)
on_top_const_ref(child)
else
parent = visit(node.parent)
- bounds(node.child.location)
- child = on_const(node.child.name.to_s)
+ bounds(node.delimiter_loc)
+ on_op("::")
+
+ bounds(node.name_loc)
+ child = on_const(node.name.to_s)
bounds(node.location)
on_const_path_ref(parent, child)
@@ -1474,11 +1827,12 @@ module Prism
# Foo::Bar = 1
# ^^^^^^^^^^^^
- #
- # Foo::Foo, Bar::Bar = 1
- # ^^^^^^^^ ^^^^^^^^
def visit_constant_path_write_node(node)
target = visit_constant_path_write_node_target(node.target)
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1488,16 +1842,24 @@ module Prism
# Visit a constant path that is part of a write node.
private def visit_constant_path_write_node_target(node)
if node.parent.nil?
- bounds(node.child.location)
- child = on_const(node.child.name.to_s)
+ if node.delimiter_loc
+ bounds(node.delimiter_loc)
+ on_op("::")
+ end
+
+ bounds(node.name_loc)
+ child = on_const(node.name.to_s)
bounds(node.location)
on_top_const_field(child)
else
parent = visit(node.parent)
- bounds(node.child.location)
- child = on_const(node.child.name.to_s)
+ bounds(node.delimiter_loc)
+ on_op("::")
+
+ bounds(node.name_loc)
+ child = on_const(node.name.to_s)
bounds(node.location)
on_const_path_field(parent, child)
@@ -1508,10 +1870,9 @@ module Prism
# ^^^^^^^^^^^^^^^
def visit_constant_path_operator_write_node(node)
target = visit_constant_path_write_node_target(node.target)
- value = visit(node.value)
- bounds(node.operator_loc)
- operator = on_op("#{node.operator}=")
+ bounds(node.binary_operator_loc)
+ operator = on_op("#{node.binary_operator}=")
value = visit_write_value(node.value)
bounds(node.location)
@@ -1522,7 +1883,6 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_constant_path_and_write_node(node)
target = visit_constant_path_write_node_target(node.target)
- value = visit(node.value)
bounds(node.operator_loc)
operator = on_op("&&=")
@@ -1536,7 +1896,6 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_constant_path_or_write_node(node)
target = visit_constant_path_write_node_target(node.target)
- value = visit(node.value)
bounds(node.operator_loc)
operator = on_op("||=")
@@ -1558,16 +1917,24 @@ module Prism
# def self.foo; end
# ^^^^^^^^^^^^^^^^^
def visit_def_node(node)
+ bounds(node.def_keyword_loc)
+ on_kw("def")
+
receiver = visit(node.receiver)
operator =
if !node.operator_loc.nil?
bounds(node.operator_loc)
- visit_token(node.operator)
+ node.operator == "." ? on_period(".") : on_op("::")
end
bounds(node.name_loc)
name = visit_token(node.name_loc.slice)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
parameters =
if node.parameters.nil?
bounds(node.location)
@@ -1577,10 +1944,17 @@ module Prism
end
if !node.lparen_loc.nil?
+ bounds(node.rparen_loc)
+ on_rparen(")")
bounds(node.lparen_loc)
parameters = on_paren(parameters)
end
+ if node.equal_loc
+ bounds(node.equal_loc)
+ on_op("=")
+ end
+
bodystmt =
if node.equal_loc.nil?
visit_body_node(node.rparen_loc || node.end_keyword_loc, node.body)
@@ -1591,11 +1965,16 @@ module Prism
on_bodystmt(body, nil, nil, nil)
end
+ if node.end_keyword_loc
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
- if receiver.nil?
- on_def(name, parameters, bodystmt)
- else
+ if receiver
on_defs(receiver, operator, name, parameters, bodystmt)
+ else
+ on_def(name, parameters, bodystmt)
end
end
@@ -1605,24 +1984,59 @@ module Prism
# defined?(a)
# ^^^^^^^^^^^
def visit_defined_node(node)
+ bounds(node.keyword_loc)
+ on_kw("defined?")
+
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
+ expression = visit(node.value)
+
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
+ # Very weird circumstances here where something like:
+ #
+ # defined?
+ # (1)
+ #
+ # gets parsed in Ruby as having only the `1` expression but in Ripper it
+ # gets parsed as having a parentheses node. In this case we need to
+ # synthesize that node to match Ripper's behavior.
+ if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n")
+ bounds(node.lparen_loc.join(node.rparen_loc))
+ expression = on_paren(on_stmts_add(on_stmts_new, expression))
+ end
+
bounds(node.location)
- on_defined(visit(node.value))
+ on_defined(expression)
end
# if foo then bar else baz end
# ^^^^^^^^^^^^
def visit_else_node(node)
+ bounds(node.else_keyword_loc)
+ on_kw("else")
+
statements =
if node.statements.nil?
[nil]
else
body = node.statements.body
- body.unshift(nil) if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false)
+ body = [nil, *body] if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false)
body
end
+ else_statements = visit_statements_node_body(statements)
+
+ bounds(node.end_keyword_loc)
+ on_kw("end")
bounds(node.location)
- on_else(visit_statements_node_body(statements))
+ on_else(else_statements)
end
# "foo #{bar}"
@@ -1660,12 +2074,15 @@ module Prism
# Visit an EnsureNode node.
def visit_ensure_node(node)
+ bounds(node.ensure_keyword_loc)
+ on_kw("ensure")
+
statements =
if node.statements.nil?
[nil]
else
body = node.statements.body
- body.unshift(nil) if void_stmt?(node.ensure_keyword_loc, body[0].location, false)
+ body = [nil, *body] if void_stmt?(node.ensure_keyword_loc, body[0].location, false)
body
end
@@ -1686,6 +2103,14 @@ module Prism
# ^^^^^^^^^^^
def visit_find_pattern_node(node)
constant = visit(node.constant)
+
+ if node.opening_loc
+ bounds(node.opening_loc)
+ node.opening == "[" ? on_lbracket("[") : on_lparen("(")
+ end
+ bounds(node.left.operator_loc)
+ on_op("*")
+
left =
if node.left.expression.nil?
bounds(node.left.location)
@@ -1695,6 +2120,10 @@ module Prism
end
requireds = visit_all(node.requireds) if node.requireds.any?
+
+ bounds(node.right.operator_loc)
+ on_op("*")
+
right =
if node.right.expression.nil?
bounds(node.right.location)
@@ -1703,6 +2132,10 @@ module Prism
visit(node.right.expression)
end
+ if node.closing_loc
+ bounds(node.closing_loc)
+ node.closing == "]" ? on_rbracket("]") : on_rparen(")")
+ end
bounds(node.location)
on_fndptn(constant, left, requireds, right)
end
@@ -1711,6 +2144,10 @@ module Prism
# ^^^^^^^^^^
def visit_flip_flop_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ on_op(node.operator)
+
right = visit(node.right)
bounds(node.location)
@@ -1730,8 +2167,18 @@ module Prism
# for foo in bar do end
# ^^^^^^^^^^^^^^^^^^^^^
def visit_for_node(node)
+ bounds(node.for_keyword_loc)
+ on_kw("for")
+
index = visit(node.index)
+ bounds(node.in_keyword_loc)
+ on_kw("in")
+
collection = visit(node.collection)
+ if node.do_keyword_loc
+ bounds(node.do_keyword_loc)
+ on_kw("do")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -1740,6 +2187,9 @@ module Prism
visit(node.statements)
end
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_for(index, collection, statements)
end
@@ -1748,6 +2198,7 @@ module Prism
# ^^^
def visit_forwarding_arguments_node(node)
bounds(node.location)
+ on_op("...")
on_args_forward
end
@@ -1755,6 +2206,7 @@ module Prism
# ^^^
def visit_forwarding_parameter_node(node)
bounds(node.location)
+ on_op("...")
on_args_forward
end
@@ -1764,6 +2216,9 @@ module Prism
# super {}
# ^^^^^^^^
def visit_forwarding_super_node(node)
+ bounds(node.keyword_loc)
+ on_kw("super")
+
if node.block.nil?
bounds(node.location)
on_zsuper
@@ -1784,12 +2239,13 @@ module Prism
# $foo = 1
# ^^^^^^^^
- #
- # $foo, $bar = 1
- # ^^^^ ^^^^
def visit_global_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_gvar(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1802,8 +2258,8 @@ module Prism
bounds(node.name_loc)
target = on_var_field(on_gvar(node.name.to_s))
- bounds(node.operator_loc)
- operator = on_op("#{node.operator}=")
+ bounds(node.binary_operator_loc)
+ operator = on_op("#{node.binary_operator}=")
value = visit_write_value(node.value)
bounds(node.location)
@@ -1848,6 +2304,9 @@ module Prism
# {}
# ^^
def visit_hash_node(node)
+ bounds(node.opening_loc)
+ on_lbrace("{")
+
elements =
if node.elements.any?
args = visit_all(node.elements)
@@ -1856,6 +2315,8 @@ module Prism
on_assoclist_from_args(args)
end
+ bounds(node.closing_loc)
+ on_rbrace("}")
bounds(node.location)
on_hash(elements)
end
@@ -1864,6 +2325,15 @@ module Prism
# ^^
def visit_hash_pattern_node(node)
constant = visit(node.constant)
+
+ if node.constant
+ bounds(node.opening_loc)
+ node.opening == "[" ? on_lbracket("[") : on_lparen("(")
+ elsif node.opening_loc
+ bounds(node.opening_loc)
+ on_lbrace("{")
+ end
+
elements =
if node.elements.any? || !node.rest.nil?
node.elements.map do |element|
@@ -1886,12 +2356,21 @@ module Prism
rest =
case node.rest
when AssocSplatNode
+ bounds(node.rest.operator_loc)
+ on_op("**")
visit(node.rest.value)
when NoKeywordsParameterNode
bounds(node.rest.location)
on_var_field(visit(node.rest))
end
+ if node.constant
+ bounds(node.closing_loc)
+ node.closing == "]" ? on_rbracket("]") : on_rparen(")")
+ elsif node.closing_loc
+ bounds(node.closing_loc)
+ on_rbrace("}")
+ end
bounds(node.location)
on_hshptn(constant, elements, rest)
end
@@ -1907,13 +2386,27 @@ module Prism
def visit_if_node(node)
if node.then_keyword == "?"
predicate = visit(node.predicate)
+
+ bounds(node.then_keyword_loc)
+ on_op("?")
+
truthy = visit(node.statements.body.first)
- falsy = visit(node.consequent.statements.body.first)
+
+ bounds(node.subsequent.else_keyword_loc)
+ on_op(":")
+
+ falsy = visit(node.subsequent.statements.body.first)
bounds(node.location)
on_ifop(predicate, truthy, falsy)
elsif node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ bounds(node.if_keyword_loc)
+ on_kw(node.if_keyword)
predicate = visit(node.predicate)
+ if node.then_keyword_loc && node.then_keyword != "?"
+ bounds(node.then_keyword_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -1921,16 +2414,23 @@ module Prism
else
visit(node.statements)
end
- consequent = visit(node.consequent)
+ subsequent = visit(node.subsequent)
+
+ if node.end_keyword_loc && !node.subsequent
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
bounds(node.location)
if node.if_keyword == "if"
- on_if(predicate, statements, consequent)
+ on_if(predicate, statements, subsequent)
else
- on_elsif(predicate, statements, consequent)
+ on_elsif(predicate, statements, subsequent)
end
else
statements = visit(node.statements.body.first)
+ bounds(node.if_keyword_loc)
+ on_kw(node.if_keyword)
predicate = visit(node.predicate)
bounds(node.location)
@@ -1960,9 +2460,16 @@ module Prism
# ^^^^^^^^^^^^^^^^^^^^^
def visit_in_node(node)
# This is a special case where we're not going to call on_in directly
- # because we don't have access to the consequent. Instead, we'll return
+ # because we don't have access to the subsequent. Instead, we'll return
# the component parts and let the parent node handle it.
+ bounds(node.in_loc)
+ on_kw("in")
+
pattern = visit_pattern_node(node.pattern)
+ if node.then_loc
+ bounds(node.then_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -1978,13 +2485,20 @@ module Prism
# ^^^^^^^^^^^^^^^
def visit_index_operator_write_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
target = on_aref_field(receiver, arguments)
- bounds(node.operator_loc)
- operator = on_op("#{node.operator}=")
+ bounds(node.binary_operator_loc)
+ operator = on_op("#{node.binary_operator}=")
value = visit_write_value(node.value)
bounds(node.location)
@@ -1995,8 +2509,15 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_index_and_write_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
target = on_aref_field(receiver, arguments)
@@ -2012,8 +2533,15 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_index_or_write_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
target = on_aref_field(receiver, arguments)
@@ -2029,8 +2557,15 @@ module Prism
# ^^^^^^^^
def visit_index_target_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
on_aref_field(receiver, arguments)
end
@@ -2047,6 +2582,10 @@ module Prism
def visit_instance_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_ivar(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -2059,8 +2598,8 @@ module Prism
bounds(node.name_loc)
target = on_var_field(on_ivar(node.name.to_s))
- bounds(node.operator_loc)
- operator = on_op("#{node.operator}=")
+ bounds(node.binary_operator_loc)
+ operator = on_op("#{node.binary_operator}=")
value = visit_write_value(node.value)
bounds(node.location)
@@ -2149,20 +2688,37 @@ module Prism
# "foo #{bar}"
# ^^^^^^^^^^^^
def visit_interpolated_string_node(node)
- if node.opening&.start_with?("<<~")
- heredoc = visit_heredoc_string_node(node)
+ with_string_bounds(node) do
+ if node.opening&.start_with?("<<~")
+ heredoc = visit_heredoc_string_node(node)
- bounds(node.location)
- on_string_literal(heredoc)
- elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? }
- first, *rest = node.parts
- rest.inject(visit(first)) do |content, part|
- concat = visit(part)
+ bounds(node.location)
+ on_string_literal(heredoc)
+ elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? }
+ first, *rest = node.parts
+ rest.inject(visit(first)) do |content, part|
+ concat = visit(part)
+
+ bounds(part.location)
+ on_string_concat(content, concat)
+ end
+ else
+ bounds(node.parts.first.location)
+ parts =
+ node.parts.inject(on_string_content) do |content, part|
+ on_string_add(content, visit_string_content(part))
+ end
- bounds(part.location)
- on_string_concat(content, concat)
+ bounds(node.location)
+ on_string_literal(parts)
end
- else
+ end
+ end
+
+ # :"foo #{bar}"
+ # ^^^^^^^^^^^^^
+ def visit_interpolated_symbol_node(node)
+ with_string_bounds(node) do
bounds(node.parts.first.location)
parts =
node.parts.inject(on_string_content) do |content, part|
@@ -2170,40 +2726,29 @@ module Prism
end
bounds(node.location)
- on_string_literal(parts)
+ on_dyna_symbol(parts)
end
end
- # :"foo #{bar}"
- # ^^^^^^^^^^^^^
- def visit_interpolated_symbol_node(node)
- bounds(node.parts.first.location)
- parts =
- node.parts.inject(on_string_content) do |content, part|
- on_string_add(content, visit_string_content(part))
- end
-
- bounds(node.location)
- on_dyna_symbol(parts)
- end
-
# `foo #{bar}`
# ^^^^^^^^^^^^
def visit_interpolated_x_string_node(node)
- if node.opening.start_with?("<<~")
- heredoc = visit_heredoc_x_string_node(node)
+ with_string_bounds(node) do
+ if node.opening.start_with?("<<~")
+ heredoc = visit_heredoc_x_string_node(node)
- bounds(node.location)
- on_xstring_literal(heredoc)
- else
- bounds(node.parts.first.location)
- parts =
- node.parts.inject(on_xstring_new) do |content, part|
- on_xstring_add(content, visit_string_content(part))
- end
+ bounds(node.location)
+ on_xstring_literal(heredoc)
+ else
+ bounds(node.parts.first.location)
+ parts =
+ node.parts.inject(on_xstring_new) do |content, part|
+ on_xstring_add(content, visit_string_content(part))
+ end
- bounds(node.location)
- on_xstring_literal(parts)
+ bounds(node.location)
+ on_xstring_literal(parts)
+ end
end
end
@@ -2218,6 +2763,13 @@ module Prism
end
# -> { it }
+ # ^^
+ def visit_it_local_variable_read_node(node)
+ bounds(node.location)
+ on_vcall(on_ident(node.slice))
+ end
+
+ # -> { it }
# ^^^^^^^^^
def visit_it_parameters_node(node)
end
@@ -2237,6 +2789,9 @@ module Prism
# def foo(**); end
# ^^
def visit_keyword_rest_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("**")
+
if node.name_loc.nil?
bounds(node.location)
on_kwrest_param(nil)
@@ -2256,6 +2811,11 @@ module Prism
parameters =
if node.parameters.is_a?(BlockParametersNode)
+ if node.parameters.opening_loc
+ bounds(node.parameters.opening_loc)
+ on_lparen("(")
+ end
+
# Ripper does not track block-locals within lambdas, so we skip
# directly to the parameters here.
params =
@@ -2266,6 +2826,13 @@ module Prism
visit(node.parameters.parameters)
end
+ visit_all(node.parameters.locals)
+
+ if node.parameters.closing_loc
+ bounds(node.parameters.closing_loc)
+ on_rparen(")")
+ end
+
if node.parameters.opening_loc.nil?
params
else
@@ -2278,9 +2845,11 @@ module Prism
end
braces = node.opening == "{"
+ bounds(node.opening_loc)
if braces
- bounds(node.opening_loc)
on_tlambeg(node.opening)
+ else
+ on_kw("do")
end
body =
@@ -2293,7 +2862,7 @@ module Prism
braces ? stmts : on_bodystmt(stmts, nil, nil, nil)
when StatementsNode
stmts = node.body.body
- stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
+ stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
stmts = visit_statements_node_body(stmts)
bounds(node.body.location)
@@ -2304,6 +2873,13 @@ module Prism
raise
end
+ bounds(node.closing_loc)
+ if braces
+ on_rbrace("}")
+ else
+ on_kw("end")
+ end
+
bounds(node.location)
on_lambda(parameters, body)
end
@@ -2312,12 +2888,7 @@ module Prism
# ^^^
def visit_local_variable_read_node(node)
bounds(node.location)
-
- if node.name == :"0it"
- on_vcall(on_ident(node.slice))
- else
- on_var_ref(on_ident(node.slice))
- end
+ on_var_ref(on_ident(node.slice))
end
# foo = 1
@@ -2325,6 +2896,10 @@ module Prism
def visit_local_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_ident(node.name_loc.slice))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -2337,8 +2912,8 @@ module Prism
bounds(node.name_loc)
target = on_var_field(on_ident(node.name_loc.slice))
- bounds(node.operator_loc)
- operator = on_op("#{node.operator}=")
+ bounds(node.binary_operator_loc)
+ operator = on_op("#{node.binary_operator}=")
value = visit_write_value(node.value)
bounds(node.location)
@@ -2399,6 +2974,8 @@ module Prism
# ^^^^^^^^^^
def visit_match_predicate_node(node)
value = visit(node.value)
+ bounds(node.operator_loc)
+ on_kw("in")
pattern = on_in(visit_pattern_node(node.pattern), nil, nil)
on_case(value, pattern)
@@ -2408,6 +2985,10 @@ module Prism
# ^^^^^^^^^^
def visit_match_required_node(node)
value = visit(node.value)
+
+ bounds(node.operator_loc)
+ on_op("=>")
+
pattern = on_in(visit_pattern_node(node.pattern), nil, nil)
on_case(value, pattern)
@@ -2421,13 +3002,16 @@ module Prism
# A node that is missing from the syntax tree. This is only used in the
# case of a syntax error.
- def visit_missing_node(node)
- raise "Cannot visit missing nodes directly."
+ def visit_error_recovery_node(node)
+ raise "Cannot visit error recovery nodes directly."
end
# module Foo; end
# ^^^^^^^^^^^^^^^
def visit_module_node(node)
+ bounds(node.module_keyword_loc)
+ on_kw("module")
+
constant_path =
if node.constant_path.is_a?(ConstantReadNode)
bounds(node.constant_path.location)
@@ -2438,6 +3022,9 @@ module Prism
bodystmt = visit_body_node(node.constant_path.location, node.body, true)
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_module(constant_path, bodystmt)
end
@@ -2445,9 +3032,19 @@ module Prism
# (foo, bar), bar = qux
# ^^^^^^^^^^
def visit_multi_target_node(node)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
bounds(node.location)
targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
if node.lparen_loc.nil?
targets
else
@@ -2499,9 +3096,22 @@ module Prism
# foo, bar = baz
# ^^^^^^^^^^^^^^
def visit_multi_write_node(node)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
bounds(node.location)
targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
+ bounds(node.operator_loc)
+ on_op("=")
+
unless node.lparen_loc.nil?
bounds(node.lparen_loc)
targets = on_mlhs_paren(targets)
@@ -2519,6 +3129,9 @@ module Prism
# next foo
# ^^^^^^^^
def visit_next_node(node)
+ bounds(node.keyword_loc)
+ on_kw("next")
+
if node.arguments.nil?
bounds(node.location)
on_next(on_args_new)
@@ -2537,9 +3150,24 @@ module Prism
on_var_ref(on_kw("nil"))
end
+ # def foo(&nil); end
+ # ^^^^
+ def visit_no_block_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("&")
+ bounds(node.keyword_loc)
+ on_kw("nil")
+ bounds(node.location)
+ on_blockarg(:nil)
+ end
+
# def foo(**nil); end
# ^^^^^
def visit_no_keywords_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("**")
+ bounds(node.keyword_loc)
+ on_kw("nil")
bounds(node.location)
on_nokw_param(nil)
@@ -2572,7 +3200,11 @@ module Prism
# ^^^^^^^
def visit_optional_parameter_node(node)
bounds(node.name_loc)
- name = visit_token(node.name.to_s)
+ name = on_ident(node.name.to_s)
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit(node.value)
[name, value]
@@ -2582,6 +3214,14 @@ module Prism
# ^^^^^^
def visit_or_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ if node.operator == "or"
+ on_kw("or")
+ else
+ on_op("||")
+ end
+
right = visit(node.right)
bounds(node.location)
@@ -2605,9 +3245,19 @@ module Prism
# Visit a destructured positional parameter node.
private def visit_destructured_parameter_node(node)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
bounds(node.location)
targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, false)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
bounds(node.lparen_loc)
on_mlhs_paren(targets)
end
@@ -2618,6 +3268,9 @@ module Prism
# (1)
# ^^^
def visit_parentheses_node(node)
+ bounds(node.opening_loc)
+ on_lparen("(")
+
body =
if node.body.nil?
on_stmts_add(on_stmts_new, on_void_stmt)
@@ -2625,6 +3278,8 @@ module Prism
visit(node.body)
end
+ bounds(node.closing_loc)
+ on_rparen(")")
bounds(node.location)
on_paren(body)
end
@@ -2632,8 +3287,15 @@ module Prism
# foo => ^(bar)
# ^^^^^^
def visit_pinned_expression_node(node)
+ bounds(node.operator_loc)
+ on_op("^")
+ bounds(node.lparen_loc)
+ on_lparen("(")
+
expression = visit(node.expression)
+ bounds(node.rparen_loc)
+ on_rparen(")")
bounds(node.location)
on_begin(expression)
end
@@ -2641,12 +3303,20 @@ module Prism
# foo = 1 and bar => ^foo
# ^^^^
def visit_pinned_variable_node(node)
+ bounds(node.operator_loc)
+ on_op("^")
+
visit(node.variable)
end
# END {}
# ^^^^^^
def visit_post_execution_node(node)
+ bounds(node.keyword_loc)
+ on_kw("END")
+ bounds(node.opening_loc)
+ on_lbrace("{")
+
statements =
if node.statements.nil?
bounds(node.location)
@@ -2655,6 +3325,8 @@ module Prism
visit(node.statements)
end
+ bounds(node.closing_loc)
+ on_rbrace("}")
bounds(node.location)
on_END(statements)
end
@@ -2662,6 +3334,11 @@ module Prism
# BEGIN {}
# ^^^^^^^^
def visit_pre_execution_node(node)
+ bounds(node.keyword_loc)
+ on_kw("BEGIN")
+ bounds(node.opening_loc)
+ on_lbrace("{")
+
statements =
if node.statements.nil?
bounds(node.location)
@@ -2670,6 +3347,8 @@ module Prism
visit(node.statements)
end
+ bounds(node.closing_loc)
+ on_rbrace("}")
bounds(node.location)
on_BEGIN(statements)
end
@@ -2677,7 +3356,7 @@ module Prism
# The top-level program node.
def visit_program_node(node)
body = node.statements.body
- body << nil if body.empty?
+ body = [nil] if body.empty?
statements = visit_statements_node_body(body)
bounds(node.location)
@@ -2688,6 +3367,10 @@ module Prism
# ^^^^
def visit_range_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ on_op(node.operator)
+
right = visit(node.right)
bounds(node.location)
@@ -2708,6 +3391,7 @@ module Prism
# ^^^^
def visit_redo_node(node)
bounds(node.location)
+ on_kw("redo")
on_redo
end
@@ -2750,6 +3434,9 @@ module Prism
# foo rescue bar
# ^^^^^^^^^^^^^^
def visit_rescue_modifier_node(node)
+ bounds(node.keyword_loc)
+ on_kw("rescue")
+
expression = visit_write_value(node.expression)
rescue_expression = visit(node.rescue_expression)
@@ -2760,6 +3447,9 @@ module Prism
# begin; rescue; end
# ^^^^^^^
def visit_rescue_node(node)
+ bounds(node.keyword_loc)
+ on_kw("rescue")
+
exceptions =
case node.exceptions.length
when 0
@@ -2797,6 +3487,11 @@ module Prism
end
end
+ if node.operator_loc
+ bounds(node.operator_loc)
+ on_op("=>")
+ end
+
reference = visit(node.reference)
statements =
if node.statements.nil?
@@ -2806,10 +3501,10 @@ module Prism
visit(node.statements)
end
- consequent = visit(node.consequent)
+ subsequent = visit(node.subsequent)
bounds(node.location)
- on_rescue(exceptions, reference, statements, consequent)
+ on_rescue(exceptions, reference, statements, subsequent)
end
# def foo(*bar); end
@@ -2818,12 +3513,15 @@ module Prism
# def foo(*); end
# ^
def visit_rest_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("*")
+
if node.name_loc.nil?
bounds(node.location)
on_rest_param(nil)
else
bounds(node.name_loc)
- on_rest_param(visit_token(node.name.to_s))
+ on_rest_param(on_ident(node.name.to_s))
end
end
@@ -2831,6 +3529,7 @@ module Prism
# ^^^^^
def visit_retry_node(node)
bounds(node.location)
+ on_kw("retry")
on_retry
end
@@ -2840,6 +3539,9 @@ module Prism
# return 1
# ^^^^^^^^
def visit_return_node(node)
+ bounds(node.keyword_loc)
+ on_kw("return")
+
if node.arguments.nil?
bounds(node.location)
on_return0
@@ -2866,9 +3568,17 @@ module Prism
# class << self; end
# ^^^^^^^^^^^^^^^^^^
def visit_singleton_class_node(node)
+ bounds(node.class_keyword_loc)
+ on_kw("class")
+ bounds(node.operator_loc)
+ on_op("<<")
+
expression = visit(node.expression)
bodystmt = visit_body_node(node.body&.location || node.end_keyword_loc, node.body)
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_sclass(expression, bodystmt)
end
@@ -2903,6 +3613,8 @@ module Prism
# def foo(*); bar(*); end
# ^
def visit_splat_node(node)
+ bounds(node.operator_loc)
+ on_op("*")
visit(node.expression)
end
@@ -2925,26 +3637,68 @@ module Prism
# "foo"
# ^^^^^
def visit_string_node(node)
- if (content = node.content).empty?
- bounds(node.location)
- on_string_literal(on_string_content)
- elsif (opening = node.opening) == "?"
- bounds(node.location)
- on_CHAR("?#{node.content}")
- elsif opening.start_with?("<<~")
- heredoc = visit_heredoc_string_node(node.to_interpolated)
+ with_string_bounds(node) do
+ if (content = node.content).empty?
+ bounds(node.location)
+ on_string_literal(on_string_content)
+ elsif (opening = node.opening) == "?"
+ bounds(node.location)
+ on_CHAR("?#{node.content}")
+ elsif opening.start_with?("<<~")
+ heredoc = visit_heredoc_string_node(node.to_interpolated)
- bounds(node.location)
- on_string_literal(heredoc)
- else
- bounds(node.content_loc)
- tstring_content = on_tstring_content(content)
+ bounds(node.location)
+ on_string_literal(heredoc)
+ else
+ bounds(node.content_loc)
+ tstring_content = on_tstring_content(content)
- bounds(node.location)
- on_string_literal(on_string_add(on_string_content, tstring_content))
+ bounds(node.location)
+ on_string_literal(on_string_add(on_string_content, tstring_content))
+ end
end
end
+ # Responsible for emitting the various string-like begin/end events
+ private def with_string_bounds(node)
+ # `foo "bar": baz` doesn't emit the closing location
+ assoc = !(opening = node.opening)&.include?(":") && node.closing&.end_with?(":")
+
+ is_heredoc = opening&.start_with?("<<")
+ if is_heredoc
+ bounds(node.opening_loc)
+ on_heredoc_beg(node.opening)
+ elsif opening&.start_with?(":", "%s")
+ bounds(node.opening_loc)
+ on_symbeg(node.opening)
+ elsif opening&.start_with?("`", "%x")
+ bounds(node.opening_loc)
+ on_backtick(node.opening)
+ elsif opening && !opening.start_with?("?")
+ bounds(node.opening_loc)
+ on_tstring_beg(opening)
+ end
+
+ result = yield
+ if assoc
+ if node.closing != ":"
+ bounds(node.closing_loc)
+ on_label_end(node.closing)
+ end
+ return result
+ end
+
+ if is_heredoc
+ bounds(node.closing_loc)
+ on_heredoc_end(node.closing)
+ elsif node.closing_loc
+ bounds(node.closing_loc)
+ on_tstring_end(node.closing)
+ end
+
+ result
+ end
+
# Ripper gives back the escaped string content but strips out the common
# leading whitespace. Prism gives back the unescaped string content and
# a location for the escaped string content. Unfortunately these don't
@@ -3022,42 +3776,39 @@ module Prism
# Visit a heredoc node that is representing a string.
private def visit_heredoc_string_node(node)
- bounds(node.opening_loc)
- on_heredoc_beg(node.opening)
-
bounds(node.location)
- result =
- visit_heredoc_node(node.parts, on_string_content) do |parts, part|
- on_string_add(parts, part)
- end
-
- bounds(node.closing_loc)
- on_heredoc_end(node.closing)
-
- result
+ visit_heredoc_node(node.parts, on_string_content) do |parts, part|
+ on_string_add(parts, part)
+ end
end
# Visit a heredoc node that is representing an xstring.
private def visit_heredoc_x_string_node(node)
- bounds(node.opening_loc)
- on_heredoc_beg(node.opening)
-
bounds(node.location)
- result =
- visit_heredoc_node(node.parts, on_xstring_new) do |parts, part|
- on_xstring_add(parts, part)
- end
-
- bounds(node.closing_loc)
- on_heredoc_end(node.closing)
-
- result
+ visit_heredoc_node(node.parts, on_xstring_new) do |parts, part|
+ on_xstring_add(parts, part)
+ end
end
# super(foo)
# ^^^^^^^^^^
def visit_super_node(node)
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location))
+ bounds(node.keyword_loc)
+ on_kw("super")
+
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location))
+
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
+ block = visit(block_node)
if !node.lparen_loc.nil?
bounds(node.lparen_loc)
@@ -3067,35 +3818,36 @@ module Prism
bounds(node.location)
call = on_super(arguments)
- if block.nil?
- call
- else
+ if block_node
bounds(node.block.location)
on_method_add_block(call, block)
+ else
+ call
end
end
# :foo
# ^^^^
def visit_symbol_node(node)
- if (opening = node.opening)&.match?(/^%s|['"]:?$/)
- bounds(node.value_loc)
- content = on_string_content
-
- if !(value = node.value).empty?
- content = on_string_add(content, on_tstring_content(value))
+ with_string_bounds(node) do
+ if node.value_loc.nil?
+ bounds(node.location)
+ on_dyna_symbol(on_string_content)
+ elsif (opening = node.opening)&.match?(/^%s|['"]:?$/)
+ bounds(node.value_loc)
+ content = on_string_add(on_string_content, on_tstring_content(node.value))
+ bounds(node.location)
+ on_dyna_symbol(content)
+ elsif (closing = node.closing) == ":"
+ bounds(node.location)
+ on_label("#{node.value}:")
+ elsif opening.nil? && node.closing_loc.nil?
+ bounds(node.value_loc)
+ on_symbol_literal(visit_token(node.value))
+ else
+ bounds(node.value_loc)
+ on_symbol_literal(on_symbol(visit_token(node.value)))
end
-
- on_dyna_symbol(content)
- elsif (closing = node.closing) == ":"
- bounds(node.location)
- on_label("#{node.value}:")
- elsif opening.nil? && node.closing_loc.nil?
- bounds(node.value_loc)
- on_symbol_literal(visit_token(node.value))
- else
- bounds(node.value_loc)
- on_symbol_literal(on_symbol(visit_token(node.value)))
end
end
@@ -3109,6 +3861,9 @@ module Prism
# undef foo
# ^^^^^^^^^
def visit_undef_node(node)
+ bounds(node.keyword_loc)
+ on_kw("undef")
+
names = visit_all(node.names)
bounds(node.location)
@@ -3122,7 +3877,13 @@ module Prism
# ^^^^^^^^^^^^^^
def visit_unless_node(node)
if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ bounds(node.keyword_loc)
+ on_kw("unless")
predicate = visit(node.predicate)
+ if node.then_keyword_loc
+ bounds(node.then_keyword_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -3130,12 +3891,19 @@ module Prism
else
visit(node.statements)
end
- consequent = visit(node.consequent)
+ else_clause = visit(node.else_clause)
+
+ if node.end_keyword_loc && !node.else_clause
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
bounds(node.location)
- on_unless(predicate, statements, consequent)
+ on_unless(predicate, statements, else_clause)
else
statements = visit(node.statements.body.first)
+ bounds(node.keyword_loc)
+ on_kw("unless")
predicate = visit(node.predicate)
bounds(node.location)
@@ -3149,7 +3917,14 @@ module Prism
# bar until foo
# ^^^^^^^^^^^^^
def visit_until_node(node)
+ bounds(node.keyword_loc)
+ on_kw("until")
+
if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ if node.do_keyword_loc
+ bounds(node.do_keyword_loc)
+ on_kw("do")
+ end
predicate = visit(node.predicate)
statements =
if node.statements.nil?
@@ -3159,6 +3934,11 @@ module Prism
visit(node.statements)
end
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
on_until(predicate, statements)
else
@@ -3174,9 +3954,16 @@ module Prism
# ^^^^^^^^^^^^^
def visit_when_node(node)
# This is a special case where we're not going to call on_when directly
- # because we don't have access to the consequent. Instead, we'll return
+ # because we don't have access to the subsequent. Instead, we'll return
# the component parts and let the parent node handle it.
+ bounds(node.keyword_loc)
+ on_kw("when")
+
conditions = visit_arguments(node.conditions)
+ if node.then_keyword_loc
+ bounds(node.then_keyword_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -3195,7 +3982,17 @@ module Prism
# ^^^^^^^^^^^^^
def visit_while_node(node)
if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ bounds(node.keyword_loc)
+ on_kw("while")
+ if node.do_keyword_loc
+ bounds(node.do_keyword_loc)
+ on_kw("do")
+ end
predicate = visit(node.predicate)
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_kw("end")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -3208,6 +4005,8 @@ module Prism
on_while(predicate, statements)
else
statements = visit(node.statements.body.first)
+ bounds(node.keyword_loc)
+ on_kw("while")
predicate = visit(node.predicate)
bounds(node.location)
@@ -3218,20 +4017,22 @@ module Prism
# `foo`
# ^^^^^
def visit_x_string_node(node)
- if node.unescaped.empty?
- bounds(node.location)
- on_xstring_literal(on_xstring_new)
- elsif node.opening.start_with?("<<~")
- heredoc = visit_heredoc_x_string_node(node.to_interpolated)
+ with_string_bounds(node) do
+ if node.unescaped.empty?
+ bounds(node.location)
+ on_xstring_literal(on_xstring_new)
+ elsif node.opening.start_with?("<<~")
+ heredoc = visit_heredoc_x_string_node(node.to_interpolated)
- bounds(node.location)
- on_xstring_literal(heredoc)
- else
- bounds(node.content_loc)
- content = on_tstring_content(node.content)
+ bounds(node.location)
+ on_xstring_literal(heredoc)
+ else
+ bounds(node.content_loc)
+ content = on_tstring_content(node.content)
- bounds(node.location)
- on_xstring_literal(on_xstring_add(on_xstring_new, content))
+ bounds(node.location)
+ on_xstring_literal(on_xstring_add(on_xstring_new, content))
+ end
end
end
@@ -3241,10 +4042,18 @@ module Prism
# yield 1
# ^^^^^^^
def visit_yield_node(node)
+ bounds(node.keyword_loc)
+ on_kw("yield")
+
if node.arguments.nil? && node.lparen_loc.nil?
bounds(node.location)
on_yield0
else
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
arguments =
if node.arguments.nil?
bounds(node.location)
@@ -3254,6 +4063,8 @@ module Prism
end
unless node.lparen_loc.nil?
+ bounds(node.rparen_loc)
+ on_rparen(")")
bounds(node.lparen_loc)
arguments = on_paren(arguments)
end
@@ -3267,7 +4078,11 @@ module Prism
# Lazily initialize the parse result.
def result
- @result ||= Prism.parse(source)
+ @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true, encoding: source.encoding)
+ end
+
+ def line_and_column_cache
+ @line_and_column_cache ||= LineAndColumnCache.new(result.source)
end
##########################################################################
@@ -3288,30 +4103,34 @@ module Prism
# Visit the string content of a particular node. This method is used to
# split into the various token types.
def visit_token(token, allow_keywords = true)
- case token
- when "."
+ if token == "."
on_period(token)
- when "`"
+ elsif token == "`"
on_backtick(token)
- when *(allow_keywords ? KEYWORDS : [])
+ elsif allow_keywords && KEYWORDS.include?(token)
on_kw(token)
- when /^_/
+ elsif token.start_with?("_")
on_ident(token)
- when /^[[:upper:]]\w*$/
+ elsif token.match?(/^[[:upper:]]\w*$/)
on_const(token)
- when /^@@/
+ elsif token.start_with?("@@")
on_cvar(token)
- when /^@/
+ elsif token.start_with?("@")
on_ivar(token)
- when /^\$/
+ elsif token.start_with?("$")
on_gvar(token)
- when /^[[:punct:]]/
+ elsif token.match?(/^[[:punct:]]/)
on_op(token)
else
on_ident(token)
end
end
+ # Visit either `.`, `&.`, or `::`.
+ def visit_call_operator(token)
+ token == "." ? on_period(token) : on_op(token)
+ end
+
# Visit a node that represents a number. We need to explicitly handle the
# unary - operator.
def visit_number_node(node)
@@ -3319,6 +4138,9 @@ module Prism
location = node.location
if slice[0] == "-"
+ bounds(location.copy(length: 1))
+ on_op("-")
+
bounds(location.copy(start_offset: location.start_offset + 1))
value = yield slice[1..-1]
@@ -3367,26 +4189,24 @@ module Prism
# This method is responsible for updating lineno and column information
# to reflect the current node.
- #
- # This method could be drastically improved with some caching on the start
- # of every line, but for now it's good enough.
def bounds(location)
- @lineno = location.start_line
- @column = location.start_column
+ @lineno, @column = line_and_column_cache.line_and_column(location.start_offset)
end
+ # :startdoc:
+
##########################################################################
# Ripper interface
##########################################################################
# :stopdoc:
def _dispatch_0; end
- def _dispatch_1(_); end
- def _dispatch_2(_, _); end
- def _dispatch_3(_, _, _); end
- def _dispatch_4(_, _, _, _); end
- def _dispatch_5(_, _, _, _, _); end
- def _dispatch_7(_, _, _, _, _, _, _); end
+ def _dispatch_1(arg); arg end
+ def _dispatch_2(arg, _); arg end
+ def _dispatch_3(arg, _, _); arg end
+ def _dispatch_4(arg, _, _, _); arg end
+ def _dispatch_5(arg, _, _, _, _); arg end
+ def _dispatch_7(arg, _, _, _, _, _, _); arg end
# :startdoc:
#
diff --git a/lib/prism/translation/ripper/filter.rb b/lib/prism/translation/ripper/filter.rb
new file mode 100644
index 0000000000..19deef2d37
--- /dev/null
+++ b/lib/prism/translation/ripper/filter.rb
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module Prism
+ module Translation
+ class Ripper
+ class Filter # :nodoc:
+ # :stopdoc:
+ def initialize(src, filename = '-', lineno = 1)
+ @__lexer = Lexer.new(src, filename, lineno)
+ @__line = nil
+ @__col = nil
+ @__state = nil
+ end
+
+ def filename
+ @__lexer.filename
+ end
+
+ def lineno
+ @__line
+ end
+
+ def column
+ @__col
+ end
+
+ def state
+ @__state
+ end
+
+ def parse(init = nil)
+ data = init
+ @__lexer.lex.each do |pos, event, tok, state|
+ @__line, @__col = *pos
+ @__state = state
+ data = if respond_to?(event, true)
+ then __send__(event, tok, data)
+ else on_default(event, tok, data)
+ end
+ end
+ data
+ end
+
+ private
+
+ def on_default(event, token, data)
+ data
+ end
+ # :startdoc:
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb
new file mode 100644
index 0000000000..c6aeae4bd7
--- /dev/null
+++ b/lib/prism/translation/ripper/lexer.rb
@@ -0,0 +1,133 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+require_relative "../ripper"
+
+module Prism
+ module Translation
+ class Ripper
+ class Lexer < Ripper # :nodoc:
+ class State # :nodoc:
+ attr_reader :to_int, :to_s
+
+ def initialize(i)
+ @to_int = i
+ @to_s = Ripper.lex_state_name(i)
+ freeze
+ end
+
+ def [](index)
+ case index
+ when 0, :to_int
+ @to_int
+ when 1, :to_s
+ @to_s
+ else
+ nil
+ end
+ end
+
+ alias to_i to_int
+ alias inspect to_s
+ def pretty_print(q) q.text(to_s) end
+ def ==(i) super or to_int == i end
+ def &(i) self.class.new(to_int & i) end
+ def |(i) self.class.new(to_int | i) end
+ def allbits?(i) to_int.allbits?(i) end
+ def anybits?(i) to_int.anybits?(i) end
+ def nobits?(i) to_int.nobits?(i) end
+
+ # Instances are frozen and there are only a handful of them so we
+ # cache them here.
+ STATES = Hash.new { |hash, key| hash[key] = State.new(key) }
+ private_constant :STATES
+
+ def self.[](i)
+ STATES[i]
+ end
+ end
+
+ class Elem # :nodoc:
+ attr_accessor :pos, :event, :tok, :state, :message
+
+ def initialize(pos, event, tok, state, message = nil)
+ @pos = pos
+ @event = event
+ @tok = tok
+ @state = State[state]
+ @message = message
+ end
+
+ def [](index)
+ case index
+ when 0, :pos
+ @pos
+ when 1, :event
+ @event
+ when 2, :tok
+ @tok
+ when 3, :state
+ @state
+ when 4, :message
+ @message
+ else
+ nil
+ end
+ end
+
+ def inspect
+ "#<#{self.class}: #{event}@#{pos[0]}:#{pos[1]}:#{state}: #{tok.inspect}#{": " if message}#{message}>"
+ end
+
+ alias to_s inspect
+
+ def pretty_print(q)
+ q.group(2, "#<#{self.class}:", ">") {
+ q.breakable
+ q.text("#{event}@#{pos[0]}:#{pos[1]}")
+ q.breakable
+ state.pretty_print(q)
+ q.breakable
+ q.text("token: ")
+ tok.pretty_print(q)
+ if message
+ q.breakable
+ q.text("message: ")
+ q.text(message)
+ end
+ }
+ end
+
+ def to_a
+ if @message
+ [@pos, @event, @tok, @state, @message]
+ else
+ [@pos, @event, @tok, @state]
+ end
+ end
+ end
+
+ # Pretty much just the same as Prism.lex_compat.
+ def lex(raise_errors: false)
+ Ripper.lex(@source, filename, lineno, raise_errors: raise_errors)
+ end
+
+ # Returns the lex_compat result wrapped in `Elem`. Errors are omitted.
+ # Since ripper is a streaming parser, tokens are expected to be emitted in the order
+ # that the parser encounters them. This is not implemented.
+ def parse(...)
+ lex(...).map do |position, event, token, state|
+ Elem.new(position, event, token, state.to_int)
+ end
+ end
+
+ # Similar to parse but ripper sorts the elements by position in the source. Also
+ # includes errors. Since prism does error recovery, in cases of syntax errors
+ # the result may differ greatly compared to ripper.
+ def scan(...)
+ parse(...)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/ripper/sexp.rb b/lib/prism/translation/ripper/sexp.rb
index dc26a639a3..46c0333544 100644
--- a/lib/prism/translation/ripper/sexp.rb
+++ b/lib/prism/translation/ripper/sexp.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: true
+# :markup: markdown
require_relative "../ripper"
@@ -7,9 +8,7 @@ module Prism
class Ripper
# This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
# returns the arrays of [type, *children].
- class SexpBuilder < Ripper
- # :stopdoc:
-
+ class SexpBuilder < Ripper # :nodoc:
attr_reader :error
private
@@ -64,16 +63,12 @@ module Prism
remove_method :on_parse_error
alias on_parse_error on_error
alias compile_error on_error
-
- # :startdoc:
end
# This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
# returns the same values as ::Ripper::SexpBuilder except with a couple of
# niceties that flatten linked lists into arrays.
- class SexpBuilderPP < SexpBuilder
- # :stopdoc:
-
+ class SexpBuilderPP < SexpBuilder # :nodoc:
private
def on_heredoc_dedent(val, width)
@@ -117,8 +112,6 @@ module Prism
alias_method "on_#{event}", :_dispatch_event_push
end
end
-
- # :startdoc:
end
end
end
diff --git a/lib/prism/translation/ripper/shim.rb b/lib/prism/translation/ripper/shim.rb
index 10e21cd16a..00ed625da3 100644
--- a/lib/prism/translation/ripper/shim.rb
+++ b/lib/prism/translation/ripper/shim.rb
@@ -2,4 +2,6 @@
# This writes the prism ripper translation into the Ripper constant so that
# users can transparently use Ripper without any changes.
+# :stopdoc:
Ripper = Prism::Translation::Ripper
+# :startdoc:
diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb
index 5c59fe3181..42bc5ee658 100644
--- a/lib/prism/translation/ruby_parser.rb
+++ b/lib/prism/translation/ruby_parser.rb
@@ -1,6 +1,17 @@
# frozen_string_literal: true
+# :markup: markdown
-require "ruby_parser"
+begin
+ require "sexp"
+rescue LoadError
+ warn(%q{Error: Unable to load sexp. Add `gem "sexp_processor"` to your Gemfile.})
+ exit(1)
+end
+
+class RubyParser # :nodoc:
+ class SyntaxError < RuntimeError # :nodoc:
+ end
+end
module Prism
module Translation
@@ -8,9 +19,9 @@ module Prism
# seattlerb/ruby_parser gem's syntax tree.
class RubyParser
# A prism visitor that builds Sexp objects.
- class Compiler < ::Prism::Compiler
+ class Compiler < ::Prism::Compiler # :nodoc:
# This is the name of the file that we are compiling. We set it on every
- # Sexp object that is generated, and also use it to compile __FILE__
+ # Sexp object that is generated, and also use it to compile `__FILE__`
# nodes.
attr_reader :file
@@ -50,7 +61,19 @@ module Prism
# a and b
# ^^^^^^^
def visit_and_node(node)
- s(node, :and, visit(node.left), visit(node.right))
+ left = visit(node.left)
+
+ if left[0] == :and
+ # ruby_parser has the and keyword as right-associative as opposed to
+ # prism which has it as left-associative. We reverse that
+ # associativity here.
+ nest = left
+ nest = nest[2] while nest[2][0] == :and
+ nest[2] = s(node, :and, nest[2], visit(node.right))
+ left
+ else
+ s(node, :and, left, visit(node.right))
+ end
end
# []
@@ -114,7 +137,7 @@ module Prism
# $+
# ^^
def visit_back_reference_read_node(node)
- s(node, :back_ref, node.name.name.delete_prefix("$").to_sym)
+ s(node, :back_ref, node.name.to_s.delete_prefix("$").to_sym)
end
# begin end
@@ -130,7 +153,7 @@ module Prism
end
current = node.rescue_clause
- until (current = current.consequent).nil?
+ until (current = current.subsequent).nil?
result << visit(current)
end
end
@@ -246,6 +269,11 @@ module Prism
when RegularExpressionNode, InterpolatedRegularExpressionNode
return s(node, :match2, visit(node.receiver), visit(node.arguments.arguments.first))
end
+
+ case node.arguments.arguments.first
+ when RegularExpressionNode, InterpolatedRegularExpressionNode
+ return s(node, :match3, visit(node.arguments.arguments.first), visit(node.receiver))
+ end
end
end
@@ -271,9 +299,9 @@ module Prism
# ^^^^^^^^^^^^^^^
def visit_call_operator_write_node(node)
if op_asgn?(node)
- s(node, op_asgn_type(node, :op_asgn), visit(node.receiver), visit_write_value(node.value), node.read_name, node.operator)
+ s(node, op_asgn_type(node, :op_asgn), visit(node.receiver), visit_write_value(node.value), node.read_name, node.binary_operator)
else
- s(node, op_asgn_type(node, :op_asgn2), visit(node.receiver), node.write_name, node.operator, visit_write_value(node.value))
+ s(node, op_asgn_type(node, :op_asgn2), visit(node.receiver), node.write_name, node.binary_operator, visit_write_value(node.value))
end
end
@@ -325,13 +353,13 @@ module Prism
# case foo; when bar; end
# ^^^^^^^^^^^^^^^^^^^^^^^
def visit_case_node(node)
- s(node, :case, visit(node.predicate)).concat(visit_all(node.conditions)) << visit(node.consequent)
+ s(node, :case, visit(node.predicate)).concat(visit_all(node.conditions)) << visit(node.else_clause)
end
# case foo; in bar; end
# ^^^^^^^^^^^^^^^^^^^^^
def visit_case_match_node(node)
- s(node, :case, visit(node.predicate)).concat(visit_all(node.conditions)) << visit(node.consequent)
+ s(node, :case, visit(node.predicate)).concat(visit_all(node.conditions)) << visit(node.else_clause)
end
# class Foo; end
@@ -344,14 +372,18 @@ module Prism
visit(node.constant_path)
end
- if node.body.nil?
- s(node, :class, name, visit(node.superclass))
- elsif node.body.is_a?(StatementsNode)
- compiler = copy_compiler(in_def: false)
- s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) })
- else
- s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false)))
- end
+ result =
+ if node.body.nil?
+ s(node, :class, name, visit(node.superclass))
+ elsif node.body.is_a?(StatementsNode)
+ compiler = copy_compiler(in_def: false)
+ s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) })
+ else
+ s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false)))
+ end
+
+ attach_comments(result, node)
+ result
end
# @@foo
@@ -362,9 +394,6 @@ module Prism
# @@foo = 1
# ^^^^^^^^^
- #
- # @@foo, @@bar = 1
- # ^^^^^ ^^^^^
def visit_class_variable_write_node(node)
s(node, class_variable_write_type, node.name, visit_write_value(node.value))
end
@@ -372,7 +401,7 @@ module Prism
# @@foo += bar
# ^^^^^^^^^^^^
def visit_class_variable_operator_write_node(node)
- s(node, class_variable_write_type, node.name, s(node, :call, s(node, :cvar, node.name), node.operator, visit_write_value(node.value)))
+ s(node, class_variable_write_type, node.name, s(node, :call, s(node, :cvar, node.name), node.binary_operator, visit_write_value(node.value)))
end
# @@foo &&= bar
@@ -417,7 +446,7 @@ module Prism
# Foo += bar
# ^^^^^^^^^^^
def visit_constant_operator_write_node(node)
- s(node, :cdecl, node.name, s(node, :call, s(node, :const, node.name), node.operator, visit_write_value(node.value)))
+ s(node, :cdecl, node.name, s(node, :call, s(node, :const, node.name), node.binary_operator, visit_write_value(node.value)))
end
# Foo &&= bar
@@ -442,9 +471,9 @@ module Prism
# ^^^^^^^^
def visit_constant_path_node(node)
if node.parent.nil?
- s(node, :colon3, node.child.name)
+ s(node, :colon3, node.name)
else
- s(node, :colon2, visit(node.parent), node.child.name)
+ s(node, :colon2, visit(node.parent), node.name)
end
end
@@ -460,7 +489,7 @@ module Prism
# Foo::Bar += baz
# ^^^^^^^^^^^^^^^
def visit_constant_path_operator_write_node(node)
- s(node, :op_asgn, visit(node.target), node.operator, visit_write_value(node.value))
+ s(node, :op_asgn, visit(node.target), node.binary_operator, visit_write_value(node.value))
end
# Foo::Bar &&= baz
@@ -480,9 +509,9 @@ module Prism
def visit_constant_path_target_node(node)
inner =
if node.parent.nil?
- s(node, :colon3, node.child.name)
+ s(node, :colon3, node.name)
else
- s(node, :colon2, visit(node.parent), node.child.name)
+ s(node, :colon2, visit(node.parent), node.name)
end
s(node, :const, inner)
@@ -502,7 +531,9 @@ module Prism
s(node, :defs, visit(node.receiver), name)
end
+ attach_comments(result, node)
result.line(node.name_loc.start_line)
+
if node.parameters.nil?
result << s(node, :args).line(node.name_loc.start_line)
else
@@ -617,9 +648,6 @@ module Prism
# $foo = 1
# ^^^^^^^^
- #
- # $foo, $bar = 1
- # ^^^^ ^^^^
def visit_global_variable_write_node(node)
s(node, :gasgn, node.name, visit_write_value(node.value))
end
@@ -627,7 +655,7 @@ module Prism
# $foo += bar
# ^^^^^^^^^^^
def visit_global_variable_operator_write_node(node)
- s(node, :gasgn, node.name, s(node, :call, s(node, :gvar, node.name), node.operator, visit(node.value)))
+ s(node, :gasgn, node.name, s(node, :call, s(node, :gvar, node.name), node.binary_operator, visit(node.value)))
end
# $foo &&= bar
@@ -678,7 +706,7 @@ module Prism
# foo ? bar : baz
# ^^^^^^^^^^^^^^^
def visit_if_node(node)
- s(node, :if, visit(node.predicate), visit(node.statements), visit(node.consequent))
+ s(node, :if, visit(node.predicate), visit(node.statements), visit(node.subsequent))
end
# 1i
@@ -719,7 +747,7 @@ module Prism
arglist << visit(node.block) if !node.block.nil?
end
- s(node, :op_asgn1, visit(node.receiver), arglist, node.operator, visit_write_value(node.value))
+ s(node, :op_asgn1, visit(node.receiver), arglist, node.binary_operator, visit_write_value(node.value))
end
# foo[bar] &&= baz
@@ -765,9 +793,6 @@ module Prism
# @foo = 1
# ^^^^^^^^
- #
- # @foo, @bar = 1
- # ^^^^ ^^^^
def visit_instance_variable_write_node(node)
s(node, :iasgn, node.name, visit_write_value(node.value))
end
@@ -775,7 +800,7 @@ module Prism
# @foo += bar
# ^^^^^^^^^^^
def visit_instance_variable_operator_write_node(node)
- s(node, :iasgn, node.name, s(node, :call, s(node, :ivar, node.name), node.operator, visit_write_value(node.value)))
+ s(node, :iasgn, node.name, s(node, :call, s(node, :ivar, node.name), node.binary_operator, visit_write_value(node.value)))
end
# @foo &&= bar
@@ -805,17 +830,29 @@ module Prism
# if /foo #{bar}/ then end
# ^^^^^^^^^^^^
def visit_interpolated_match_last_line_node(node)
- s(node, :match, s(node, :dregx).concat(visit_interpolated_parts(node.parts)))
+ parts = visit_interpolated_parts(node.parts)
+ regexp =
+ if parts.length == 1
+ s(node, :lit, Regexp.new(parts.first, node.options))
+ else
+ s(node, :dregx).concat(parts).tap do |result|
+ options = node.options
+ result << options if options != 0
+ end
+ end
+
+ s(node, :match, regexp)
end
# /foo #{bar}/
# ^^^^^^^^^^^^
def visit_interpolated_regular_expression_node(node)
- if node.parts.all? { |part| part.is_a?(StringNode) || (part.is_a?(EmbeddedStatementsNode) && part.statements&.body&.length == 1 && part.statements.body.first.is_a?(StringNode)) }
- unescaped = node.parts.map { |part| part.is_a?(StringNode) ? part.unescaped : part.statements.body.first.unescaped }.join
- s(node, :lit, Regexp.new(unescaped, node.options))
+ parts = visit_interpolated_parts(node.parts)
+
+ if parts.length == 1
+ s(node, :lit, Regexp.new(parts.first, node.options))
else
- s(node, :dregx).concat(visit_interpolated_parts(node.parts)).tap do |result|
+ s(node, :dregx).concat(parts).tap do |result|
options = node.options
result << options if options != 0
end
@@ -825,47 +862,102 @@ module Prism
# "foo #{bar}"
# ^^^^^^^^^^^^
def visit_interpolated_string_node(node)
- if (node.parts.all? { |part| part.is_a?(StringNode) || (part.is_a?(EmbeddedStatementsNode) && part.statements&.body&.length == 1 && part.statements.body.first.is_a?(StringNode)) }) ||
- (node.opening.nil? && node.parts.all? { |part| part.is_a?(StringNode) && !part.opening_loc.nil? })
- unescaped = node.parts.map { |part| part.is_a?(StringNode) ? part.unescaped : part.statements.body.first.unescaped }.join
- s(node, :str, unescaped)
- else
- s(node, :dstr).concat(visit_interpolated_parts(node.parts))
- end
+ parts = visit_interpolated_parts(node.parts)
+ parts.length == 1 ? s(node, :str, parts.first) : s(node, :dstr).concat(parts)
end
# :"foo #{bar}"
# ^^^^^^^^^^^^^
def visit_interpolated_symbol_node(node)
- if node.parts.all? { |part| part.is_a?(StringNode) || (part.is_a?(EmbeddedStatementsNode) && part.statements&.body&.length == 1 && part.statements.body.first.is_a?(StringNode)) }
- unescaped = node.parts.map { |part| part.is_a?(StringNode) ? part.unescaped : part.statements.body.first.unescaped }.join
- s(node, :lit, unescaped.to_sym)
- else
- s(node, :dsym).concat(visit_interpolated_parts(node.parts))
- end
+ parts = visit_interpolated_parts(node.parts)
+ parts.length == 1 ? s(node, :lit, parts.first.to_sym) : s(node, :dsym).concat(parts)
end
# `foo #{bar}`
# ^^^^^^^^^^^^
def visit_interpolated_x_string_node(node)
- children = visit_interpolated_parts(node.parts)
- s(node.heredoc? ? node.parts.first : node, :dxstr).concat(children)
+ source = node.heredoc? ? node.parts.first : node
+ parts = visit_interpolated_parts(node.parts)
+ parts.length == 1 ? s(source, :xstr, parts.first) : s(source, :dxstr).concat(parts)
end
# Visit the interpolated content of the string-like node.
private def visit_interpolated_parts(parts)
- parts.each_with_object([]).with_index do |(part, results), index|
- if index == 0
- if part.is_a?(StringNode)
- results << part.unescaped
+ visited = []
+
+ parts.each do |part|
+ result = visit(part)
+
+ if result[0] == :evstr && result[1]
+ if result[1][0] == :str
+ visited << result[1]
+ elsif result[1][0] == :dstr
+ visited.concat(result[1][1..-1])
else
- results << ""
- results << visit(part)
+ visited << result
+ end
+ visited << :space
+ elsif result[0] == :dstr
+ if !visited.empty? && part.parts[0].is_a?(StringNode)
+ # If we are in the middle of an implicitly concatenated string,
+ # we should not have a bare string as the first part. In this
+ # case we need to visit just that first part and then we can
+ # push the rest of the parts onto the visited array.
+ result[1] = visit(part.parts[0])
end
+ visited.concat(result[1..-1])
else
- results << visit(part)
+ visited << result
end
end
+
+ state = :beginning #: :beginning | :string_content | :interpolated_content
+ results = []
+
+ visited.each_with_index do |result, index|
+ case state
+ when :beginning
+ if result.is_a?(String)
+ results << result
+ state = :string_content
+ elsif result.is_a?(Array) && result[0] == :str
+ results << result[1]
+ state = :string_content
+ else
+ results << ""
+ results << result
+ state = :interpolated_content
+ end
+ when :string_content
+ if result == :space
+ # continue
+ elsif result.is_a?(String)
+ results[0] = "#{results[0]}#{result}"
+ elsif result.is_a?(Array) && result[0] == :str
+ results[0] = "#{results[0]}#{result[1]}"
+ else
+ results << result
+ state = :interpolated_content
+ end
+ when :interpolated_content
+ if result == :space
+ # continue
+ elsif visited[index - 1] != :space && result.is_a?(Array) && result[0] == :str && results[-1][0] == :str && (results[-1].line_max == result.line)
+ results[-1][1] = "#{results[-1][1]}#{result[1]}"
+ results[-1].line_max = result.line_max
+ else
+ results << result
+ end
+ end
+ end
+
+ results
+ end
+
+ # -> { it }
+ # ^^
+ def visit_it_local_variable_read_node(node)
+ s(node, :call, nil, :it)
end
# foo(bar: baz)
@@ -887,8 +979,8 @@ module Prism
def visit_lambda_node(node)
parameters =
case node.parameters
- when nil, NumberedParametersNode
- s(node, :args)
+ when nil, ItParametersNode, NumberedParametersNode
+ 0
else
visit(node.parameters)
end
@@ -912,9 +1004,6 @@ module Prism
# foo = 1
# ^^^^^^^
- #
- # foo, bar = 1
- # ^^^ ^^^
def visit_local_variable_write_node(node)
s(node, :lasgn, node.name, visit_write_value(node.value))
end
@@ -922,7 +1011,7 @@ module Prism
# foo += bar
# ^^^^^^^^^^
def visit_local_variable_operator_write_node(node)
- s(node, :lasgn, node.name, s(node, :call, s(node, :lvar, node.name), node.operator, visit_write_value(node.value)))
+ s(node, :lasgn, node.name, s(node, :call, s(node, :lvar, node.name), node.binary_operator, visit_write_value(node.value)))
end
# foo &&= bar
@@ -970,8 +1059,8 @@ module Prism
# A node that is missing from the syntax tree. This is only used in the
# case of a syntax error. The parser gem doesn't have such a concept, so
# we invent our own here.
- def visit_missing_node(node)
- raise "Cannot visit missing node directly"
+ def visit_error_recovery_node(node)
+ raise "Cannot visit error recovery node directly"
end
# module Foo; end
@@ -984,14 +1073,18 @@ module Prism
visit(node.constant_path)
end
- if node.body.nil?
- s(node, :module, name)
- elsif node.body.is_a?(StatementsNode)
- compiler = copy_compiler(in_def: false)
- s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) })
- else
- s(node, :module, name, node.body.accept(copy_compiler(in_def: false)))
- end
+ result =
+ if node.body.nil?
+ s(node, :module, name)
+ elsif node.body.is_a?(StatementsNode)
+ compiler = copy_compiler(in_def: false)
+ s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) })
+ else
+ s(node, :module, name, node.body.accept(copy_compiler(in_def: false)))
+ end
+
+ attach_comments(result, node)
+ result
end
# foo, bar = baz
@@ -1047,6 +1140,12 @@ module Prism
s(node, :nil)
end
+ # def foo(&nil); end
+ # ^^^^
+ def visit_no_block_parameter_node(node)
+ :"&nil"
+ end
+
# def foo(**nil); end
# ^^^^^
def visit_no_keywords_parameter_node(node)
@@ -1080,14 +1179,26 @@ module Prism
# a or b
# ^^^^^^
def visit_or_node(node)
- s(node, :or, visit(node.left), visit(node.right))
+ left = visit(node.left)
+
+ if left[0] == :or
+ # ruby_parser has the or keyword as right-associative as opposed to
+ # prism which has it as left-associative. We reverse that
+ # associativity here.
+ nest = left
+ nest = nest[2] while nest[2][0] == :or
+ nest[2] = s(node, :or, nest[2], visit(node.right))
+ left
+ else
+ s(node, :or, left, visit(node.right))
+ end
end
# def foo(bar, *baz); end
# ^^^^^^^^^
def visit_parameters_node(node)
children =
- node.compact_child_nodes.map do |element|
+ node.each_child_node.map do |element|
if element.is_a?(MultiTargetNode)
visit_destructured_parameter(element)
else
@@ -1297,7 +1408,7 @@ module Prism
# __FILE__
# ^^^^^^^^
def visit_source_file_node(node)
- s(node, :str, file)
+ s(node, :str, node.filepath)
end
# __LINE__
@@ -1336,7 +1447,14 @@ module Prism
# "foo"
# ^^^^^
def visit_string_node(node)
- s(node, :str, node.unescaped)
+ unescaped = node.unescaped
+
+ if node.forced_binary_encoding?
+ unescaped = unescaped.dup
+ unescaped.force_encoding(Encoding::BINARY)
+ end
+
+ s(node, :str, unescaped)
end
# super(foo)
@@ -1378,7 +1496,7 @@ module Prism
# bar unless foo
# ^^^^^^^^^^^^^^
def visit_unless_node(node)
- s(node, :if, visit(node.predicate), visit(node.consequent), visit(node.statements))
+ s(node, :if, visit(node.predicate), visit(node.else_clause), visit(node.statements))
end
# until foo; bar end
@@ -1429,6 +1547,17 @@ module Prism
private
+ # Attach prism comments to the given sexp.
+ def attach_comments(sexp, node)
+ return unless node.comments
+ return if node.comments.empty?
+
+ extra = node.location.start_line - node.comments.last.location.start_line
+ comments = node.comments.map(&:slice)
+ comments.concat([nil] * [0, extra].max)
+ sexp.comments = comments.join("\n")
+ end
+
# Create a new compiler with the given options.
def copy_compiler(in_def: self.in_def, in_pattern: self.in_pattern)
Compiler.new(file, in_def: in_def, in_pattern: in_pattern)
@@ -1451,7 +1580,7 @@ module Prism
else
parameters =
case block.parameters
- when nil, NumberedParametersNode
+ when nil, ItParametersNode, NumberedParametersNode
0
else
visit(block.parameters)
@@ -1498,13 +1627,21 @@ module Prism
# Parse the given source and translate it into the seattlerb/ruby_parser
# gem's Sexp format.
def parse(source, filepath = "(string)")
- translate(Prism.parse(source), filepath)
+ translate(Prism.parse(source, filepath: filepath, partial_script: true), filepath)
end
# Parse the given file and translate it into the seattlerb/ruby_parser
# gem's Sexp format.
def parse_file(filepath)
- translate(Prism.parse_file(filepath), filepath)
+ translate(Prism.parse_file(filepath, partial_script: true), filepath)
+ end
+
+ # Parse the given source and translate it into the
+ # seattlerb/ruby_parser gem's Sexp format. This method is
+ # provided for API compatibility to RubyParser and takes an
+ # optional +timeout+ argument.
+ def process(ruby, file = "(string)", timeout = nil)
+ Timeout.timeout(timeout) { parse(ruby, file) }
end
class << self
@@ -1531,6 +1668,7 @@ module Prism
raise ::RubyParser::SyntaxError, "#{filepath}:#{error.location.start_line} :: #{error.message}"
end
+ result.attach_comments!
result.value.accept(Compiler.new(filepath))
end
end