summary | refs | log | tree | commit | diff
path: root/lib/prism
diff options
context:
space:
mode:
Diffstat (limited to 'lib/prism')
-rw-r--r-- lib/prism/desugar_compiler.rb | 140
-rw-r--r-- lib/prism/ffi.rb | 181
-rw-r--r-- lib/prism/lex_compat.rb | 469
-rw-r--r-- lib/prism/node_ext.rb | 348
-rw-r--r-- lib/prism/node_find.rb | 185
-rw-r--r-- lib/prism/pack.rb | 228
-rw-r--r-- lib/prism/parse_result.rb | 515
-rw-r--r-- lib/prism/parse_result/comments.rb | 44
-rw-r--r-- lib/prism/parse_result/errors.rb | 9
-rw-r--r-- lib/prism/parse_result/newlines.rb | 60
-rw-r--r-- lib/prism/pattern.rb | 84
-rw-r--r-- lib/prism/polyfill/scan_byte.rb | 14
-rw-r--r-- lib/prism/polyfill/warn.rb | 38
-rw-r--r-- lib/prism/prism.gemspec | 189
-rw-r--r-- lib/prism/relocation.rb | 187
-rw-r--r-- lib/prism/string_query.rb | 18
-rw-r--r-- lib/prism/translation.rb | 11
-rw-r--r-- lib/prism/translation/parser.rb | 18
-rw-r--r-- lib/prism/translation/parser/builder.rb | 13
-rw-r--r-- lib/prism/translation/parser/compiler.rb | 70
-rw-r--r-- lib/prism/translation/parser/lexer.rb | 59
-rw-r--r-- lib/prism/translation/parser33.rb | 12
-rw-r--r-- lib/prism/translation/parser34.rb | 12
-rw-r--r-- lib/prism/translation/parser35.rb | 12
-rw-r--r-- lib/prism/translation/parser_current.rb | 10
-rw-r--r-- lib/prism/translation/parser_versions.rb | 36
-rw-r--r-- lib/prism/translation/ripper.rb | 1239
-rw-r--r-- lib/prism/translation/ripper/filter.rb | 53
-rw-r--r-- lib/prism/translation/ripper/lexer.rb | 133
-rw-r--r-- lib/prism/translation/ripper/sexp.rb | 13
-rw-r--r-- lib/prism/translation/ripper/shim.rb | 2
-rw-r--r-- lib/prism/translation/ruby_parser.rb | 106
32 files changed, 3073 insertions, 1435 deletions
diff --git a/lib/prism/desugar_compiler.rb b/lib/prism/desugar_compiler.rb
index e3b15fc3b0..c64d03f64a 100644
--- a/lib/prism/desugar_compiler.rb
+++ b/lib/prism/desugar_compiler.rb
@@ -1,11 +1,18 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
class DesugarAndWriteNode # :nodoc:
include DSL
- attr_reader :node, :default_source, :read_class, :write_class, :arguments
+ attr_reader :node #: ClassVariableAndWriteNode | ConstantAndWriteNode | GlobalVariableAndWriteNode | InstanceVariableAndWriteNode | LocalVariableAndWriteNode
+ attr_reader :default_source #: Source
+ attr_reader :read_class, :write_class #: Symbol
+ attr_reader :arguments #: Hash[Symbol, untyped]
+ #: ((ClassVariableAndWriteNode | ConstantAndWriteNode | GlobalVariableAndWriteNode | InstanceVariableAndWriteNode | LocalVariableAndWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void
def initialize(node, default_source, read_class, write_class, **arguments)
@node = node
@default_source = default_source
@@ -15,6 +22,8 @@ module Prism
end
# Desugar `x &&= y` to `x && x = y`
+ #--
+ #: () -> node
def compile
and_node(
location: node.location,
@@ -35,8 +44,12 @@ module Prism
class DesugarOrWriteDefinedNode # :nodoc:
include DSL
- attr_reader :node, :default_source, :read_class, :write_class, :arguments
+ attr_reader :node #: ClassVariableOrWriteNode | ConstantOrWriteNode | GlobalVariableOrWriteNode
+ attr_reader :default_source #: Source
+ attr_reader :read_class, :write_class #: Symbol
+ attr_reader :arguments #: Hash[Symbol, untyped]
+ #: ((ClassVariableOrWriteNode | ConstantOrWriteNode | GlobalVariableOrWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void
def initialize(node, default_source, read_class, write_class, **arguments)
@node = node
@default_source = default_source
@@ -46,6 +59,8 @@ module Prism
end
# Desugar `x ||= y` to `defined?(x) ? x : x = y`
+ #--
+ #: () -> node
def compile
if_node(
location: node.location,
@@ -86,8 +101,12 @@ module Prism
class DesugarOperatorWriteNode # :nodoc:
include DSL
- attr_reader :node, :default_source, :read_class, :write_class, :arguments
+ attr_reader :node #: ClassVariableOperatorWriteNode | ConstantOperatorWriteNode | GlobalVariableOperatorWriteNode | InstanceVariableOperatorWriteNode | LocalVariableOperatorWriteNode
+ attr_reader :default_source #: Source
+ attr_reader :read_class, :write_class #: Symbol
+ attr_reader :arguments #: Hash[Symbol, untyped]
+ #: ((ClassVariableOperatorWriteNode | ConstantOperatorWriteNode | GlobalVariableOperatorWriteNode | InstanceVariableOperatorWriteNode | LocalVariableOperatorWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void
def initialize(node, default_source, read_class, write_class, **arguments)
@node = node
@default_source = default_source
@@ -97,6 +116,8 @@ module Prism
end
# Desugar `x += y` to `x = x + y`
+ #--
+ #: () -> node
def compile
binary_operator_loc = node.binary_operator_loc.chop
@@ -130,8 +151,12 @@ module Prism
class DesugarOrWriteNode # :nodoc:
include DSL
- attr_reader :node, :default_source, :read_class, :write_class, :arguments
+ attr_reader :node #: InstanceVariableOrWriteNode | LocalVariableOrWriteNode
+ attr_reader :default_source #: Source
+ attr_reader :read_class, :write_class #: Symbol
+ attr_reader :arguments #: Hash[Symbol, untyped]
+ #: ((InstanceVariableOrWriteNode | LocalVariableOrWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void
def initialize(node, default_source, read_class, write_class, **arguments)
@node = node
@default_source = default_source
@@ -141,6 +166,8 @@ module Prism
end
# Desugar `x ||= y` to `x || x = y`
+ #--
+ #: () -> node
def compile
or_node(
location: node.location,
@@ -161,90 +188,105 @@ module Prism
private_constant :DesugarAndWriteNode, :DesugarOrWriteNode, :DesugarOrWriteDefinedNode, :DesugarOperatorWriteNode
class ClassVariableAndWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarAndWriteNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile
end
end
class ClassVariableOrWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOrWriteDefinedNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile
end
end
class ClassVariableOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOperatorWriteNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile
end
end
class ConstantAndWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarAndWriteNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile
end
end
class ConstantOrWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOrWriteDefinedNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile
end
end
class ConstantOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOperatorWriteNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile
end
end
class GlobalVariableAndWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarAndWriteNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile
end
end
class GlobalVariableOrWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOrWriteDefinedNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile
end
end
class GlobalVariableOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOperatorWriteNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile
end
end
class InstanceVariableAndWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarAndWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile
end
end
class InstanceVariableOrWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOrWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile
end
end
class InstanceVariableOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOperatorWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile
end
end
class LocalVariableAndWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarAndWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile
end
end
class LocalVariableOrWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOrWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile
end
end
class LocalVariableOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOperatorWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile
end
@@ -253,137 +295,167 @@ module Prism
# DesugarCompiler is a compiler that desugars Ruby code into a more primitive
# form. This is useful for consumers that want to deal with fewer node types.
class DesugarCompiler < MutationCompiler
- # @@foo &&= bar
+ # `@@foo &&= bar`
#
# becomes
#
- # @@foo && @@foo = bar
+ # `@@foo && @@foo = bar`
+ #--
+ #: (ClassVariableAndWriteNode node) -> node
def visit_class_variable_and_write_node(node)
node.desugar
end
- # @@foo ||= bar
+ # `@@foo ||= bar`
#
# becomes
#
- # defined?(@@foo) ? @@foo : @@foo = bar
+ # `defined?(@@foo) ? @@foo : @@foo = bar`
+ #--
+ #: (ClassVariableOrWriteNode node) -> node
def visit_class_variable_or_write_node(node)
node.desugar
end
- # @@foo += bar
+ # `@@foo += bar`
#
# becomes
#
- # @@foo = @@foo + bar
+ # `@@foo = @@foo + bar`
+ #--
+ #: (ClassVariableOperatorWriteNode node) -> node
def visit_class_variable_operator_write_node(node)
node.desugar
end
- # Foo &&= bar
+ # `Foo &&= bar`
#
# becomes
#
- # Foo && Foo = bar
+ # `Foo && Foo = bar`
+ #--
+ #: (ConstantAndWriteNode node) -> node
def visit_constant_and_write_node(node)
node.desugar
end
- # Foo ||= bar
+ # `Foo ||= bar`
#
# becomes
#
- # defined?(Foo) ? Foo : Foo = bar
+ # `defined?(Foo) ? Foo : Foo = bar`
+ #--
+ #: (ConstantOrWriteNode node) -> node
def visit_constant_or_write_node(node)
node.desugar
end
- # Foo += bar
+ # `Foo += bar`
#
# becomes
#
- # Foo = Foo + bar
+ # `Foo = Foo + bar`
+ #--
+ #: (ConstantOperatorWriteNode node) -> node
def visit_constant_operator_write_node(node)
node.desugar
end
- # $foo &&= bar
+ # `$foo &&= bar`
#
# becomes
#
- # $foo && $foo = bar
+ # `$foo && $foo = bar`
+ #--
+ #: (GlobalVariableAndWriteNode node) -> node
def visit_global_variable_and_write_node(node)
node.desugar
end
- # $foo ||= bar
+ # `$foo ||= bar`
#
# becomes
#
- # defined?($foo) ? $foo : $foo = bar
+ # `defined?($foo) ? $foo : $foo = bar`
+ #--
+ #: (GlobalVariableOrWriteNode node) -> node
def visit_global_variable_or_write_node(node)
node.desugar
end
- # $foo += bar
+ # `$foo += bar`
#
# becomes
#
- # $foo = $foo + bar
+ # `$foo = $foo + bar`
+ #--
+ #: (GlobalVariableOperatorWriteNode node) -> node
def visit_global_variable_operator_write_node(node)
node.desugar
end
- # @foo &&= bar
+ # `@foo &&= bar`
#
# becomes
#
- # @foo && @foo = bar
+ # `@foo && @foo = bar`
+ #--
+ #: (InstanceVariableAndWriteNode node) -> node
def visit_instance_variable_and_write_node(node)
node.desugar
end
- # @foo ||= bar
+ # `@foo ||= bar`
#
# becomes
#
- # @foo || @foo = bar
+ # `@foo || @foo = bar`
+ #--
+ #: (InstanceVariableOrWriteNode node) -> node
def visit_instance_variable_or_write_node(node)
node.desugar
end
- # @foo += bar
+ # `@foo += bar`
#
# becomes
#
- # @foo = @foo + bar
+ # `@foo = @foo + bar`
+ #--
+ #: (InstanceVariableOperatorWriteNode node) -> node
def visit_instance_variable_operator_write_node(node)
node.desugar
end
- # foo &&= bar
+ # `foo &&= bar`
#
# becomes
#
- # foo && foo = bar
+ # `foo && foo = bar`
+ #--
+ #: (LocalVariableAndWriteNode node) -> node
def visit_local_variable_and_write_node(node)
node.desugar
end
- # foo ||= bar
+ # `foo ||= bar`
#
# becomes
#
- # foo || foo = bar
+ # `foo || foo = bar`
+ #--
+ #: (LocalVariableOrWriteNode node) -> node
def visit_local_variable_or_write_node(node)
node.desugar
end
- # foo += bar
+ # `foo += bar`
#
# becomes
#
- # foo = foo + bar
+ # `foo = foo + bar`
+ #--
+ #: (LocalVariableOperatorWriteNode node) -> node
def visit_local_variable_operator_write_node(node)
node.desugar
end
diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb
index a0da0b6195..6b9bde51ea 100644
--- a/lib/prism/ffi.rb
+++ b/lib/prism/ffi.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: true
+# :markup: markdown
# typed: ignore
# This file is responsible for mirroring the API provided by the C extension by
@@ -11,7 +12,7 @@ require "ffi"
# autoloaded from within a non-main Ractor.
require "prism/serialize" if defined?(Ractor)
-module Prism
+module Prism # :nodoc:
module LibRubyParser # :nodoc:
extend FFI::Library
@@ -58,6 +59,9 @@ module Prism
# We only want to load the functions that we are interested in.
next unless functions.any? { |function| line.include?(function) }
+ # Strip trailing attributes (PRISM_NODISCARD, PRISM_NONNULL(...), etc.)
+ line = line.sub(/\)(\s+PRISM_\w+(?:\([^)]*\))?)+\s*;/, ");")
+
# Parse the function declaration.
unless /^PRISM_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line
raise "Could not parse #{line}"
@@ -84,29 +88,44 @@ module Prism
raise "Could not find functions #{functions.inspect}" unless functions.empty?
end
- callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer
- enum :pm_string_init_result_t, %i[PM_STRING_INIT_SUCCESS PM_STRING_INIT_ERROR_GENERIC PM_STRING_INIT_ERROR_DIRECTORY]
+ callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer
+ callback :pm_source_stream_feof_t, [:pointer], :int
+ pm_source_init_result_values = %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR]
+ enum :pm_source_init_result_t, pm_source_init_result_values
enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE]
+ # Ractor-safe lookup table for pm_source_init_result_t, since FFI's
+ # enum_type accesses module instance variables that are not shareable.
+ SOURCE_INIT_RESULT = pm_source_init_result_values.freeze
+
load_exported_functions_from(
- "prism.h",
+ "prism/version.h",
"pm_version",
+ []
+ )
+
+ load_exported_functions_from(
+ "prism/serialize.h",
"pm_serialize_parse",
"pm_serialize_parse_stream",
"pm_serialize_parse_comments",
"pm_serialize_lex",
"pm_serialize_parse_lex",
- "pm_parse_success_p",
+ "pm_serialize_parse_success_p",
+ []
+ )
+
+ load_exported_functions_from(
+ "prism/string_query.h",
"pm_string_query_local",
"pm_string_query_constant",
"pm_string_query_method_name",
- [:pm_parse_stream_fgets_t]
+ []
)
load_exported_functions_from(
- "prism/util/pm_buffer.h",
- "pm_buffer_sizeof",
- "pm_buffer_init",
+ "prism/buffer.h",
+ "pm_buffer_new",
"pm_buffer_value",
"pm_buffer_length",
"pm_buffer_free",
@@ -114,20 +133,19 @@ module Prism
)
load_exported_functions_from(
- "prism/util/pm_string.h",
- "pm_string_mapped_init",
- "pm_string_free",
- "pm_string_source",
- "pm_string_length",
- "pm_string_sizeof",
- []
+ "prism/source.h",
+ "pm_source_file_new",
+ "pm_source_mapped_new",
+ "pm_source_stream_new",
+ "pm_source_free",
+ "pm_source_source",
+ "pm_source_length",
+ [:pm_source_stream_fgets_t, :pm_source_stream_feof_t]
)
# This object represents a pm_buffer_t. We only use it as an opaque pointer,
# so it doesn't need to know the fields of pm_buffer_t.
class PrismBuffer # :nodoc:
- SIZEOF = LibRubyParser.pm_buffer_sizeof
-
attr_reader :pointer
def initialize(pointer)
@@ -149,20 +167,20 @@ module Prism
# Initialize a new buffer and yield it to the block. The buffer will be
# automatically freed when the block returns.
def self.with
- FFI::MemoryPointer.new(SIZEOF) do |pointer|
- raise unless LibRubyParser.pm_buffer_init(pointer)
- return yield new(pointer)
+ buffer = LibRubyParser.pm_buffer_new
+ raise unless buffer
+
+ begin
+ yield new(buffer)
ensure
- LibRubyParser.pm_buffer_free(pointer)
+ LibRubyParser.pm_buffer_free(buffer)
end
end
end
- # This object represents a pm_string_t. We only use it as an opaque pointer,
- # so it doesn't have to be an FFI::Struct.
- class PrismString # :nodoc:
- SIZEOF = LibRubyParser.pm_string_sizeof
-
+ # This object represents source code to be parsed. For strings it wraps a
+ # pointer directly; for files it uses a pm_source_t under the hood.
+ class PrismSource # :nodoc:
PLATFORM_EXPECTS_UTF8 =
RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i)
@@ -179,7 +197,7 @@ module Prism
@pointer.read_string(@length)
end
- # Yields a pm_string_t pointer to the given block.
+ # Yields a PrismSource backed by the given string to the block.
def self.with_string(string)
raise TypeError unless string.is_a?(String)
@@ -193,32 +211,38 @@ module Prism
end
end
- # Yields a pm_string_t pointer to the given block.
+ # Yields a PrismSource to the given block, backed by a pm_source_t.
def self.with_file(filepath)
raise TypeError unless filepath.is_a?(String)
# On Windows and Mac, it's expected that filepaths will be encoded in
# UTF-8. If they are not, we need to convert them to UTF-8 before
- # passing them into pm_string_mapped_init.
+ # passing them into pm_source_mapped_new.
if PLATFORM_EXPECTS_UTF8 && (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8
filepath = filepath.encode(Encoding::UTF_8)
end
- FFI::MemoryPointer.new(SIZEOF) do |pm_string|
- case (result = LibRubyParser.pm_string_mapped_init(pm_string, filepath))
- when :PM_STRING_INIT_SUCCESS
- pointer = LibRubyParser.pm_string_source(pm_string)
- length = LibRubyParser.pm_string_length(pm_string)
+ FFI::MemoryPointer.new(:int) do |result_ptr|
+ pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr)
+
+ case SOURCE_INIT_RESULT[result_ptr.read_int]
+ when :PM_SOURCE_INIT_SUCCESS
+ pointer = LibRubyParser.pm_source_source(pm_source)
+ length = LibRubyParser.pm_source_length(pm_source)
return yield new(pointer, length, false)
- when :PM_STRING_INIT_ERROR_GENERIC
+ when :PM_SOURCE_INIT_ERROR_GENERIC
raise SystemCallError.new(filepath, FFI.errno)
- when :PM_STRING_INIT_ERROR_DIRECTORY
+ when :PM_SOURCE_INIT_ERROR_DIRECTORY
raise Errno::EISDIR.new(filepath)
+ when :PM_SOURCE_INIT_ERROR_NON_REGULAR
+ # Fall back to reading the file through Ruby IO for non-regular
+ # files (pipes, character devices, etc.)
+ return with_string(File.read(filepath)) { |string| yield string }
else
- raise "Unknown error initializing pm_string_t: #{result.inspect}"
+ raise "Unknown error initializing pm_source_t: #{result_ptr.read_int}"
end
ensure
- LibRubyParser.pm_string_free(pm_string)
+ LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null?
end
end
end
@@ -234,29 +258,29 @@ module Prism
class << self
# Mirror the Prism.dump API by using the serialization API.
def dump(source, **options)
- LibRubyParser::PrismString.with_string(source) { |string| dump_common(string, options) }
+ LibRubyParser::PrismSource.with_string(source) { |string| dump_common(string, options) }
end
# Mirror the Prism.dump_file API by using the serialization API.
def dump_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| dump_common(string, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| dump_common(string, options) }
end
# Mirror the Prism.lex API by using the serialization API.
def lex(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| lex_common(string, code, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| lex_common(string, code, options) }
end
# Mirror the Prism.lex_file API by using the serialization API.
def lex_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| lex_common(string, string.read, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| lex_common(string, string.read, options) }
end
# Mirror the Prism.parse API by using the serialization API.
def parse(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| parse_common(string, code, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| parse_common(string, code, options) }
end
# Mirror the Prism.parse_file API by using the serialization API. This uses
@@ -264,7 +288,7 @@ module Prism
# when it is available.
def parse_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| parse_common(string, string.read, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| parse_common(string, string.read, options) }
end
# Mirror the Prism.parse_stream API by using the serialization API.
@@ -280,19 +304,21 @@ module Prism
end
}
- # In the pm_serialize_parse_stream function it accepts a pointer to the
- # IO object as a void* and then passes it through to the callback as the
- # third argument, but it never touches it itself. As such, since we have
- # access to the IO object already through the closure of the lambda, we
- # can pass a null pointer here and not worry.
- LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options))
- Prism.load(source, buffer.read, options.fetch(:freeze, false))
+ eof_callback = -> (_) { stream.eof? }
+
+ pm_source = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback)
+ begin
+ LibRubyParser.pm_serialize_parse_stream(buffer.pointer, pm_source, dump_options(options))
+ Prism.load(source, buffer.read, options.fetch(:freeze, false))
+ ensure
+ LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null?
+ end
end
end
# Mirror the Prism.parse_comments API by using the serialization API.
def parse_comments(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| parse_comments_common(string, code, options) }
end
# Mirror the Prism.parse_file_comments API by using the serialization
@@ -300,23 +326,23 @@ module Prism
# to use mmap when it is available.
def parse_file_comments(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| parse_comments_common(string, string.read, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| parse_comments_common(string, string.read, options) }
end
# Mirror the Prism.parse_lex API by using the serialization API.
def parse_lex(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| parse_lex_common(string, code, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| parse_lex_common(string, code, options) }
end
# Mirror the Prism.parse_lex_file API by using the serialization API.
def parse_lex_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| parse_lex_common(string, string.read, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| parse_lex_common(string, string.read, options) }
end
# Mirror the Prism.parse_success? API by using the serialization API.
def parse_success?(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| parse_file_success_common(string, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| parse_file_success_common(string, options) }
end
# Mirror the Prism.parse_failure? API by using the serialization API.
@@ -327,7 +353,7 @@ module Prism
# Mirror the Prism.parse_file_success? API by using the serialization API.
def parse_file_success?(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| parse_file_success_common(string, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| parse_file_success_common(string, options) }
end
# Mirror the Prism.parse_file_failure? API by using the serialization API.
@@ -337,7 +363,7 @@ module Prism
# Mirror the Prism.profile API by using the serialization API.
def profile(source, **options)
- LibRubyParser::PrismString.with_string(source) do |string|
+ LibRubyParser::PrismSource.with_string(source) do |string|
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
nil
@@ -347,7 +373,7 @@ module Prism
# Mirror the Prism.profile_file API by using the serialization API.
def profile_file(filepath, **options)
- LibRubyParser::PrismString.with_file(filepath) do |string|
+ LibRubyParser::PrismSource.with_file(filepath) do |string|
LibRubyParser::PrismBuffer.with do |buffer|
options[:filepath] = filepath
LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
@@ -396,7 +422,7 @@ module Prism
end
def parse_file_success_common(string, options) # :nodoc:
- LibRubyParser.pm_parse_success_p(string.pointer, string.length, dump_options(options))
+ LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options))
end
# Return the value that should be dumped for the command_line option.
@@ -420,16 +446,35 @@ module Prism
# Return the value that should be dumped for the version option.
def dump_options_version(version)
case version
- when nil, "latest"
- 0
+ when "current"
+ version_string_to_number(RUBY_VERSION) || raise(CurrentVersionError, RUBY_VERSION)
+ when "latest", nil
+ 0 # Handled in pm_parser_init
+ when "nearest"
+ dump = version_string_to_number(RUBY_VERSION)
+ return dump if dump
+ if RUBY_VERSION < "3.3"
+ version_string_to_number("3.3")
+ else
+ 0 # Handled in pm_parser_init
+ end
+ else
+ version_string_to_number(version) || raise(ArgumentError, "invalid version: #{version}")
+ end
+ end
+
+ # Converts a version string like "4.0.0" or "4.0" into a number.
+ # Returns nil if the version is unknown.
+ def version_string_to_number(version)
+ case version
when /\A3\.3(\.\d+)?\z/
1
when /\A3\.4(\.\d+)?\z/
2
- when /\A3\.5(\.\d+)?\z/
- 0
- else
- raise ArgumentError, "invalid version: #{version}"
+ when /\A3\.5(\.\d+)?\z/, /\A4\.0(\.\d+)?\z/
+ 3
+ when /\A4\.1(\.\d+)?\z/
+ 4
end
end
@@ -531,7 +576,7 @@ module Prism
# Here we are going to patch StringQuery to put in the class-level methods so
# that it can maintain a consistent interface
- class StringQuery
+ class StringQuery # :nodoc:
class << self
# Mirrors the C extension's StringQuery::local? method.
def local?(string)
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index a83c24cb41..7aacec037d 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -1,28 +1,64 @@
# frozen_string_literal: true
-
-require "delegate"
-require "ripper"
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
+ # @rbs!
+ # module Translation
+ # class Ripper
+ # EXPR_NONE: Integer
+ # EXPR_BEG: Integer
+ # EXPR_MID: Integer
+ # EXPR_END: Integer
+ # EXPR_CLASS: Integer
+ # EXPR_VALUE: Integer
+ # EXPR_ARG: Integer
+ # EXPR_CMDARG: Integer
+ # EXPR_ENDARG: Integer
+ # EXPR_ENDFN: Integer
+ #
+ # class Lexer < Ripper
+ # class State
+ # def self.[]: (Integer value) -> State
+ # end
+ # end
+ #
+ # class LineAndColumnCache
+ # def initialize: (Source source) -> void
+ #
+ # def line_and_column: (Integer byte_offset) -> [Integer, Integer]
+ # end
+ # end
+ # end
+
# This class is responsible for lexing the source using prism and then
# converting those tokens to be compatible with Ripper. In the vast majority
# of cases, this is a one-to-one mapping of the token type. Everything else
# generally lines up. However, there are a few cases that require special
# handling.
class LexCompat # :nodoc:
+ # @rbs!
+ # # A token produced by the Ripper lexer that Prism is replicating.
+ # type lex_compat_token = [[Integer, Integer], Symbol, String, untyped]
+
# A result class specialized for holding tokens produced by the lexer.
class Result < Prism::Result
# The list of tokens that were produced by the lexer.
- attr_reader :value
+ attr_reader :value #: Array[lex_compat_token]
# Create a new lex compat result object with the given values.
- def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+ #--
+ #: (Array[lex_compat_token] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
@value = value
- super(comments, magic_comments, data_loc, errors, warnings, source)
+ super(comments, magic_comments, data_loc, errors, warnings, continuable, source)
end
# Implement the hash pattern matching interface for Result.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
super.merge!(value: value)
end
end
@@ -104,6 +140,7 @@ module Prism
KEYWORD_DEF: :on_kw,
KEYWORD_DEFINED: :on_kw,
KEYWORD_DO: :on_kw,
+ KEYWORD_DO_BLOCK: :on_kw,
KEYWORD_DO_LOOP: :on_kw,
KEYWORD_ELSE: :on_kw,
KEYWORD_ELSIF: :on_kw,
@@ -198,93 +235,6 @@ module Prism
"__END__": :on___end__
}.freeze
- # When we produce tokens, we produce the same arrays that Ripper does.
- # However, we add a couple of convenience methods onto them to make them a
- # little easier to work with. We delegate all other methods to the array.
- class Token < SimpleDelegator
- # @dynamic initialize, each, []
-
- # The location of the token in the source.
- def location
- self[0]
- end
-
- # The type of the token.
- def event
- self[1]
- end
-
- # The slice of the source that this token represents.
- def value
- self[2]
- end
-
- # The state of the lexer when this token was produced.
- def state
- self[3]
- end
- end
-
- # Ripper doesn't include the rest of the token in the event, so we need to
- # trim it down to just the content on the first line when comparing.
- class EndContentToken < Token
- def ==(other) # :nodoc:
- [self[0], self[1], self[2][0..self[2].index("\n")], self[3]] == other
- end
- end
-
- # Tokens where state should be ignored
- # used for :on_comment, :on_heredoc_end, :on_embexpr_end
- class IgnoreStateToken < Token
- def ==(other) # :nodoc:
- self[0...-1] == other[0...-1]
- end
- end
-
- # Ident tokens for the most part are exactly the same, except sometimes we
- # know an ident is a local when ripper doesn't (when they are introduced
- # through named captures in regular expressions). In that case we don't
- # compare the state.
- class IdentToken < Token
- def ==(other) # :nodoc:
- (self[0...-1] == other[0...-1]) && (
- (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) ||
- (other[3] & Ripper::EXPR_ARG_ANY != 0)
- )
- end
- end
-
- # Ignored newlines can occasionally have a LABEL state attached to them, so
- # we compare the state differently here.
- class IgnoredNewlineToken < Token
- def ==(other) # :nodoc:
- return false unless self[0...-1] == other[0...-1]
-
- if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED
- other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0
- else
- self[3] == other[3]
- end
- end
- end
-
- # If we have an identifier that follows a method name like:
- #
- # def foo bar
- #
- # then Ripper will mark bar as END|LABEL if there is a local in a parent
- # scope named bar because it hasn't pushed the local table yet. We do this
- # more accurately, so we need to allow comparing against both END and
- # END|LABEL.
- class ParamToken < Token
- def ==(other) # :nodoc:
- (self[0...-1] == other[0...-1]) && (
- (other[3] == Ripper::EXPR_END) ||
- (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
- )
- end
- end
-
# A heredoc in this case is a list of tokens that belong to the body of the
# heredoc that should be appended onto the list of tokens when the heredoc
# closes.
@@ -294,16 +244,19 @@ module Prism
# order back into the token stream and set the state of the last token to
# the state that the heredoc was opened in.
class PlainHeredoc # :nodoc:
- attr_reader :tokens
+ attr_reader :tokens #: Array[lex_compat_token]
+ #: () -> void
def initialize
@tokens = []
end
+ #: (lex_compat_token token) -> void
def <<(token)
tokens << token
end
+ #: () -> Array[lex_compat_token]
def to_a
tokens
end
@@ -313,22 +266,26 @@ module Prism
# that need to be split on "\\\n" to mimic Ripper's behavior. We also need
# to keep track of the state that the heredoc was opened in.
class DashHeredoc # :nodoc:
- attr_reader :split, :tokens
+ attr_reader :split #: bool
+ attr_reader :tokens #: Array[lex_compat_token]
+ #: (bool split) -> void
def initialize(split)
@split = split
@tokens = []
end
+ #: (lex_compat_token token) -> void
def <<(token)
tokens << token
end
+ #: () -> Array[lex_compat_token]
def to_a
embexpr_balance = 0
- tokens.each_with_object([]) do |token, results| #$ Array[Token]
- case token.event
+ tokens.each_with_object([]) do |token, results| #$ Array[lex_compat_token]
+ case token[1]
when :on_embexpr_beg
embexpr_balance += 1
results << token
@@ -343,9 +300,9 @@ module Prism
if split
# Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind
# to keep the delimiter in the result.
- token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
+ token[2].split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
column = 0 if index > 0
- results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
+ results << [[lineno, column], :on_tstring_content, value, token[3]]
lineno += value.count("\n")
end
else
@@ -374,8 +331,13 @@ module Prism
class DedentingHeredoc # :nodoc:
TAB_WIDTH = 8
- attr_reader :tokens, :dedent_next, :dedent, :embexpr_balance
+ attr_reader :tokens #: Array[lex_compat_token]
+ attr_reader :dedent_next #: bool
+ attr_reader :dedent #: Integer?
+ attr_reader :embexpr_balance #: Integer
+ # @rbs @ended_on_newline: bool
+ #: () -> void
def initialize
@tokens = []
@dedent_next = true
@@ -387,8 +349,10 @@ module Prism
# As tokens are coming in, we track the minimum amount of common leading
# whitespace on plain string content tokens. This allows us to later
# remove that amount of whitespace from the beginning of each line.
+ #
+ #: (lex_compat_token token) -> void
def <<(token)
- case token.event
+ case token[1]
when :on_embexpr_beg, :on_heredoc_beg
@embexpr_balance += 1
@dedent = 0 if @dedent_next && @ended_on_newline
@@ -396,10 +360,10 @@ module Prism
@embexpr_balance -= 1
when :on_tstring_content
if embexpr_balance == 0
- line = token.value
+ line = token[2]
if dedent_next && !(line.strip.empty? && line.end_with?("\n"))
- leading = line[/\A(\s*)\n?/, 1]
+ leading = line[/\A(\s*)\n?/, 1] #: String
next_dedent = 0
leading.each_char do |char|
@@ -419,20 +383,21 @@ module Prism
end
end
- @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0
+ @dedent_next = token[1] == :on_tstring_content && embexpr_balance == 0
@ended_on_newline = false
tokens << token
end
+ #: () -> Array[lex_compat_token]
def to_a
# If every line in the heredoc is blank, we still need to split up the
# string content token into multiple tokens.
if dedent.nil?
- results = [] #: Array[Token]
+ results = [] #: Array[lex_compat_token]
embexpr_balance = 0
tokens.each do |token|
- case token.event
+ case token[1]
when :on_embexpr_beg, :on_heredoc_beg
embexpr_balance += 1
results << token
@@ -444,9 +409,9 @@ module Prism
lineno = token[0][0]
column = token[0][1]
- token.value.split(/(?<=\n)/).each_with_index do |value, index|
+ token[2].split(/(?<=\n)/).each_with_index do |value, index|
column = 0 if index > 0
- results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
+ results << [[lineno, column], :on_tstring_content, value, token[3]]
lineno += 1
end
else
@@ -463,7 +428,7 @@ module Prism
# If the minimum common whitespace is 0, then we need to concatenate
# string nodes together that are immediately adjacent.
if dedent == 0
- results = [] #: Array[Token]
+ results = [] #: Array[lex_compat_token]
embexpr_balance = 0
index = 0
@@ -474,15 +439,15 @@ module Prism
results << token
index += 1
- case token.event
+ case token[1]
when :on_embexpr_beg, :on_heredoc_beg
embexpr_balance += 1
when :on_embexpr_end, :on_heredoc_end
embexpr_balance -= 1
when :on_tstring_content
if embexpr_balance == 0
- while index < max_index && tokens[index].event == :on_tstring_content && !token.value.match?(/\\\r?\n\z/)
- token.value << tokens[index].value
+ while index < max_index && tokens[index][1] == :on_tstring_content && !token[2].match?(/\\\r?\n\z/)
+ token[2] << tokens[index][2]
index += 1
end
end
@@ -496,7 +461,7 @@ module Prism
# insert on_ignored_sp tokens for the amount of dedent that we need to
# perform. We also need to remove the dedent from the beginning of
# each line of plain string content tokens.
- results = [] #: Array[Token]
+ results = [] #: Array[lex_compat_token]
dedent_next = true
embexpr_balance = 0
@@ -505,7 +470,7 @@ module Prism
# whitespace calculation we performed above. This is because
# checking if the subsequent token needs to be dedented is common to
# both the dedent calculation and the ignored_sp insertion.
- case token.event
+ case token[1]
when :on_embexpr_beg
embexpr_balance += 1
results << token
@@ -517,7 +482,7 @@ module Prism
# Here we're going to split the string on newlines, but maintain
# the newlines in the resulting array. We'll do that with a look
# behind assertion.
- splits = token.value.split(/(?<=\n)/)
+ splits = token[2].split(/(?<=\n)/)
index = 0
while index < splits.length
@@ -535,7 +500,8 @@ module Prism
# line or this line doesn't start with whitespace, then we
# should concatenate the rest of the string to match ripper.
if dedent == 0 && (!dedent_next || !line.start_with?(/\s/))
- line = splits[index..].join
+ unjoined = splits[index..] #: Array[String]
+ line = unjoined.join
index = splits.length
end
@@ -574,12 +540,12 @@ module Prism
ignored = deleted_chars.join
line.delete_prefix!(ignored)
- results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]])
+ results << [[lineno, 0], :on_ignored_sp, ignored, token[3]]
column = ignored.length
end
end
- results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty?
+ results << [[lineno, column], token[1], line, token[3]] unless line.empty?
index += 1
end
else
@@ -590,7 +556,7 @@ module Prism
end
dedent_next =
- ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) &&
+ ((token[1] == :on_tstring_content) || (token[1] == :on_heredoc_end)) &&
embexpr_balance == 0
end
@@ -600,12 +566,14 @@ module Prism
# Here we will split between the two types of heredocs and return the
# object that will store their tokens.
+ #--
+ #: (lex_compat_token opening) -> (PlainHeredoc | DashHeredoc | DedentingHeredoc)
def self.build(opening)
- case opening.value[2]
+ case opening[2][2]
when "~"
DedentingHeredoc.new
when "-"
- DashHeredoc.new(opening.value[3] != "'")
+ DashHeredoc.new(opening[2][3] != "'")
else
PlainHeredoc.new
end
@@ -614,33 +582,43 @@ module Prism
private_constant :Heredoc
- attr_reader :source, :options
+ # In previous versions of Ruby, Ripper wouldn't flush the bom before the
+ # first token, so we had to have a hack in place to account for that.
+ BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
+ private_constant :BOM_FLUSHED
+ attr_reader :options #: Hash[Symbol, untyped]
+ # @rbs @source: String
+
+ #: (String source, **untyped options) -> void
def initialize(source, **options)
@source = source
@options = options
end
+ #: () -> Result
def result
- tokens = [] #: Array[LexCompat::Token]
+ tokens = [] #: Array[lex_compat_token]
state = :default
heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]
- result = Prism.lex(source, **options)
+ result = Prism.lex(@source, **options)
+ source = result.source
result_value = result.value
- previous_state = nil #: Ripper::Lexer::State?
+ previous_state = nil #: Translation::Ripper::Lexer::State?
last_heredoc_end = nil #: Integer?
+ eof_token = nil #: Token?
+
+ bom = source.slice(0, 3) == "\xEF\xBB\xBF"
- # In previous versions of Ruby, Ripper wouldn't flush the bom before the
- # first token, so we had to have a hack in place to account for that. This
- # checks for that behavior.
- bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
- bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
+ result_value.each_with_index do |(prism_token, prism_state), index|
+ lineno = prism_token.location.start_line
+ column = prism_token.location.start_column
- result_value.each_with_index do |(token, lex_state), index|
- lineno = token.location.start_line
- column = token.location.start_column
+ event = RIPPER.fetch(prism_token.type)
+ value = prism_token.value
+ lex_state = Translation::Ripper::Lexer::State[prism_state]
# If there's a UTF-8 byte-order mark as the start of the file, then for
# certain tokens ripper sets the first token back by 3 bytes. It also
@@ -650,70 +628,53 @@ module Prism
if bom && lineno == 1
column -= 3
- if index == 0 && column == 0 && !bom_flushed
+ if index == 0 && column == 0 && !BOM_FLUSHED
flushed =
- case token.type
+ case prism_token.type
when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
:GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I,
:PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I,
:PERCENT_UPPER_W, :STRING_BEGIN
true
when :REGEXP_BEGIN, :SYMBOL_BEGIN
- token.value.start_with?("%")
+ value.start_with?("%")
else
false
end
unless flushed
column -= 3
- value = token.value
value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding))
end
end
end
- event = RIPPER.fetch(token.type)
- value = token.value
- lex_state = Ripper::Lexer::State.new(lex_state)
-
- token =
+ lex_compat_token =
case event
when :on___end__
- EndContentToken.new([[lineno, column], event, value, lex_state])
+ # Ripper doesn't include the rest of the token in the event, so we need to
+ # trim it down to just the content on the first line.
+ value = value[0..value.index("\n")] #: String
+ [[lineno, column], event, value, lex_state]
when :on_comment
- IgnoreStateToken.new([[lineno, column], event, value, lex_state])
+ [[lineno, column], event, value, lex_state]
when :on_heredoc_end
# Heredoc end tokens can be emitted in an odd order, so we don't
# want to bother comparing the state on them.
- last_heredoc_end = token.location.end_offset
- IgnoreStateToken.new([[lineno, column], event, value, lex_state])
- when :on_ident
- if lex_state == Ripper::EXPR_END
- # If we have an identifier that follows a method name like:
- #
- # def foo bar
- #
- # then Ripper will mark bar as END|LABEL if there is a local in a
- # parent scope named bar because it hasn't pushed the local table
- # yet. We do this more accurately, so we need to allow comparing
- # against both END and END|LABEL.
- ParamToken.new([[lineno, column], event, value, lex_state])
- elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
- # In the event that we're comparing identifiers, we're going to
- # allow a little divergence. Ripper doesn't account for local
- # variables introduced through named captures in regexes, and we
- # do, which accounts for this difference.
- IdentToken.new([[lineno, column], event, value, lex_state])
- else
- Token.new([[lineno, column], event, value, lex_state])
- end
+ last_heredoc_end = prism_token.location.end_offset
+ [[lineno, column], event, value, lex_state]
when :on_embexpr_end
- IgnoreStateToken.new([[lineno, column], event, value, lex_state])
- when :on_ignored_nl
- # Ignored newlines can occasionally have a LABEL state attached to
- # them which doesn't actually impact anything. We don't mirror that
- # state so we ignored it.
- IgnoredNewlineToken.new([[lineno, column], event, value, lex_state])
+ [[lineno, column], event, value, lex_state]
+ when :on_words_sep
+ # Ripper emits one token each per line.
+ value.each_line.with_index do |line, index|
+ if index > 0
+ lineno += 1
+ column = 0
+ end
+ tokens << [[lineno, column], event, line, lex_state]
+ end
+ tokens.pop #: lex_compat_token
when :on_regexp_end
# On regex end, Ripper scans and then sets end state, so the ripper
# lexed output is begin, when it should be end. prism sets lex state
@@ -738,13 +699,14 @@ module Prism
counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
end
- Ripper::Lexer::State.new(result_value[current_index][1])
+ Translation::Ripper::Lexer::State[result_value[current_index][1]]
else
previous_state
end
- Token.new([[lineno, column], event, value, lex_state])
+ [[lineno, column], event, value, lex_state]
when :on_eof
+ eof_token = prism_token
previous_token = result_value[index - 1][0]
# If we're at the end of the file and the previous token was a
@@ -759,7 +721,7 @@ module Prism
# Use the greater offset of the two to determine the start of
# the trailing whitespace.
start_offset = [previous_token.location.end_offset, last_heredoc_end].compact.max
- end_offset = token.location.start_offset
+ end_offset = prism_token.location.start_offset
if start_offset < end_offset
if bom
@@ -767,14 +729,14 @@ module Prism
end_offset += 3
end
- tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
+ tokens << [[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state]
end
end
- Token.new([[lineno, column], event, value, lex_state])
+ [[lineno, column], event, value, lex_state]
else
- Token.new([[lineno, column], event, value, lex_state])
- end
+ [[lineno, column], event, value, lex_state]
+ end #: lex_compat_token
previous_state = lex_state
@@ -791,19 +753,19 @@ module Prism
when :default
# The default state is when there are no heredocs at all. In this
# state we can append the token to the list of tokens and move on.
- tokens << token
+ tokens << lex_compat_token
# If we get the declaration of a heredoc, then we open a new heredoc
# and move into the heredoc_opened state.
if event == :on_heredoc_beg
state = :heredoc_opened
- heredoc_stack.last << Heredoc.build(token)
+ heredoc_stack.last << Heredoc.build(lex_compat_token)
end
when :heredoc_opened
# The heredoc_opened state is when we've seen the declaration of a
# heredoc and are now lexing the body of the heredoc. In this state we
# push tokens onto the most recently created heredoc.
- heredoc_stack.last.last << token
+ heredoc_stack.last.last << lex_compat_token
case event
when :on_heredoc_beg
@@ -811,7 +773,7 @@ module Prism
# heredoc, this means we have nested heredocs. In this case we'll
# push a new heredoc onto the stack and stay in the heredoc_opened
# state since we're now lexing the body of the new heredoc.
- heredoc_stack << [Heredoc.build(token)]
+ heredoc_stack << [Heredoc.build(lex_compat_token)]
when :on_heredoc_end
# If we receive the end of a heredoc, then we're done lexing the
# body of the heredoc. In this case we now have a completed heredoc
@@ -820,10 +782,10 @@ module Prism
state = :heredoc_closed
end
when :heredoc_closed
- if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n"))
+ if %i[on_nl on_ignored_nl on_comment].include?(event) || ((event == :on_tstring_content) && value.end_with?("\n"))
if heredoc_stack.size > 1
- flushing = heredoc_stack.pop
- heredoc_stack.last.last << token
+ flushing = heredoc_stack.pop #: Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]
+ heredoc_stack.last.last << lex_compat_token
flushing.each do |heredoc|
heredoc.to_a.each do |flushed_token|
@@ -835,12 +797,12 @@ module Prism
next
end
elsif event == :on_heredoc_beg
- tokens << token
+ tokens << lex_compat_token
state = :heredoc_opened
- heredoc_stack.last << Heredoc.build(token)
+ heredoc_stack.last << Heredoc.build(lex_compat_token)
next
elsif heredoc_stack.size > 1
- heredoc_stack[-2].last << token
+ heredoc_stack[-2].last << lex_compat_token
next
end
@@ -851,77 +813,94 @@ module Prism
heredoc_stack.last.clear
state = :default
- tokens << token
+ tokens << lex_compat_token
end
end
- # Drop the EOF token from the list
- tokens = tokens[0...-1]
-
- # We sort by location to compare against Ripper's output
- tokens.sort_by!(&:location)
-
- Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.for(source))
- end
- end
+ # Drop the EOF token from the list. The EOF token may not be
+ # present if the source was syntax invalid
+ if tokens.dig(-1, 1) == :on_eof
+ tokens = tokens[0...-1] #: Array[lex_compat_token]
+ end
- private_constant :LexCompat
+ # We sort by location because Ripper.lex sorts.
+ tokens.sort_by! do |token|
+ line, column = token[0]
+ source.byte_offset(line, column)
+ end
- # This is a class that wraps the Ripper lexer to produce almost exactly the
- # same tokens.
- class LexRipper # :nodoc:
- attr_reader :source
+ tokens = post_process_tokens(tokens, source, result.data_loc, bom, eof_token)
- def initialize(source)
- @source = source
+ Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, result.continuable?, source)
end
- def result
- previous = [] #: [[Integer, Integer], Symbol, String, untyped] | []
- results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]]
-
- lex(source).each do |token|
- case token[1]
- when :on_sp
- # skip
- when :on_tstring_content
- if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
- previous[2] << token[2]
- else
- results << token
- previous = token
- end
- when :on_words_sep
- if previous[1] == :on_words_sep
- previous[2] << token[2]
+ private
+
+ #: (Array[lex_compat_token] tokens, Source source, Location? data_loc, bool bom, Token? eof_token) -> Array[lex_compat_token]
+ def post_process_tokens(tokens, source, data_loc, bom, eof_token)
+ new_tokens = [] #: Array[lex_compat_token]
+
+ prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG]
+ prev_token_end = bom ? 3 : 0
+
+ cache = Translation::Ripper::LineAndColumnCache.new(source)
+
+ tokens.each do |token|
+ # Skip missing heredoc ends.
+ next if token[1] == :on_heredoc_end && token[2] == ""
+
+ # Add :on_sp tokens.
+ line, column = token[0]
+ start_offset = source.byte_offset(line, column)
+
+ # Ripper reports columns on line 1 without counting the BOM, so we
+ # adjust to get the real offset
+ start_offset += 3 if line == 1 && bom
+
+ if start_offset > prev_token_end
+ sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
+ sp_line, sp_column = cache.line_and_column(prev_token_end)
+ # Ripper reports columns on line 1 without counting the BOM
+ sp_column -= 3 if sp_line == 1 && bom
+ continuation_index = sp_value.byteindex("\\")
+
+ # ripper emits up to three :on_sp tokens when line continuations are used
+ if continuation_index
+ next_whitespace_index = continuation_index + 1
+ next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r"
+ next_whitespace_index += 1
+ first_whitespace = sp_value[0...continuation_index] #: String
+ continuation = sp_value[continuation_index...next_whitespace_index] #: String
+ second_whitespace = sp_value[next_whitespace_index..] || ""
+
+ new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty?
+ new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state]
+ new_tokens << [[sp_line + 1, 0], :on_sp, second_whitespace, prev_token_state] unless second_whitespace.empty?
else
- results << token
- previous = token
+ new_tokens << [[sp_line, sp_column], :on_sp, sp_value, prev_token_state]
end
- else
- results << token
- previous = token
end
- end
-
- results
- end
-
- private
- if Ripper.method(:lex).parameters.assoc(:keyrest)
- def lex(source)
- Ripper.lex(source, raise_errors: true)
+ new_tokens << token
+ prev_token_state = token[3]
+ prev_token_end = start_offset + token[2].bytesize
end
- else
- def lex(source)
- ripper = Ripper::Lexer.new(source)
- ripper.lex.tap do |result|
- raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any?
+
+ if !data_loc && eof_token # no trailing :on_sp with __END__ as it is always preceded by :on_nl
+ end_offset = eof_token.location.end_offset
+ if prev_token_end < end_offset
+ new_tokens << [
+ [source.line(prev_token_end), source.column(prev_token_end)],
+ :on_sp,
+ source.slice(prev_token_end, end_offset - prev_token_end),
+ prev_token_state
+ ]
end
end
+
+ new_tokens
end
end
- private_constant :LexRipper
+ private_constant :LexCompat
end
diff --git a/lib/prism/node_ext.rb b/lib/prism/node_ext.rb
index b007a051ea..8a6624e76d 100644
--- a/lib/prism/node_ext.rb
+++ b/lib/prism/node_ext.rb
@@ -1,12 +1,17 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
+#--
# Here we are reopening the prism module to provide methods on nodes that aren't
# templated and are meant as convenience methods.
+#++
module Prism
class Node
+ #: (*String replacements) -> void
def deprecated(*replacements) # :nodoc:
- location = caller_locations(1, 1)
- location = location[0].label if location
+ location = caller_locations(1, 1)&.[](0)&.label
suggest = replacements.map { |replacement| "#{self.class}##{replacement}" }
warn(<<~MSG, uplevel: 1, category: :deprecated)
@@ -20,7 +25,9 @@ module Prism
module RegularExpressionOptions # :nodoc:
# Returns a numeric value that represents the flags that were used to create
# the regular expression.
- def options
+ #--
+ #: (Integer flags) -> Integer
+ def self.options(flags)
o = 0
o |= Regexp::IGNORECASE if flags.anybits?(RegularExpressionFlags::IGNORE_CASE)
o |= Regexp::EXTENDED if flags.anybits?(RegularExpressionFlags::EXTENDED)
@@ -32,43 +39,87 @@ module Prism
end
class InterpolatedMatchLastLineNode < Node
- include RegularExpressionOptions
+ # Returns a numeric value that represents the flags that were used to create
+ # the regular expression.
+ #--
+ #: () -> Integer
+ def options
+ RegularExpressionOptions.options(flags)
+ end
end
class InterpolatedRegularExpressionNode < Node
- include RegularExpressionOptions
+ # Returns a numeric value that represents the flags that were used to create
+ # the regular expression.
+ #--
+ #: () -> Integer
+ def options
+ RegularExpressionOptions.options(flags)
+ end
end
class MatchLastLineNode < Node
- include RegularExpressionOptions
+ # Returns a numeric value that represents the flags that were used to create
+ # the regular expression.
+ #--
+ #: () -> Integer
+ def options
+ RegularExpressionOptions.options(flags)
+ end
end
class RegularExpressionNode < Node
- include RegularExpressionOptions
+ # Returns a numeric value that represents the flags that were used to create
+ # the regular expression.
+ #--
+ #: () -> Integer
+ def options
+ RegularExpressionOptions.options(flags)
+ end
end
private_constant :RegularExpressionOptions
module HeredocQuery # :nodoc:
# Returns true if this node was represented as a heredoc in the source code.
- def heredoc?
+ #--
+ #: (String? opening) -> bool?
+ def self.heredoc?(opening)
+ # @type self: InterpolatedStringNode | InterpolatedXStringNode | StringNode | XStringNode
opening&.start_with?("<<")
end
end
class InterpolatedStringNode < Node
- include HeredocQuery
+ # Returns true if this node was represented as a heredoc in the source code.
+ #--
+ #: () -> bool?
+ def heredoc?
+ HeredocQuery.heredoc?(opening)
+ end
end
class InterpolatedXStringNode < Node
- include HeredocQuery
+ # Returns true if this node was represented as a heredoc in the source code.
+ #--
+ #: () -> bool?
+ def heredoc?
+ HeredocQuery.heredoc?(opening)
+ end
end
class StringNode < Node
- include HeredocQuery
+ # Returns true if this node was represented as a heredoc in the source code.
+ #--
+ #: () -> bool?
+ def heredoc?
+ HeredocQuery.heredoc?(opening)
+ end
# Occasionally it's helpful to treat a string as if it were interpolated so
# that there's a consistent interface for working with strings.
+ #--
+ #: () -> InterpolatedStringNode
def to_interpolated
InterpolatedStringNode.new(
source,
@@ -83,10 +134,17 @@ module Prism
end
class XStringNode < Node
- include HeredocQuery
+ # Returns true if this node was represented as a heredoc in the source code.
+ #--
+ #: () -> bool?
+ def heredoc?
+ HeredocQuery.heredoc?(opening)
+ end
# Occasionally it's helpful to treat a string as if it were interpolated so
# that there's a consistent interface for working with strings.
+ #--
+ #: () -> InterpolatedXStringNode
def to_interpolated
InterpolatedXStringNode.new(
source,
@@ -104,6 +162,8 @@ module Prism
class ImaginaryNode < Node
# Returns the value of the node as a Ruby Complex.
+ #--
+ #: () -> Complex
def value
Complex(0, numeric.value)
end
@@ -111,31 +171,25 @@ module Prism
class RationalNode < Node
# Returns the value of the node as a Ruby Rational.
+ #--
+ #: () -> Rational
def value
Rational(numerator, denominator)
end
-
- # Returns the value of the node as an IntegerNode or a FloatNode. This
- # method is deprecated in favor of #value or #numerator/#denominator.
- def numeric
- deprecated("value", "numerator", "denominator")
-
- if denominator == 1
- IntegerNode.new(source, -1, location.chop, flags, numerator)
- else
- FloatNode.new(source, -1, location.chop, 0, numerator.to_f / denominator)
- end
- end
end
class ConstantReadNode < Node
# Returns the list of parts for the full name of this constant.
# For example: [:Foo]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
[name]
end
# Returns the full name of this constant. For example: "Foo"
+ #--
+ #: () -> String
def full_name
name.to_s
end
@@ -144,11 +198,15 @@ module Prism
class ConstantWriteNode < Node
# Returns the list of parts for the full name of this constant.
# For example: [:Foo]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
[name]
end
# Returns the full name of this constant. For example: "Foo"
+ #--
+ #: () -> String
def full_name
name.to_s
end
@@ -163,13 +221,15 @@ module Prism
# local variable
class DynamicPartsInConstantPathError < StandardError; end
- # An error class raised when missing nodes are found while computing a
+ # An error class raised when error recovery nodes are found while computing a
# constant path's full name. For example:
# Foo:: -> raises because the constant path is missing the last part
- class MissingNodesInConstantPathError < StandardError; end
+ class ErrorRecoveryNodesInConstantPathError < StandardError; end
# Returns the list of parts for the full name of this constant path.
# For example: [:Foo, :Bar]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
parts = [] #: Array[Symbol]
current = self #: node?
@@ -177,7 +237,7 @@ module Prism
while current.is_a?(ConstantPathNode)
name = current.name
if name.nil?
- raise MissingNodesInConstantPathError, "Constant path contains missing nodes. Cannot compute full name"
+ raise ErrorRecoveryNodesInConstantPathError, "Constant path contains error recovery nodes. Cannot compute full name"
end
parts.unshift(name)
@@ -192,30 +252,21 @@ module Prism
end
# Returns the full name of this constant path. For example: "Foo::Bar"
+ #--
+ #: () -> String
def full_name
full_name_parts.join("::")
end
-
- # Previously, we had a child node on this class that contained either a
- # constant read or a missing node. To not cause a breaking change, we
- # continue to supply that API.
- def child
- deprecated("name", "name_loc")
-
- if name
- ConstantReadNode.new(source, -1, name_loc, 0, name)
- else
- MissingNode.new(source, -1, location, 0)
- end
- end
end
class ConstantPathTargetNode < Node
# Returns the list of parts for the full name of this constant path.
# For example: [:Foo, :Bar]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
parts =
- case parent
+ case (parent = self.parent)
when ConstantPathNode, ConstantReadNode
parent.full_name_parts
when nil
@@ -225,40 +276,33 @@ module Prism
raise ConstantPathNode::DynamicPartsInConstantPathError, "Constant target path contains dynamic parts. Cannot compute full name"
end
- if name.nil?
- raise ConstantPathNode::MissingNodesInConstantPathError, "Constant target path contains missing nodes. Cannot compute full name"
+ if (name = self.name).nil?
+ raise ConstantPathNode::ErrorRecoveryNodesInConstantPathError, "Constant target path contains error recovery nodes. Cannot compute full name"
end
parts.push(name)
end
# Returns the full name of this constant path. For example: "Foo::Bar"
+ #--
+ #: () -> String
def full_name
full_name_parts.join("::")
end
-
- # Previously, we had a child node on this class that contained either a
- # constant read or a missing node. To not cause a breaking change, we
- # continue to supply that API.
- def child
- deprecated("name", "name_loc")
-
- if name
- ConstantReadNode.new(source, -1, name_loc, 0, name)
- else
- MissingNode.new(source, -1, location, 0)
- end
- end
end
class ConstantTargetNode < Node
# Returns the list of parts for the full name of this constant.
# For example: [:Foo]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
[name]
end
# Returns the full name of this constant. For example: "Foo"
+ #--
+ #: () -> String
def full_name
name.to_s
end
@@ -266,6 +310,8 @@ module Prism
class ParametersNode < Node
# Mirrors the Method#parameters method.
+ #--
+ #: () -> Array[[Symbol, Symbol] | [Symbol]]
def signature
names = [] #: Array[[Symbol, Symbol] | [Symbol]]
@@ -275,7 +321,7 @@ module Prism
optionals.each { |param| names << [:opt, param.name] }
- if rest && rest.is_a?(RestParameterNode)
+ if (rest = self.rest).is_a?(RestParameterNode)
names << [:rest, rest.name || :*]
end
@@ -283,8 +329,7 @@ module Prism
case param
when MultiTargetNode
names << [:req]
- when NoKeywordsParameterNode, KeywordRestParameterNode, ForwardingParameterNode
- # Invalid syntax, e.g. "def f(**nil, ...)" moves the NoKeywordsParameterNode to posts
+ when ErrorRecoveryNode
raise "Invalid syntax"
else
names << [:req, param.name]
@@ -304,7 +349,7 @@ module Prism
keyopt.each { |param| names << [:key, param.name] }
- case keyword_rest
+ case (keyword_rest = self.keyword_rest)
when ForwardingParameterNode
names.concat([[:rest, :*], [:keyrest, :**], [:block, :&]])
when KeywordRestParameterNode
@@ -313,7 +358,13 @@ module Prism
names << [:nokey]
end
- names << [:block, block.name || :&] if block
+ case (block = self.block)
+ when BlockParameterNode
+ names << [:block, block.name || :&]
+ when NoBlockParameterNode
+ names << [:noblock]
+ end
+
names
end
end
@@ -328,181 +379,10 @@ module Prism
# can be any amount of space between the message and the = sign. However,
# sometimes you want the location of the full message including the inner
# space and the = sign. This method provides that.
+ #--
+ #: () -> Location?
def full_message_loc
attribute_write? ? message_loc&.adjoin("=") : message_loc
end
end
-
- class CallOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class ClassVariableOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class ConstantOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class ConstantPathOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class GlobalVariableOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class IndexOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class InstanceVariableOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class LocalVariableOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class CaseMatchNode < Node
- # Returns the else clause of the case match node. This method is deprecated
- # in favor of #else_clause.
- def consequent
- deprecated("else_clause")
- else_clause
- end
- end
-
- class CaseNode < Node
- # Returns the else clause of the case node. This method is deprecated in
- # favor of #else_clause.
- def consequent
- deprecated("else_clause")
- else_clause
- end
- end
-
- class IfNode < Node
- # Returns the subsequent if/elsif/else clause of the if node. This method is
- # deprecated in favor of #subsequent.
- def consequent
- deprecated("subsequent")
- subsequent
- end
- end
-
- class RescueNode < Node
- # Returns the subsequent rescue clause of the rescue node. This method is
- # deprecated in favor of #subsequent.
- def consequent
- deprecated("subsequent")
- subsequent
- end
- end
-
- class UnlessNode < Node
- # Returns the else clause of the unless node. This method is deprecated in
- # favor of #else_clause.
- def consequent
- deprecated("else_clause")
- else_clause
- end
- end
end
diff --git a/lib/prism/node_find.rb b/lib/prism/node_find.rb
new file mode 100644
index 0000000000..697ee430e8
--- /dev/null
+++ b/lib/prism/node_find.rb
@@ -0,0 +1,185 @@
+# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
+
+module Prism
+ # Finds the Prism AST node corresponding to a given Method, UnboundMethod,
+ # Proc, or Thread::Backtrace::Location. On CRuby, uses node_id from the
+ # instruction sequence for an exact match. On other implementations, falls
+ # back to best-effort matching by source location line number.
+ #
+ # This module is autoloaded so that programs that don't use Prism.find don't
+ # pay for its definition.
+ module NodeFind # :nodoc:
+ # Find the node for the given callable or backtrace location.
+ #--
+ #: (Method | UnboundMethod | Proc | Thread::Backtrace::Location callable, bool rubyvm) -> Node?
+ def self.find(callable, rubyvm)
+ case callable
+ when Proc
+ if rubyvm
+ RubyVMCallableFind.new.find(callable)
+ elsif callable.lambda?
+ LineLambdaFind.new.find(callable)
+ else
+ LineProcFind.new.find(callable)
+ end
+ when Method, UnboundMethod
+ if rubyvm
+ RubyVMCallableFind.new.find(callable)
+ else
+ LineMethodFind.new.find(callable)
+ end
+ when Thread::Backtrace::Location
+ if rubyvm
+ RubyVMBacktraceLocationFind.new.find(callable)
+ else
+ LineBacktraceLocationFind.new.find(callable)
+ end
+ else
+ raise ArgumentError, "Expected a Method, UnboundMethod, Proc, or Thread::Backtrace::Location, got #{callable.class}"
+ end
+ end
+
+ # Base class that handles parsing a file.
+ class Find
+ private
+
+ # Parse the given file path, returning a ParseResult or nil.
+ #--
+ #: (String? file) -> ParseResult?
+ def parse_file(file)
+ return unless file && File.readable?(file)
+ result = Prism.parse_file(file)
+ result if result.success?
+ end
+ end
+
+ # Finds the AST node for a Method, UnboundMethod, or Proc using the node_id
+ # from the instruction sequence.
+ class RubyVMCallableFind < Find
+ # Find the node for the given callable using the ISeq node_id.
+ #--
+ #: (Method | UnboundMethod | Proc callable) -> Node?
+ def find(callable)
+ return unless (source_location = callable.source_location)
+ return unless (result = parse_file(source_location[0]))
+ return unless (iseq = RubyVM::InstructionSequence.of(callable))
+
+ header = iseq.to_a[4]
+ return unless header[:parser] == :prism
+
+ result.value.find { |node| node.node_id == header[:node_id] }
+ end
+ end
+
+ # Finds the AST node for a Thread::Backtrace::Location using the node_id
+ # from the backtrace location.
+ class RubyVMBacktraceLocationFind < Find
+ # Find the node for the given backtrace location using node_id.
+ #--
+ #: (Thread::Backtrace::Location location) -> Node?
+ def find(location)
+ file = location.absolute_path || location.path
+ return unless (result = parse_file(file))
+ return unless RubyVM::AbstractSyntaxTree.respond_to?(:node_id_for_backtrace_location)
+
+ node_id = RubyVM::AbstractSyntaxTree.node_id_for_backtrace_location(location)
+
+ result.value.find { |node| node.node_id == node_id }
+ end
+ end
+
+ # Finds the AST node for a Method or UnboundMethod using best-effort line
+ # matching. Used on non-CRuby implementations.
+ class LineMethodFind < Find
+ # Find the node for the given method by matching on name and line.
+ #--
+ #: (Method | UnboundMethod callable) -> Node?
+ def find(callable)
+ return unless (source_location = callable.source_location)
+ return unless (result = parse_file(source_location[0]))
+
+ name = callable.name
+ start_line = source_location[1]
+
+ result.value.find do |node|
+ case node
+ when DefNode
+ node.name == name && node.location.start_line == start_line
+ when CallNode
+ node.block.is_a?(BlockNode) && node.location.start_line == start_line
+ else
+ false
+ end
+ end
+ end
+ end
+
+ # Finds the AST node for a lambda using best-effort line matching. Used
+ # on non-CRuby implementations.
+ class LineLambdaFind < Find
+ # Find the node for the given lambda by matching on line.
+ #--
+ #: (Proc callable) -> Node?
+ def find(callable)
+ return unless (source_location = callable.source_location)
+ return unless (result = parse_file(source_location[0]))
+
+ start_line = source_location[1]
+
+ result.value.find do |node|
+ case node
+ when LambdaNode
+ node.location.start_line == start_line
+ when CallNode
+ node.block.is_a?(BlockNode) && node.location.start_line == start_line
+ else
+ false
+ end
+ end
+ end
+ end
+
+ # Finds the AST node for a non-lambda Proc using best-effort line
+ # matching. Used on non-CRuby implementations.
+ class LineProcFind < Find
+ # Find the node for the given proc by matching on line.
+ #--
+ #: (Proc callable) -> Node?
+ def find(callable)
+ return unless (source_location = callable.source_location)
+ return unless (result = parse_file(source_location[0]))
+
+ start_line = source_location[1]
+
+ result.value.find do |node|
+ case node
+ when ForNode
+ node.location.start_line == start_line
+ when CallNode
+ node.block.is_a?(BlockNode) && node.location.start_line == start_line
+ else
+ false
+ end
+ end
+ end
+ end
+
+ # Finds the AST node for a Thread::Backtrace::Location using best-effort
+ # line matching. Used on non-CRuby implementations.
+ class LineBacktraceLocationFind < Find
+ # Find the node for the given backtrace location by matching on line.
+ #--
+ #: (Thread::Backtrace::Location location) -> Node?
+ def find(location)
+ file = location.absolute_path || location.path
+ return unless (result = parse_file(file))
+
+ start_line = location.lineno
+ result.value.find { |node| node.location.start_line == start_line }
+ end
+ end
+ end
+end
diff --git a/lib/prism/pack.rb b/lib/prism/pack.rb
deleted file mode 100644
index c0de8ab8b7..0000000000
--- a/lib/prism/pack.rb
+++ /dev/null
@@ -1,228 +0,0 @@
-# frozen_string_literal: true
-# typed: ignore
-
-module Prism
- # A parser for the pack template language.
- module Pack
- %i[
- SPACE
- COMMENT
- INTEGER
- UTF8
- BER
- FLOAT
- STRING_SPACE_PADDED
- STRING_NULL_PADDED
- STRING_NULL_TERMINATED
- STRING_MSB
- STRING_LSB
- STRING_HEX_HIGH
- STRING_HEX_LOW
- STRING_UU
- STRING_MIME
- STRING_BASE64
- STRING_FIXED
- STRING_POINTER
- MOVE
- BACK
- NULL
-
- UNSIGNED
- SIGNED
- SIGNED_NA
-
- AGNOSTIC_ENDIAN
- LITTLE_ENDIAN
- BIG_ENDIAN
- NATIVE_ENDIAN
- ENDIAN_NA
-
- SIZE_SHORT
- SIZE_INT
- SIZE_LONG
- SIZE_LONG_LONG
- SIZE_8
- SIZE_16
- SIZE_32
- SIZE_64
- SIZE_P
- SIZE_NA
-
- LENGTH_FIXED
- LENGTH_MAX
- LENGTH_RELATIVE
- LENGTH_NA
- ].each do |const|
- const_set(const, const)
- end
-
- # A directive in the pack template language.
- class Directive
- # A symbol representing the version of Ruby.
- attr_reader :version
-
- # A symbol representing whether or not we are packing or unpacking.
- attr_reader :variant
-
- # A byteslice of the source string that this directive represents.
- attr_reader :source
-
- # The type of the directive.
- attr_reader :type
-
- # The type of signedness of the directive.
- attr_reader :signed
-
- # The type of endianness of the directive.
- attr_reader :endian
-
- # The size of the directive.
- attr_reader :size
-
- # The length type of this directive (used for integers).
- attr_reader :length_type
-
- # The length of this directive (used for integers).
- attr_reader :length
-
- # Initialize a new directive with the given values.
- def initialize(version, variant, source, type, signed, endian, size, length_type, length)
- @version = version
- @variant = variant
- @source = source
- @type = type
- @signed = signed
- @endian = endian
- @size = size
- @length_type = length_type
- @length = length
- end
-
- # The descriptions of the various types of endianness.
- ENDIAN_DESCRIPTIONS = {
- AGNOSTIC_ENDIAN: "agnostic",
- LITTLE_ENDIAN: "little-endian (VAX)",
- BIG_ENDIAN: "big-endian (network)",
- NATIVE_ENDIAN: "native-endian",
- ENDIAN_NA: "n/a"
- }
-
- # The descriptions of the various types of signedness.
- SIGNED_DESCRIPTIONS = {
- UNSIGNED: "unsigned",
- SIGNED: "signed",
- SIGNED_NA: "n/a"
- }
-
- # The descriptions of the various types of sizes.
- SIZE_DESCRIPTIONS = {
- SIZE_SHORT: "short",
- SIZE_INT: "int-width",
- SIZE_LONG: "long",
- SIZE_LONG_LONG: "long long",
- SIZE_8: "8-bit",
- SIZE_16: "16-bit",
- SIZE_32: "32-bit",
- SIZE_64: "64-bit",
- SIZE_P: "pointer-width"
- }
-
- # Provide a human-readable description of the directive.
- def describe
- case type
- when SPACE
- "whitespace"
- when COMMENT
- "comment"
- when INTEGER
- if size == SIZE_8
- base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer"
- else
- base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer"
- end
- case length_type
- when LENGTH_FIXED
- if length > 1
- base + ", x#{length}"
- else
- base
- end
- when LENGTH_MAX
- base + ", as many as possible"
- else
- raise
- end
- when UTF8
- "UTF-8 character"
- when BER
- "BER-compressed integer"
- when FLOAT
- "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float"
- when STRING_SPACE_PADDED
- "arbitrary binary string (space padded)"
- when STRING_NULL_PADDED
- "arbitrary binary string (null padded, count is width)"
- when STRING_NULL_TERMINATED
- "arbitrary binary string (null padded, count is width), except that null is added with *"
- when STRING_MSB
- "bit string (MSB first)"
- when STRING_LSB
- "bit string (LSB first)"
- when STRING_HEX_HIGH
- "hex string (high nibble first)"
- when STRING_HEX_LOW
- "hex string (low nibble first)"
- when STRING_UU
- "UU-encoded string"
- when STRING_MIME
- "quoted printable, MIME encoding"
- when STRING_BASE64
- "base64 encoded string"
- when STRING_FIXED
- "pointer to a structure (fixed-length string)"
- when STRING_POINTER
- "pointer to a null-terminated string"
- when MOVE
- "move to absolute position"
- when BACK
- "back up a byte"
- when NULL
- "null byte"
- else
- raise
- end
- end
- end
-
- # The result of parsing a pack template.
- class Format
- # A list of the directives in the template.
- attr_reader :directives
-
- # The encoding of the template.
- attr_reader :encoding
-
- # Create a new Format with the given directives and encoding.
- def initialize(directives, encoding)
- @directives = directives
- @encoding = encoding
- end
-
- # Provide a human-readable description of the format.
- def describe
- source_width = directives.map { |d| d.source.inspect.length }.max
- directive_lines = directives.map do |directive|
- if directive.type == SPACE
- source = directive.source.inspect
- else
- source = directive.source
- end
- # @type var source_width: Integer
- " #{source.ljust(source_width)} #{directive.describe}"
- end
-
- (["Directives:"] + directive_lines + ["Encoding:", " #{encoding}"]).join("\n")
- end
- end
- end
-end
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index 9a3e7c5b79..93d3c006b7 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -1,6 +1,16 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
+ # @rbs!
+ # # An internal interface for a cache that can be used to compute code
+ # # units from byte offsets.
+ # interface _CodeUnitsCache
+ # def []: (Integer byte_offset) -> Integer
+ # end
+
# This represents a source of Ruby code that has been parsed. It is used in
# conjunction with locations to allow them to resolve line numbers and source
# ranges.
@@ -9,7 +19,18 @@ module Prism
# be used instead of `new` and it will return either a `Source` or a
# specialized and more performant `ASCIISource` if no multibyte characters
# are present in the source code.
- def self.for(source, start_line = 1, offsets = [])
+ #
+ # Note that if you are calling this method manually, you will need to supply
+ # the start_line and offsets parameters. start_line is the line number that
+ # the source starts on, which is typically 1 but can be different if this
+ # source is a subset of a larger source or if this is an eval. offsets is an
+ # array of byte offsets for the start of each line in the source code, which
+ # can be calculated by iterating through the source code and recording the
+ # byte offset whenever a newline character is encountered. The first
+ # element is always 0 to mark the first line.
+ #--
+ #: (String source, Integer start_line, Array[Integer] offsets) -> Source
+ def self.for(source, start_line, offsets)
if source.ascii_only?
ASCIISource.new(source, start_line, offsets)
elsif source.encoding == Encoding::BINARY
@@ -33,77 +54,122 @@ module Prism
end
# The source code that this source object represents.
- attr_reader :source
+ attr_reader :source #: String
# The line number where this source starts.
- attr_reader :start_line
-
- # The list of newline byte offsets in the source code.
- attr_reader :offsets
-
- # Create a new source object with the given source code.
- def initialize(source, start_line = 1, offsets = [])
+ attr_reader :start_line #: Integer
+
+ # The list of newline byte offsets in the source code. When initialized from
+ # the C extension, this may be a packed binary string of uint32_t values
+ # that is lazily unpacked on first access.
+ #--
+ #: () -> Array[Integer]
+ def offsets
+ offsets = @offsets
+ return offsets if offsets.is_a?(Array)
+ @offsets = offsets.unpack("L*")
+ end
+
+ # Create a new source object with the given source code. The offsets
+ # parameter can be either an Array of Integer byte offsets or a packed
+ # binary string of uint32_t values (from the C extension).
+ #--
+ #: (String source, Integer start_line, Array[Integer] | String offsets) -> void
+ def initialize(source, start_line, offsets)
@source = source
- @start_line = start_line # set after parsing is done
- @offsets = offsets # set after parsing is done
+ @start_line = start_line
+ @offsets = offsets
end
# Replace the value of start_line with the given value.
+ #--
+ #: (Integer start_line) -> void
def replace_start_line(start_line)
@start_line = start_line
end
# Replace the value of offsets with the given value.
+ #--
+ #: (Array[Integer] offsets) -> void
def replace_offsets(offsets)
- @offsets.replace(offsets)
+ @offsets = offsets
end
# Returns the encoding of the source code, which is set by parameters to the
# parser or by the encoding magic comment.
+ #--
+ #: () -> Encoding
def encoding
source.encoding
end
# Returns the lines of the source code as an array of strings.
+ #--
+ #: () -> Array[String]
def lines
source.lines
end
# Perform a byteslice on the source code using the given byte offset and
# byte length.
+ #--
+ #: (Integer byte_offset, Integer length) -> String
def slice(byte_offset, length)
source.byteslice(byte_offset, length) or raise
end
+ # Converts the line number and column in bytes to a byte offset.
+ #--
+ #: (Integer line, Integer column) -> Integer
+ def byte_offset(line, column)
+ normal = line - @start_line
+ raise IndexError if normal < 0
+ offsets.fetch(normal) + column
+ rescue IndexError
+ raise ArgumentError, "line #{line} is out of range"
+ end
+
# Binary search through the offsets to find the line number for the given
# byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def line(byte_offset)
start_line + find_line(byte_offset)
end
# Return the byte offset of the start of the line corresponding to the given
# byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def line_start(byte_offset)
offsets[find_line(byte_offset)]
end
# Returns the byte offset of the end of the line corresponding to the given
# byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def line_end(byte_offset)
offsets[find_line(byte_offset) + 1] || source.bytesize
end
- # Return the column number for the given byte offset.
+ # Return the column in bytes for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def column(byte_offset)
byte_offset - line_start(byte_offset)
end
# Return the character offset for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def character_offset(byte_offset)
(source.byteslice(0, byte_offset) or raise).length
end
- # Return the column number in characters for the given byte offset.
+ # Return the column in characters for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def character_column(byte_offset)
character_offset(byte_offset) - character_offset(line_start(byte_offset))
end
@@ -120,7 +186,11 @@ module Prism
# possible that the given byte offset will not occur on a character
# boundary. Second, it's possible that the source code will contain a
# character that has no equivalent in the given encoding.
+ #--
+ #: (Integer byte_offset, Encoding encoding) -> Integer
def code_units_offset(byte_offset, encoding)
+ return byte_offset if encoding == Encoding::UTF_8
+
byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
@@ -132,43 +202,36 @@ module Prism
# Generate a cache that targets a specific encoding for calculating code
# unit offsets.
+ #--
+ #: (Encoding encoding) -> CodeUnitsCache
def code_units_cache(encoding)
CodeUnitsCache.new(source, encoding)
end
- # Returns the column number in code units for the given encoding for the
+ # Returns the column in code units for the given encoding for the
# given byte offset.
+ #--
+ #: (Integer byte_offset, Encoding encoding) -> Integer
def code_units_column(byte_offset, encoding)
code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
end
# Freeze this object and the objects it contains.
+ #--
+ #: () -> void
def deep_freeze
source.freeze
offsets.freeze
freeze
end
- private
-
- # Binary search through the offsets to find the line number for the given
+ # Binary search through the offsets to find the index for the given
# byte offset.
- def find_line(byte_offset)
- left = 0
- right = offsets.length - 1
-
- while left <= right
- mid = left + (right - left) / 2
- return mid if (offset = offsets[mid]) == byte_offset
-
- if offset < byte_offset
- left = mid + 1
- else
- right = mid - 1
- end
- end
-
- left - 1
+ #--
+ #: (Integer byte_offset) -> Integer
+ def find_line(byte_offset) # :nodoc:
+ index = offsets.bsearch_index { |offset| offset > byte_offset } || offsets.length
+ index - 1
end
end
@@ -187,38 +250,69 @@ module Prism
# has not yet been implemented.
#
class CodeUnitsCache
+ # Counter used for UTF-8, where one code unit equals one byte.
+ class UTF8Counter # :nodoc:
+ #: (Integer byte_offset, Integer byte_length) -> Integer
+ def count(byte_offset, byte_length)
+ byte_length
+ end
+ end
+
class UTF16Counter # :nodoc:
+ # @rbs @source: String
+ # @rbs @encoding: Encoding
+
+ #: (String source, Encoding encoding) -> void
def initialize(source, encoding)
@source = source
@encoding = encoding
end
+ #: (Integer byte_offset, Integer byte_length) -> Integer
def count(byte_offset, byte_length)
- @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
+ (@source.byteslice(byte_offset, byte_length) or raise).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
end
end
- class LengthCounter # :nodoc:
+ # Counter used for UTF-32, where one code unit equals one code point and
+ # matches String#length. Also used as a best-effort fallback for any other
+ # encoding that does not have a dedicated counter.
+ class UTF32Counter # :nodoc:
+ # @rbs @source: String
+ # @rbs @encoding: Encoding
+
+ #: (String source, Encoding encoding) -> void
def initialize(source, encoding)
@source = source
@encoding = encoding
end
+ #: (Integer byte_offset, Integer byte_length) -> Integer
def count(byte_offset, byte_length)
- @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
+ (@source.byteslice(byte_offset, byte_length) or raise).encode(@encoding, invalid: :replace, undef: :replace).length
end
end
- private_constant :UTF16Counter, :LengthCounter
+ private_constant :UTF8Counter, :UTF16Counter, :UTF32Counter
+
+ # @rbs @source: String
+ # @rbs @counter: UTF8Counter | UTF16Counter | UTF32Counter
+ # @rbs @cache: Hash[Integer, Integer]
+ # @rbs @offsets: Array[Integer]
# Initialize a new cache with the given source and encoding.
+ #--
+ #: (String source, Encoding encoding) -> void
def initialize(source, encoding)
@source = source
@counter =
- if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
+ case encoding
+ when Encoding::UTF_8
+ UTF8Counter.new
+ when Encoding::UTF_16LE, Encoding::UTF_16BE
UTF16Counter.new(source, encoding)
else
- LengthCounter.new(source, encoding)
+ UTF32Counter.new(source, encoding)
end
@cache = {} #: Hash[Integer, Integer]
@@ -226,6 +320,8 @@ module Prism
end
# Retrieve the code units offset from the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def [](byte_offset)
@cache[byte_offset] ||=
if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
@@ -252,11 +348,15 @@ module Prism
# at that point we will treat everything as single-byte characters.
class ASCIISource < Source
# Return the character offset for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def character_offset(byte_offset)
byte_offset
end
- # Return the column number in characters for the given byte offset.
+ # Return the column in characters for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def character_column(byte_offset)
byte_offset - line_start(byte_offset)
end
@@ -267,6 +367,8 @@ module Prism
# This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
# concept of code units that differs from the number of characters in other
# encodings, it is not captured here.
+ #--
+ #: (Integer byte_offset, Encoding encoding) -> Integer
def code_units_offset(byte_offset, encoding)
byte_offset
end
@@ -274,6 +376,8 @@ module Prism
# Returns a cache that is the identity function in order to maintain the
# same interface. We can do this because code units are always equivalent to
# byte offsets for ASCII-only sources.
+ #--
+ #: (Encoding encoding) -> _CodeUnitsCache
def code_units_cache(encoding)
->(byte_offset) { byte_offset }
end
@@ -281,6 +385,8 @@ module Prism
# Specialized version of `code_units_column` that does not depend on
# `code_units_offset`, which is a more expensive operation. This is
# essentially the same as `Prism::Source#column`.
+ #--
+ #: (Integer byte_offset, Encoding encoding) -> Integer
def code_units_column(byte_offset, encoding)
byte_offset - line_start(byte_offset)
end
@@ -290,18 +396,23 @@ module Prism
class Location
# A Source object that is used to determine more information from the given
# offset and length.
- attr_reader :source
+ attr_reader :source #: Source
protected :source
# The byte offset from the beginning of the source where this location
# starts.
- attr_reader :start_offset
+ attr_reader :start_offset #: Integer
# The length of this location in bytes.
- attr_reader :length
+ attr_reader :length #: Integer
+
+ # @rbs @leading_comments: Array[Comment]?
+ # @rbs @trailing_comments: Array[Comment]?
# Create a new location object with the given source, start byte offset, and
# byte length.
+ #--
+ #: (Source source, Integer start_offset, Integer length) -> void
def initialize(source, start_offset, length)
@source = source
@start_offset = start_offset
@@ -316,53 +427,73 @@ module Prism
# These are the comments that are associated with this location that exist
# before the start of this location.
+ #--
+ #: () -> Array[Comment]
def leading_comments
@leading_comments ||= []
end
# Attach a comment to the leading comments of this location.
+ #--
+ #: (Comment comment) -> void
def leading_comment(comment)
leading_comments << comment
end
# These are the comments that are associated with this location that exist
# after the end of this location.
+ #--
+ #: () -> Array[Comment]
def trailing_comments
@trailing_comments ||= []
end
# Attach a comment to the trailing comments of this location.
+ #--
+ #: (Comment comment) -> void
def trailing_comment(comment)
trailing_comments << comment
end
# Returns all comments that are associated with this location (both leading
# and trailing comments).
+ #--
+ #: () -> Array[Comment]
def comments
- [*@leading_comments, *@trailing_comments]
+ [*@leading_comments, *@trailing_comments] #: Array[Comment]
end
# Create a new location object with the given options.
+ #--
+ #: (?source: Source, ?start_offset: Integer, ?length: Integer) -> Location
def copy(source: self.source, start_offset: self.start_offset, length: self.length)
Location.new(source, start_offset, length)
end
# Returns a new location that is the result of chopping off the last byte.
+ #--
+ #: () -> Location
def chop
copy(length: length == 0 ? length : length - 1)
end
# Returns a string representation of this location.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
end
# Returns all of the lines of the source code associated with this location.
+ #--
+ #: () -> Array[String]
def source_lines
source.lines
end
# The source code that this location represents.
+ #--
+ #: () -> String
def slice
source.slice(start_offset, length)
end
@@ -370,6 +501,8 @@ module Prism
# The source code that this location represents starting from the beginning
# of the line that this location starts on to the end of the line that this
# location ends on.
+ #--
+ #: () -> String
def slice_lines
line_start = source.line_start(start_offset)
line_end = source.line_end(end_offset)
@@ -378,118 +511,160 @@ module Prism
# The character offset from the beginning of the source where this location
# starts.
+ #--
+ #: () -> Integer
def start_character_offset
source.character_offset(start_offset)
end
# The offset from the start of the file in code units of the given encoding.
+ #--
+ #: (Encoding encoding) -> Integer
def start_code_units_offset(encoding = Encoding::UTF_16LE)
source.code_units_offset(start_offset, encoding)
end
# The start offset from the start of the file in code units using the given
# cache to fetch or calculate the value.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
def cached_start_code_units_offset(cache)
cache[start_offset]
end
# The byte offset from the beginning of the source where this location ends.
+ #--
+ #: () -> Integer
def end_offset
start_offset + length
end
# The character offset from the beginning of the source where this location
# ends.
+ #--
+ #: () -> Integer
def end_character_offset
source.character_offset(end_offset)
end
# The offset from the start of the file in code units of the given encoding.
+ #--
+ #: (?Encoding encoding) -> Integer
def end_code_units_offset(encoding = Encoding::UTF_16LE)
source.code_units_offset(end_offset, encoding)
end
# The end offset from the start of the file in code units using the given
# cache to fetch or calculate the value.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
def cached_end_code_units_offset(cache)
cache[end_offset]
end
# The line number where this location starts.
+ #--
+ #: () -> Integer
def start_line
source.line(start_offset)
end
# The content of the line where this location starts before this location.
+ #--
+ #: () -> String
def start_line_slice
offset = source.line_start(start_offset)
source.slice(offset, start_offset - offset)
end
# The line number where this location ends.
+ #--
+ #: () -> Integer
def end_line
source.line(end_offset)
end
- # The column number in bytes where this location starts from the start of
+ # The column in bytes where this location starts from the start of
# the line.
+ #--
+ #: () -> Integer
def start_column
source.column(start_offset)
end
- # The column number in characters where this location ends from the start of
+ # The column in characters where this location starts from the start of
# the line.
+ #--
+ #: () -> Integer
def start_character_column
source.character_column(start_offset)
end
- # The column number in code units of the given encoding where this location
+ # The column in code units of the given encoding where this location
# starts from the start of the line.
+ #--
+ #: (?Encoding encoding) -> Integer
def start_code_units_column(encoding = Encoding::UTF_16LE)
source.code_units_column(start_offset, encoding)
end
# The start column in code units using the given cache to fetch or calculate
# the value.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
def cached_start_code_units_column(cache)
cache[start_offset] - cache[source.line_start(start_offset)]
end
- # The column number in bytes where this location ends from the start of the
+ # The column in bytes where this location ends from the start of the
# line.
+ #--
+ #: () -> Integer
def end_column
source.column(end_offset)
end
- # The column number in characters where this location ends from the start of
+ # The column in characters where this location ends from the start of
# the line.
+ #--
+ #: () -> Integer
def end_character_column
source.character_column(end_offset)
end
- # The column number in code units of the given encoding where this location
+ # The column in code units of the given encoding where this location
# ends from the start of the line.
+ #--
+ #: (?Encoding encoding) -> Integer
def end_code_units_column(encoding = Encoding::UTF_16LE)
source.code_units_column(end_offset, encoding)
end
# The end column in code units using the given cache to fetch or calculate
# the value.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
def cached_end_code_units_column(cache)
cache[end_offset] - cache[source.line_start(end_offset)]
end
# Implement the hash pattern matching interface for Location.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ start_offset: start_offset, end_offset: end_offset }
end
# Implement the pretty print interface for Location.
- def pretty_print(q)
+ #--
+ #: (PP q) -> void
+ def pretty_print(q) # :nodoc:
q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column})")
end
# Returns true if the given other location is equal to this location.
+ #--
+ #: (untyped other) -> bool
def ==(other)
Location === other &&
other.start_offset == start_offset &&
@@ -499,6 +674,8 @@ module Prism
# Returns a new location that stretches from this location to the given
# other location. Raises an error if this location is not before the other
# location or if they don't share the same source.
+ #--
+ #: (Location other) -> Location
def join(other)
raise "Incompatible sources" if source != other.source
raise "Incompatible locations" if start_offset > other.start_offset
@@ -509,6 +686,8 @@ module Prism
# Join this location with the first occurrence of the string in the source
# that occurs after this location on the same line, and return the new
# location. This will raise an error if the string does not exist.
+ #--
+ #: (String string) -> Location
def adjoin(string)
line_suffix = source.slice(end_offset, source.line_end(end_offset) - end_offset)
@@ -522,23 +701,38 @@ module Prism
# This represents a comment that was encountered during parsing. It is the
# base class for all comment types.
class Comment
- # The location of this comment in the source.
- attr_reader :location
+ # The Location of this comment in the source.
+ attr_reader :location #: Location
# Create a new comment object with the given location.
+ #--
+ #: (Location location) -> void
def initialize(location)
@location = location
end
# Implement the hash pattern matching interface for Comment.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ location: location }
end
# Returns the content of the comment by slicing it from the source code.
+ #--
+ #: () -> String
def slice
location.slice
end
+
+ # Returns true if this comment happens on the same line as other code and
+ # false if the comment is by itself. This can only be true for inline
+ # comments and should be false for block comments.
+ #--
+ #: () -> bool
+ def trailing?
+ raise NotImplementedError, "trailing? is not implemented for #{self.class}"
+ end
end
# InlineComment objects are the most common. They correspond to comments in
@@ -546,12 +740,16 @@ module Prism
class InlineComment < Comment
# Returns true if this comment happens on the same line as other code and
# false if the comment is by itself.
+ #--
+ #: () -> bool
def trailing?
!location.start_line_slice.strip.empty?
end
# Returns a string representation of this comment.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::InlineComment @location=#{location.inspect}>"
end
end
@@ -559,13 +757,17 @@ module Prism
# EmbDocComment objects correspond to comments that are surrounded by =begin
# and =end.
class EmbDocComment < Comment
- # This can only be true for inline comments.
+ # Returns false. This can only be true for inline comments.
+ #--
+ #: () -> bool
def trailing?
false
end
# Returns a string representation of this comment.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::EmbDocComment @location=#{location.inspect}>"
end
end
@@ -573,34 +775,44 @@ module Prism
# This represents a magic comment that was encountered during parsing.
class MagicComment
# A Location object representing the location of the key in the source.
- attr_reader :key_loc
+ attr_reader :key_loc #: Location
# A Location object representing the location of the value in the source.
- attr_reader :value_loc
+ attr_reader :value_loc #: Location
# Create a new magic comment object with the given key and value locations.
+ #--
+ #: (Location key_loc, Location value_loc) -> void
def initialize(key_loc, value_loc)
@key_loc = key_loc
@value_loc = value_loc
end
# Returns the key of the magic comment by slicing it from the source code.
+ #--
+ #: () -> String
def key
key_loc.slice
end
# Returns the value of the magic comment by slicing it from the source code.
+ #--
+ #: () -> String
def value
value_loc.slice
end
# Implement the hash pattern matching interface for MagicComment.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ key_loc: key_loc, value_loc: value_loc }
end
# Returns a string representation of this magic comment.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>"
end
end
@@ -609,18 +821,20 @@ module Prism
class ParseError
# The type of error. This is an _internal_ symbol that is used for
# communicating with translation layers. It is not meant to be public API.
- attr_reader :type
+ attr_reader :type #: Symbol
# The message associated with this error.
- attr_reader :message
+ attr_reader :message #: String
# A Location object representing the location of this error in the source.
- attr_reader :location
+ attr_reader :location #: Location
# The level of this error.
- attr_reader :level
+ attr_reader :level #: Symbol
# Create a new error object with the given message and location.
+ #--
+ #: (Symbol type, String message, Location location, Symbol level) -> void
def initialize(type, message, location, level)
@type = type
@message = message
@@ -629,12 +843,16 @@ module Prism
end
# Implement the hash pattern matching interface for ParseError.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ type: type, message: message, location: location, level: level }
end
# Returns a string representation of this error.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::ParseError @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
end
end
@@ -643,18 +861,20 @@ module Prism
class ParseWarning
# The type of warning. This is an _internal_ symbol that is used for
# communicating with translation layers. It is not meant to be public API.
- attr_reader :type
+ attr_reader :type #: Symbol
# The message associated with this warning.
- attr_reader :message
+ attr_reader :message #: String
# A Location object representing the location of this warning in the source.
- attr_reader :location
+ attr_reader :location #: Location
# The level of this warning.
- attr_reader :level
+ attr_reader :level #: Symbol
# Create a new warning object with the given message and location.
+ #--
+ #: (Symbol type, String message, Location location, Symbol level) -> void
def initialize(type, message, location, level)
@type = type
@message = message
@@ -663,73 +883,116 @@ module Prism
end
# Implement the hash pattern matching interface for ParseWarning.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ type: type, message: message, location: location, level: level }
end
# Returns a string representation of this warning.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::ParseWarning @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
end
end
- # This represents the result of a call to ::parse or ::parse_file. It contains
- # the requested structure, any comments that were encounters, and any errors
- # that were encountered.
+ # This represents the result of a call to Prism.parse or Prism.parse_file.
+ # It contains the requested structure, any comments that were encountered,
+ # and any errors that were encountered.
class Result
# The list of comments that were encountered during parsing.
- attr_reader :comments
+ attr_reader :comments #: Array[Comment]
# The list of magic comments that were encountered during parsing.
- attr_reader :magic_comments
+ attr_reader :magic_comments #: Array[MagicComment]
# An optional location that represents the location of the __END__ marker
# and the rest of the content of the file. This content is loaded into the
# DATA constant when the file being parsed is the main file being executed.
- attr_reader :data_loc
+ attr_reader :data_loc #: Location?
# The list of errors that were generated during parsing.
- attr_reader :errors
+ attr_reader :errors #: Array[ParseError]
# The list of warnings that were generated during parsing.
- attr_reader :warnings
+ attr_reader :warnings #: Array[ParseWarning]
# A Source instance that represents the source code that was parsed.
- attr_reader :source
+ attr_reader :source #: Source
# Create a new result object with the given values.
- def initialize(comments, magic_comments, data_loc, errors, warnings, source)
+ #--
+ #: (Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(comments, magic_comments, data_loc, errors, warnings, continuable, source)
@comments = comments
@magic_comments = magic_comments
@data_loc = data_loc
@errors = errors
@warnings = warnings
+ @continuable = continuable
@source = source
end
# Implement the hash pattern matching interface for Result.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
end
# Returns the encoding of the source code that was parsed.
+ #--
+ #: () -> Encoding
def encoding
source.encoding
end
# Returns true if there were no errors during parsing and false if there
# were.
+ #--
+ #: () -> bool
def success?
errors.empty?
end
# Returns true if there were errors during parsing and false if there were
# not.
+ #--
+ #: () -> bool
def failure?
!success?
end
+ # Returns true if the parsed source is an incomplete expression that could
+ # become valid with additional input. This is useful for REPL contexts (such
+ # as IRB) where the user may be entering a multi-line expression one line at
+ # a time and the implementation needs to determine whether to wait for more
+ # input or to evaluate what has been entered so far.
+ #
+ # Concretely, this returns true when every error present is caused by the
+ # parser reaching the end of the input before a construct was closed (e.g.
+ # an unclosed string, array, block, or keyword), and returns false when any
+ # error is caused by a token that makes the input structurally invalid
+ # regardless of what might follow (e.g. a stray `end`, `]`, or `)` with no
+ # matching opener).
+ #
+ # Examples:
+ #
+ # Prism.parse("1 + [").continuable? #=> true (unclosed array)
+ # Prism.parse("1 + ]").continuable? #=> false (stray ])
+ # Prism.parse("tap do").continuable? #=> true (unclosed block)
+ # Prism.parse("end.tap do").continuable? #=> false (stray end)
+ #
+ #--
+ #: () -> bool
+ def continuable?
+ @continuable
+ end
+
# Create a code units cache for the given encoding.
+ #--
+ #: (Encoding encoding) -> _CodeUnitsCache
def code_units_cache(encoding)
source.code_units_cache(encoding)
end
@@ -746,32 +1009,42 @@ module Prism
private_constant :Newlines
# The syntax tree that was parsed from the source code.
- attr_reader :value
+ attr_reader :value #: ProgramNode
# Create a new parse result object with the given values.
- def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+ #--
+ #: (ProgramNode value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
@value = value
- super(comments, magic_comments, data_loc, errors, warnings, source)
+ super(comments, magic_comments, data_loc, errors, warnings, continuable, source)
end
# Implement the hash pattern matching interface for ParseResult.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
super.merge!(value: value)
end
# Attach the list of comments to their respective locations in the tree.
+ #--
+ #: () -> void
def attach_comments!
Comments.new(self).attach! # steep:ignore
end
# Walk the tree and mark nodes that are on a new line, loosely emulating
# the behavior of CRuby's `:line` tracepoint event.
+ #--
+ #: () -> void
def mark_newlines!
value.accept(Newlines.new(source.offsets.size)) # steep:ignore
end
# Returns a string representation of the syntax tree with the errors
# displayed inline.
+ #--
+ #: () -> String
def errors_format
Errors.new(self).format
end
@@ -780,16 +1053,20 @@ module Prism
# This is a result specific to the `lex` and `lex_file` methods.
class LexResult < Result
# The list of tokens that were parsed from the source code.
- attr_reader :value
+ attr_reader :value #: Array[[Token, Integer]]
# Create a new lex result object with the given values.
- def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+ #--
+ #: (Array[[Token, Integer]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
@value = value
- super(comments, magic_comments, data_loc, errors, warnings, source)
+ super(comments, magic_comments, data_loc, errors, warnings, continuable, source)
end
# Implement the hash pattern matching interface for LexResult.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
super.merge!(value: value)
end
end
@@ -798,16 +1075,20 @@ module Prism
class ParseLexResult < Result
# A tuple of the syntax tree and the list of tokens that were parsed from
# the source code.
- attr_reader :value
+ attr_reader :value #: [ProgramNode, Array[[Token, Integer]]]
# Create a new parse lex result object with the given values.
- def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+ #--
+ #: ([ProgramNode, Array[[Token, Integer]]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
@value = value
- super(comments, magic_comments, data_loc, errors, warnings, source)
+ super(comments, magic_comments, data_loc, errors, warnings, continuable, source)
end
# Implement the hash pattern matching interface for ParseLexResult.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
super.merge!(value: value)
end
end
@@ -815,16 +1096,20 @@ module Prism
# This represents a token from the Ruby source.
class Token
# The Source object that represents the source this token came from.
- attr_reader :source
+ attr_reader :source #: Source
private :source
# The type of token that this token is.
- attr_reader :type
+ attr_reader :type #: Symbol
# A byteslice of the source that this token represents.
- attr_reader :value
+ attr_reader :value #: String
+
+ # @rbs @location: Location | Integer
# Create a new token object with the given type, value, and location.
+ #--
+ #: (Source source, Symbol type, String value, Location | Integer location) -> void
def initialize(source, type, value, location)
@source = source
@type = type
@@ -833,11 +1118,15 @@ module Prism
end
# Implement the hash pattern matching interface for Token.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ type: type, value: value, location: location }
end
# A Location object representing the location of this token in the source.
+ #--
+ #: () -> Location
def location
location = @location
return location if location.is_a?(Location)
@@ -845,7 +1134,9 @@ module Prism
end
# Implement the pretty print interface for Token.
- def pretty_print(q)
+ #--
+ #: (PP q) -> void
+ def pretty_print(q) # :nodoc:
q.group do
q.text(type.to_s)
self.location.pretty_print(q)
@@ -860,6 +1151,8 @@ module Prism
end
# Returns true if the given other token is equal to this token.
+ #--
+ #: (untyped other) -> bool
def ==(other)
Token === other &&
other.type == type &&
@@ -867,12 +1160,16 @@ module Prism
end
# Returns a string representation of this token.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
location
super
end
# Freeze this object and the objects it contains.
+ #--
+ #: () -> void
def deep_freeze
value.freeze
location.freeze
@@ -887,14 +1184,16 @@ module Prism
class Scope
# The list of local variables that are defined in this scope. This should be
# defined as an array of symbols.
- attr_reader :locals
+ attr_reader :locals #: Array[Symbol]
# The list of local variables that are forwarded to the next scope. This
# should by defined as an array of symbols containing the specific values of
# :*, :**, :&, or :"...".
- attr_reader :forwarding
+ attr_reader :forwarding #: Array[Symbol]
# Create a new scope object with the given locals and forwarding.
+ #--
+ #: (Array[Symbol] locals, Array[Symbol] forwarding) -> void
def initialize(locals, forwarding)
@locals = locals
@forwarding = forwarding
@@ -904,6 +1203,8 @@ module Prism
# Create a new scope with the given locals and forwarding options that is
# suitable for passing into one of the Prism.* methods that accepts the
# `scopes` option.
+ #--
+ #: (?locals: Array[Symbol], ?forwarding: Array[Symbol]) -> Scope
def self.scope(locals: [], forwarding: [])
Scope.new(locals, forwarding)
end
diff --git a/lib/prism/parse_result/comments.rb b/lib/prism/parse_result/comments.rb
index 22c4148b2c..df80792d39 100644
--- a/lib/prism/parse_result/comments.rb
+++ b/lib/prism/parse_result/comments.rb
@@ -1,4 +1,7 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
class ParseResult < Result
@@ -17,32 +20,49 @@ module Prism
# the comment. Otherwise it will favor attaching to the nearest location
# that is after the comment.
class Comments
+ # @rbs!
+ # # An internal interface for a target that comments can be attached
+ # # to. This is either going to be a NodeTarget or a CommentTarget.
+ # interface _CommentTarget
+ # def start_offset: () -> Integer
+ # def end_offset: () -> Integer
+ # def encloses?: (Comment) -> bool
+ # def leading_comment: (Comment) -> void
+ # def trailing_comment: (Comment) -> void
+ # end
+
# A target for attaching comments that is based on a specific node's
# location.
class NodeTarget # :nodoc:
- attr_reader :node
+ attr_reader :node #: node
+ #: (node node) -> void
def initialize(node)
@node = node
end
+ #: () -> Integer
def start_offset
node.start_offset
end
+ #: () -> Integer
def end_offset
node.end_offset
end
+ #: (Comment comment) -> bool
def encloses?(comment)
start_offset <= comment.location.start_offset &&
comment.location.end_offset <= end_offset
end
+ #: (Comment comment) -> void
def leading_comment(comment)
node.location.leading_comment(comment)
end
+ #: (Comment comment) -> void
def trailing_comment(comment)
node.location.trailing_comment(comment)
end
@@ -51,44 +71,54 @@ module Prism
# A target for attaching comments that is based on a location field on a
# node. For example, the `end` token of a ClassNode.
class LocationTarget # :nodoc:
- attr_reader :location
+ attr_reader :location #: Location
+ #: (Location location) -> void
def initialize(location)
@location = location
end
+ #: () -> Integer
def start_offset
location.start_offset
end
+ #: () -> Integer
def end_offset
location.end_offset
end
+ #: (Comment comment) -> bool
def encloses?(comment)
false
end
+ #: (Comment comment) -> void
def leading_comment(comment)
location.leading_comment(comment)
end
+ #: (Comment comment) -> void
def trailing_comment(comment)
location.trailing_comment(comment)
end
end
# The parse result that we are attaching comments to.
- attr_reader :parse_result
+ attr_reader :parse_result #: ParseResult
# Create a new Comments object that will attach comments to the given
# parse result.
+ #--
+ #: (ParseResult parse_result) -> void
def initialize(parse_result)
@parse_result = parse_result
end
# Attach the comments to their respective locations in the tree by
# mutating the parse result.
+ #--
+ #: () -> void
def attach!
parse_result.comments.each do |comment|
preceding, enclosing, following = nearest_targets(parse_result.value, comment)
@@ -116,11 +146,13 @@ module Prism
# Responsible for finding the nearest targets to the given comment within
# the context of the given encapsulating node.
+ #--
+ #: (node node, Comment comment) -> [_CommentTarget?, _CommentTarget?, _CommentTarget?]
def nearest_targets(node, comment)
comment_start = comment.location.start_offset
comment_end = comment.location.end_offset
- targets = [] #: Array[_Target]
+ targets = [] #: Array[_CommentTarget]
node.comment_targets.map do |value|
case value
when StatementsNode
@@ -133,8 +165,8 @@ module Prism
end
targets.sort_by!(&:start_offset)
- preceding = nil #: _Target?
- following = nil #: _Target?
+ preceding = nil #: _CommentTarget?
+ following = nil #: _CommentTarget?
left = 0
right = targets.length
diff --git a/lib/prism/parse_result/errors.rb b/lib/prism/parse_result/errors.rb
index eb4f317248..388309d23d 100644
--- a/lib/prism/parse_result/errors.rb
+++ b/lib/prism/parse_result/errors.rb
@@ -1,4 +1,7 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
require "stringio"
@@ -8,14 +11,18 @@ module Prism
# can be used to format the errors in a human-readable way.
class Errors
# The parse result that contains the errors.
- attr_reader :parse_result
+ attr_reader :parse_result #: ParseResult
# Initialize a new set of errors from the given parse result.
+ #--
+ #: (ParseResult parse_result) -> void
def initialize(parse_result)
@parse_result = parse_result
end
# Formats the errors in a human-readable way and return them as a string.
+ #--
+ #: () -> String
def format
error_lines = {} #: Hash[Integer, Array[ParseError]]
parse_result.errors.each do |error|
diff --git a/lib/prism/parse_result/newlines.rb b/lib/prism/parse_result/newlines.rb
index 37f64f8ae2..450c790226 100644
--- a/lib/prism/parse_result/newlines.rb
+++ b/lib/prism/parse_result/newlines.rb
@@ -1,4 +1,7 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
class ParseResult < Result
@@ -23,13 +26,20 @@ module Prism
# that case. We do that to avoid storing the extra `@newline` instance
# variable on every node if we don't need it.
class Newlines < Visitor
+ # The map of line indices to whether or not they have been marked as
+ # emitting a newline event.
+ # @rbs @lines: Array[bool]
+
# Create a new Newlines visitor with the given newline offsets.
+ #--
+ #: (Integer lines) -> void
def initialize(lines)
- # @type var lines: Integer
@lines = Array.new(1 + lines, false)
end
- # Permit block/lambda nodes to mark newlines within themselves.
+ # Permit block nodes to mark newlines within themselves.
+ #--
+ #: (BlockNode node) -> void
def visit_block_node(node)
old_lines = @lines
@lines = Array.new(old_lines.size, false)
@@ -41,17 +51,39 @@ module Prism
end
end
- alias_method :visit_lambda_node, :visit_block_node
+ # Permit lambda nodes to mark newlines within themselves.
+ #--
+ #: (LambdaNode node) -> void
+ def visit_lambda_node(node)
+ old_lines = @lines
+ @lines = Array.new(old_lines.size, false)
+
+ begin
+ super(node)
+ ensure
+ @lines = old_lines
+ end
+ end
- # Mark if/unless nodes as newlines.
+ # Mark if nodes as newlines.
+ #--
+ #: (IfNode node) -> void
def visit_if_node(node)
node.newline_flag!(@lines)
super(node)
end
- alias_method :visit_unless_node, :visit_if_node
+ # Mark unless nodes as newlines.
+ #--
+ #: (UnlessNode node) -> void
+ def visit_unless_node(node)
+ node.newline_flag!(@lines)
+ super(node)
+ end
# Permit statements lists to mark newlines within themselves.
+ #--
+ #: (StatementsNode node) -> void
def visit_statements_node(node)
node.body.each do |child|
child.newline_flag!(@lines)
@@ -62,10 +94,16 @@ module Prism
end
class Node
+ # Tracks whether or not this node should emit a newline event when the
+ # instructions that it represents are executed.
+ # @rbs @newline_flag: bool
+
+ #: () -> bool
def newline_flag? # :nodoc:
!!defined?(@newline_flag)
end
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
line = location.start_line
unless lines[line]
@@ -76,48 +114,56 @@ module Prism
end
class BeginNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
# Never mark BeginNode with a newline flag, mark children instead.
end
end
class ParenthesesNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
# Never mark ParenthesesNode with a newline flag, mark children instead.
end
end
class IfNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
predicate.newline_flag!(lines)
end
end
class UnlessNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
predicate.newline_flag!(lines)
end
end
class UntilNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
predicate.newline_flag!(lines)
end
end
class WhileNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
predicate.newline_flag!(lines)
end
end
class RescueModifierNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
expression.newline_flag!(lines)
end
end
class InterpolatedMatchLastLineNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
first = parts.first
first.newline_flag!(lines) if first
@@ -125,6 +171,7 @@ module Prism
end
class InterpolatedRegularExpressionNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
first = parts.first
first.newline_flag!(lines) if first
@@ -132,6 +179,7 @@ module Prism
end
class InterpolatedStringNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
first = parts.first
first.newline_flag!(lines) if first
@@ -139,6 +187,7 @@ module Prism
end
class InterpolatedSymbolNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
first = parts.first
first.newline_flag!(lines) if first
@@ -146,6 +195,7 @@ module Prism
end
class InterpolatedXStringNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
first = parts.first
first.newline_flag!(lines) if first
diff --git a/lib/prism/pattern.rb b/lib/prism/pattern.rb
index 03fec26789..be0493df05 100644
--- a/lib/prism/pattern.rb
+++ b/lib/prism/pattern.rb
@@ -1,4 +1,7 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
# A pattern is an object that wraps a Ruby pattern matching expression. The
@@ -40,7 +43,9 @@ module Prism
class CompilationError < StandardError
# Create a new CompilationError with the given representation of the node
# that caused the error.
- def initialize(repr)
+ #--
+ #: (String repr) -> void
+ def initialize(repr) # :nodoc:
super(<<~ERROR)
prism was unable to compile the pattern you provided into a usable
expression. It failed on to understand the node represented by:
@@ -56,10 +61,13 @@ module Prism
end
# The query that this pattern was initialized with.
- attr_reader :query
+ attr_reader :query #: String
+ # @rbs @compiled: Proc?
# Create a new pattern with the given query. The query should be a string
# containing a Ruby pattern matching expression.
+ #--
+ #: (String query) -> void
def initialize(query)
@query = query
@compiled = nil
@@ -67,6 +75,8 @@ module Prism
# Compile the query into a callable object that can be used to match against
# nodes.
+ #--
+ #: () -> Proc
def compile
result = Prism.parse("case nil\nin #{query}\nend")
@@ -83,7 +93,10 @@ module Prism
# pattern. If a block is given, it will be called with each node that
# matches the pattern. If no block is given, an enumerator will be returned
# that will yield each node that matches the pattern.
- def scan(root)
+ #--
+ #: (node root) -> Enumerator[node, void]
+ #: (node root) { (node) -> void } -> void
+ def scan(root, &blk)
return to_enum(:scan, root) unless block_given?
@compiled ||= compile
@@ -99,23 +112,33 @@ module Prism
# Shortcut for combining two procs into one that returns true if both return
# true.
- def combine_and(left, right)
+ #--
+ #: (Proc left, Proc right) -> Proc
+ def combine_and(left, right) # :nodoc:
->(other) { left.call(other) && right.call(other) }
end
# Shortcut for combining two procs into one that returns true if either
# returns true.
- def combine_or(left, right)
+ #--
+ #: (Proc left, Proc right) -> Proc
+ def combine_or(left, right) # :nodoc:
->(other) { left.call(other) || right.call(other) }
end
- # Raise an error because the given node is not supported.
- def compile_error(node)
+ # Raise an error because the given node is not supported. Note purposefully
+ # not typing this method since it is a no return method that Steep does not
+ # understand.
+ #--
+ #: (node node) -> bot
+ def compile_error(node) # :nodoc:
raise CompilationError, node.inspect
end
# in [foo, bar, baz]
- def compile_array_pattern_node(node)
+ #--
+ #: (ArrayPatternNode node) -> Proc
+ def compile_array_pattern_node(node) # :nodoc:
compile_error(node) if !node.rest.nil? || node.posts.any?
constant = node.constant
@@ -140,12 +163,16 @@ module Prism
end
# in foo | bar
- def compile_alternation_pattern_node(node)
+ #--
+ #: (AlternationPatternNode node) -> Proc
+ def compile_alternation_pattern_node(node) # :nodoc:
combine_or(compile_node(node.left), compile_node(node.right))
end
# in Prism::ConstantReadNode
- def compile_constant_path_node(node)
+ #--
+ #: (ConstantPathNode node) -> Proc
+ def compile_constant_path_node(node) # :nodoc:
parent = node.parent
if parent.is_a?(ConstantReadNode) && parent.slice == "Prism"
@@ -160,12 +187,16 @@ module Prism
# in ConstantReadNode
# in String
- def compile_constant_read_node(node)
+ #--
+ #: (ConstantReadNode node) -> Proc
+ def compile_constant_read_node(node) # :nodoc:
compile_constant_name(node, node.name)
end
# Compile a name associated with a constant.
- def compile_constant_name(node, name)
+ #--
+ #: ((ConstantPathNode | ConstantReadNode) node, Symbol name) -> Proc
+ def compile_constant_name(node, name) # :nodoc:
if Prism.const_defined?(name, false)
clazz = Prism.const_get(name)
@@ -181,9 +212,14 @@ module Prism
# in InstanceVariableReadNode[name: Symbol]
# in { name: Symbol }
- def compile_hash_pattern_node(node)
+ #--
+ #: (HashPatternNode node) -> Proc
+ def compile_hash_pattern_node(node) # :nodoc:
compile_error(node) if node.rest
- compiled_constant = compile_node(node.constant) if node.constant
+
+ if (constant = node.constant)
+ compiled_constant = compile_node(constant)
+ end
preprocessed =
node.elements.to_h do |element|
@@ -211,12 +247,16 @@ module Prism
end
# in nil
- def compile_nil_node(node)
+ #--
+ #: (NilNode node) -> Proc
+ def compile_nil_node(node) # :nodoc:
->(attribute) { attribute.nil? }
end
# in /foo/
- def compile_regular_expression_node(node)
+ #--
+ #: (RegularExpressionNode node) -> Proc
+ def compile_regular_expression_node(node) # :nodoc:
regexp = Regexp.new(node.unescaped, node.closing[1..])
->(attribute) { regexp === attribute }
@@ -224,7 +264,9 @@ module Prism
# in ""
# in "foo"
- def compile_string_node(node)
+ #--
+ #: (StringNode node) -> Proc
+ def compile_string_node(node) # :nodoc:
string = node.unescaped
->(attribute) { string === attribute }
@@ -232,7 +274,9 @@ module Prism
# in :+
# in :foo
- def compile_symbol_node(node)
+ #--
+ #: (SymbolNode node) -> Proc
+ def compile_symbol_node(node) # :nodoc:
symbol = node.unescaped.to_sym
->(attribute) { symbol === attribute }
@@ -240,7 +284,9 @@ module Prism
# Compile any kind of node. Dispatch out to the individual compilation
# methods based on the type of node.
- def compile_node(node)
+ #--
+ #: (node node) -> Proc
+ def compile_node(node) # :nodoc:
case node
when AlternationPatternNode
compile_alternation_pattern_node(node)
diff --git a/lib/prism/polyfill/scan_byte.rb b/lib/prism/polyfill/scan_byte.rb
new file mode 100644
index 0000000000..9276e509fc
--- /dev/null
+++ b/lib/prism/polyfill/scan_byte.rb
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+
+require "strscan"
+
+# Polyfill for StringScanner#scan_byte, which didn't exist until Ruby 3.4.
+if !(StringScanner.method_defined?(:scan_byte))
+ StringScanner.include(
+ Module.new {
+ def scan_byte # :nodoc:
+ get_byte&.b&.ord
+ end
+ }
+ )
+end
diff --git a/lib/prism/polyfill/warn.rb b/lib/prism/polyfill/warn.rb
index 560380d308..76a4264623 100644
--- a/lib/prism/polyfill/warn.rb
+++ b/lib/prism/polyfill/warn.rb
@@ -7,17 +7,14 @@ if (method = Kernel.instance_method(:warn)).respond_to?(:parameters) ? method.pa
Kernel.prepend(
Module.new {
def warn(*msgs, uplevel: nil, category: nil) # :nodoc:
- uplevel =
- case uplevel
- when nil
- 1
- when Integer
- uplevel + 1
- else
- uplevel.to_int + 1
- end
-
- super(*msgs, uplevel: uplevel)
+ case uplevel
+ when nil
+ super(*msgs)
+ when Integer
+ super(*msgs, uplevel: uplevel + 1)
+ else
+ super(*msgs, uplevel: uplevel.to_int + 1)
+ end
end
}
)
@@ -25,17 +22,14 @@ if (method = Kernel.instance_method(:warn)).respond_to?(:parameters) ? method.pa
Object.prepend(
Module.new {
def warn(*msgs, uplevel: nil, category: nil) # :nodoc:
- uplevel =
- case uplevel
- when nil
- 1
- when Integer
- uplevel + 1
- else
- uplevel.to_int + 1
- end
-
- super(*msgs, uplevel: uplevel)
+ case uplevel
+ when nil
+ super(*msgs)
+ when Integer
+ super(*msgs, uplevel: uplevel + 1)
+ else
+ super(*msgs, uplevel: uplevel.to_int + 1)
+ end
end
}
)
diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec
index 5cb5a98057..aac056b3f8 100644
--- a/lib/prism/prism.gemspec
+++ b/lib/prism/prism.gemspec
@@ -2,7 +2,7 @@
Gem::Specification.new do |spec|
spec.name = "prism"
- spec.version = "1.4.0"
+ spec.version = "1.9.0"
spec.authors = ["Shopify"]
spec.email = ["ruby@shopify.com"]
@@ -42,31 +42,69 @@ Gem::Specification.new do |spec|
"docs/serialization.md",
"docs/testing.md",
"ext/prism/api_node.c",
- "ext/prism/api_pack.c",
+ "ext/prism/extconf.rb",
"ext/prism/extension.c",
"ext/prism/extension.h",
"include/prism.h",
+ "include/prism/compiler/accel.h",
+ "include/prism/compiler/align.h",
+ "include/prism/compiler/exported.h",
+ "include/prism/compiler/fallthrough.h",
+ "include/prism/compiler/filesystem.h",
+ "include/prism/compiler/flex_array.h",
+ "include/prism/compiler/force_inline.h",
+ "include/prism/compiler/format.h",
+ "include/prism/compiler/inline.h",
+ "include/prism/compiler/nodiscard.h",
+ "include/prism/compiler/nonnull.h",
+ "include/prism/compiler/unused.h",
+ "include/prism/internal/allocator.h",
+ "include/prism/internal/allocator_debug.h",
+ "include/prism/internal/arena.h",
+ "include/prism/internal/bit.h",
+ "include/prism/internal/buffer.h",
+ "include/prism/internal/char.h",
+ "include/prism/internal/comments.h",
+ "include/prism/internal/constant_pool.h",
+ "include/prism/internal/diagnostic.h",
+ "include/prism/internal/encoding.h",
+ "include/prism/internal/integer.h",
+ "include/prism/internal/isinf.h",
+ "include/prism/internal/line_offset_list.h",
+ "include/prism/internal/list.h",
+ "include/prism/internal/magic_comments.h",
+ "include/prism/internal/memchr.h",
+ "include/prism/internal/node.h",
+ "include/prism/internal/options.h",
+ "include/prism/internal/parser.h",
+ "include/prism/internal/regexp.h",
+ "include/prism/internal/serialize.h",
+ "include/prism/internal/source.h",
+ "include/prism/internal/static_literals.h",
+ "include/prism/internal/strncasecmp.h",
+ "include/prism/internal/stringy.h",
+ "include/prism/internal/strpbrk.h",
+ "include/prism/internal/tokens.h",
+ "include/prism/arena.h",
"include/prism/ast.h",
- "include/prism/defines.h",
+ "include/prism/buffer.h",
+ "include/prism/comments.h",
+ "include/prism/constant_pool.h",
"include/prism/diagnostic.h",
- "include/prism/encoding.h",
+ "include/prism/excludes.h",
+ "include/prism/integer.h",
+ "include/prism/json.h",
+ "include/prism/line_offset_list.h",
+ "include/prism/magic_comments.h",
"include/prism/node.h",
"include/prism/options.h",
- "include/prism/pack.h",
"include/prism/parser.h",
"include/prism/prettyprint.h",
- "include/prism/regexp.h",
- "include/prism/static_literals.h",
- "include/prism/util/pm_buffer.h",
- "include/prism/util/pm_char.h",
- "include/prism/util/pm_constant_pool.h",
- "include/prism/util/pm_integer.h",
- "include/prism/util/pm_list.h",
- "include/prism/util/pm_memchr.h",
- "include/prism/util/pm_newline_list.h",
- "include/prism/util/pm_strncasecmp.h",
- "include/prism/util/pm_string.h",
- "include/prism/util/pm_strpbrk.h",
+ "include/prism/serialize.h",
+ "include/prism/source.h",
+ "include/prism/stream.h",
+ "include/prism/string_query.h",
+ "include/prism/stringy.h",
"include/prism/version.h",
"lib/prism.rb",
"lib/prism/compiler.rb",
@@ -79,8 +117,8 @@ Gem::Specification.new do |spec|
"lib/prism/lex_compat.rb",
"lib/prism/mutation_compiler.rb",
"lib/prism/node_ext.rb",
+ "lib/prism/node_find.rb",
"lib/prism/node.rb",
- "lib/prism/pack.rb",
"lib/prism/parse_result.rb",
"lib/prism/parse_result/comments.rb",
"lib/prism/parse_result/errors.rb",
@@ -88,6 +126,7 @@ Gem::Specification.new do |spec|
"lib/prism/pattern.rb",
"lib/prism/polyfill/append_as_bytes.rb",
"lib/prism/polyfill/byteindex.rb",
+ "lib/prism/polyfill/scan_byte.rb",
"lib/prism/polyfill/unpack1.rb",
"lib/prism/polyfill/warn.rb",
"lib/prism/reflection.rb",
@@ -97,73 +136,93 @@ Gem::Specification.new do |spec|
"lib/prism/translation.rb",
"lib/prism/translation/parser.rb",
"lib/prism/translation/parser_current.rb",
- "lib/prism/translation/parser33.rb",
- "lib/prism/translation/parser34.rb",
- "lib/prism/translation/parser35.rb",
+ "lib/prism/translation/parser_versions.rb",
"lib/prism/translation/parser/builder.rb",
"lib/prism/translation/parser/compiler.rb",
"lib/prism/translation/parser/lexer.rb",
"lib/prism/translation/ripper.rb",
+ "lib/prism/translation/ripper/filter.rb",
+ "lib/prism/translation/ripper/lexer.rb",
"lib/prism/translation/ripper/sexp.rb",
"lib/prism/translation/ripper/shim.rb",
"lib/prism/translation/ruby_parser.rb",
"lib/prism/visitor.rb",
"prism.gemspec",
- "rbi/prism.rbi",
- "rbi/prism/compiler.rbi",
- "rbi/prism/dsl.rbi",
- "rbi/prism/inspect_visitor.rbi",
- "rbi/prism/node_ext.rbi",
- "rbi/prism/node.rbi",
- "rbi/prism/parse_result.rbi",
- "rbi/prism/reflection.rbi",
- "rbi/prism/string_query.rbi",
+ "rbi/generated/prism.rbi",
+ "rbi/generated/prism/compiler.rbi",
+ "rbi/generated/prism/desugar_compiler.rbi",
+ "rbi/generated/prism/dispatcher.rbi",
+ "rbi/generated/prism/dot_visitor.rbi",
+ "rbi/generated/prism/dsl.rbi",
+ "rbi/generated/prism/inspect_visitor.rbi",
+ "rbi/generated/prism/lex_compat.rbi",
+ "rbi/generated/prism/mutation_compiler.rbi",
+ "rbi/generated/prism/node.rbi",
+ "rbi/generated/prism/node_ext.rbi",
+ "rbi/generated/prism/node_find.rbi",
+ "rbi/generated/prism/parse_result.rbi",
+ "rbi/generated/prism/pattern.rbi",
+ "rbi/generated/prism/reflection.rbi",
+ "rbi/generated/prism/relocation.rbi",
+ "rbi/generated/prism/serialize.rbi",
+ "rbi/generated/prism/string_query.rbi",
+ "rbi/generated/prism/translation.rbi",
+ "rbi/generated/prism/visitor.rbi",
+ "rbi/generated/prism/parse_result/comments.rbi",
+ "rbi/generated/prism/parse_result/errors.rbi",
+ "rbi/generated/prism/parse_result/newlines.rbi",
"rbi/prism/translation/parser.rbi",
- "rbi/prism/translation/parser33.rbi",
- "rbi/prism/translation/parser34.rbi",
- "rbi/prism/translation/parser35.rbi",
+ "rbi/prism/translation/parser_versions.rbi",
"rbi/prism/translation/ripper.rbi",
- "rbi/prism/visitor.rbi",
- "sig/prism.rbs",
- "sig/prism/compiler.rbs",
- "sig/prism/dispatcher.rbs",
- "sig/prism/dot_visitor.rbs",
- "sig/prism/dsl.rbs",
- "sig/prism/inspect_visitor.rbs",
- "sig/prism/lex_compat.rbs",
- "sig/prism/mutation_compiler.rbs",
- "sig/prism/node_ext.rbs",
- "sig/prism/node.rbs",
- "sig/prism/pack.rbs",
- "sig/prism/parse_result.rbs",
- "sig/prism/parse_result/comments.rbs",
- "sig/prism/pattern.rbs",
- "sig/prism/reflection.rbs",
- "sig/prism/relocation.rbs",
- "sig/prism/serialize.rbs",
- "sig/prism/string_query.rbs",
- "sig/prism/visitor.rbs",
+ "rbi/rubyvm/node_find.rbi",
+ "sig/generated/prism.rbs",
+ "sig/generated/prism/compiler.rbs",
+ "sig/generated/prism/desugar_compiler.rbs",
+ "sig/generated/prism/dispatcher.rbs",
+ "sig/generated/prism/dot_visitor.rbs",
+ "sig/generated/prism/dsl.rbs",
+ "sig/generated/prism/inspect_visitor.rbs",
+ "sig/generated/prism/lex_compat.rbs",
+ "sig/generated/prism/mutation_compiler.rbs",
+ "sig/generated/prism/node.rbs",
+ "sig/generated/prism/node_ext.rbs",
+ "sig/generated/prism/node_find.rbs",
+ "sig/generated/prism/parse_result.rbs",
+ "sig/generated/prism/pattern.rbs",
+ "sig/generated/prism/reflection.rbs",
+ "sig/generated/prism/relocation.rbs",
+ "sig/generated/prism/serialize.rbs",
+ "sig/generated/prism/string_query.rbs",
+ "sig/generated/prism/translation.rbs",
+ "sig/generated/prism/visitor.rbs",
+ "sig/generated/prism/parse_result/comments.rbs",
+ "sig/generated/prism/parse_result/errors.rbs",
+ "sig/generated/prism/parse_result/newlines.rbs",
+ "src/arena.c",
+ "src/buffer.c",
+ "src/char.c",
+ "src/constant_pool.c",
"src/diagnostic.c",
"src/encoding.c",
+ "src/integer.c",
+ "src/json.c",
+ "src/line_offset_list.c",
+ "src/list.c",
+ "src/memchr.c",
"src/node.c",
"src/options.c",
- "src/pack.c",
+ "src/parser.c",
"src/prettyprint.c",
"src/prism.c",
"src/regexp.c",
"src/serialize.c",
+ "src/source.c",
"src/static_literals.c",
- "src/token_type.c",
- "src/util/pm_buffer.c",
- "src/util/pm_char.c",
- "src/util/pm_constant_pool.c",
- "src/util/pm_integer.c",
- "src/util/pm_list.c",
- "src/util/pm_memchr.c",
- "src/util/pm_newline_list.c",
- "src/util/pm_string.c",
- "src/util/pm_strncasecmp.c",
- "src/util/pm_strpbrk.c"
+ "src/string_query.c",
+ "src/stringy.c",
+ "src/strncasecmp.c",
+ "src/strpbrk.c",
+ "src/tokens.c"
]
spec.extensions = ["ext/prism/extconf.rb"]
diff --git a/lib/prism/relocation.rb b/lib/prism/relocation.rb
index 163d2012c5..af0f792827 100644
--- a/lib/prism/relocation.rb
+++ b/lib/prism/relocation.rb
@@ -1,4 +1,7 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
# Prism parses deterministically for the same input. This provides a nice
@@ -11,6 +14,33 @@ module Prism
# "save" nodes and locations using a minimal amount of memory (just the
# node_id and a field identifier) and then reify them later.
module Relocation
+ # @rbs!
+ # type entry_value = untyped
+ # type entry_values = Hash[Symbol, entry_value]
+ #
+ # interface _Value
+ # def start_line: () -> Integer
+ # def end_line: () -> Integer
+ # def start_offset: () -> Integer
+ # def end_offset: () -> Integer
+ # def start_character_offset: () -> Integer
+ # def end_character_offset: () -> Integer
+ # def cached_start_code_units_offset: (_CodeUnitsCache cache) -> Integer
+ # def cached_end_code_units_offset: (_CodeUnitsCache cache) -> Integer
+ # def start_column: () -> Integer
+ # def end_column: () -> Integer
+ # def start_character_column: () -> Integer
+ # def end_character_column: () -> Integer
+ # def cached_start_code_units_column: (_CodeUnitsCache cache) -> Integer
+ # def cached_end_code_units_column: (_CodeUnitsCache cache) -> Integer
+ # def leading_comments: () -> Array[Comment]
+ # def trailing_comments: () -> Array[Comment]
+ # end
+ #
+ # interface _Field
+ # def fields: (_Value value) -> entry_values
+ # end
+
# An entry in a repository that will lazily reify its values when they are
# first accessed.
class Entry
@@ -20,109 +50,152 @@ module Prism
class MissingValueError < StandardError
end
+ # @rbs @repository: Repository?
+ # @rbs @values: Hash[Symbol, untyped]?
+
# Initialize a new entry with the given repository.
+ #--
+ #: (Repository repository) -> void
def initialize(repository)
@repository = repository
@values = nil
end
# Fetch the filepath of the value.
+ #--
+ #: () -> String
def filepath
fetch_value(:filepath)
end
# Fetch the start line of the value.
+ #--
+ #: () -> Integer
def start_line
fetch_value(:start_line)
end
# Fetch the end line of the value.
+ #--
+ #: () -> Integer
def end_line
fetch_value(:end_line)
end
# Fetch the start byte offset of the value.
+ #--
+ #: () -> Integer
def start_offset
fetch_value(:start_offset)
end
# Fetch the end byte offset of the value.
+ #--
+ #: () -> Integer
def end_offset
fetch_value(:end_offset)
end
# Fetch the start character offset of the value.
+ #--
+ #: () -> Integer
def start_character_offset
fetch_value(:start_character_offset)
end
# Fetch the end character offset of the value.
+ #--
+ #: () -> Integer
def end_character_offset
fetch_value(:end_character_offset)
end
# Fetch the start code units offset of the value, for the encoding that
# was configured on the repository.
+ #--
+ #: () -> Integer
def start_code_units_offset
fetch_value(:start_code_units_offset)
end
# Fetch the end code units offset of the value, for the encoding that was
# configured on the repository.
+ #--
+ #: () -> Integer
def end_code_units_offset
fetch_value(:end_code_units_offset)
end
# Fetch the start byte column of the value.
+ #--
+ #: () -> Integer
def start_column
fetch_value(:start_column)
end
# Fetch the end byte column of the value.
+ #--
+ #: () -> Integer
def end_column
fetch_value(:end_column)
end
# Fetch the start character column of the value.
+ #--
+ #: () -> Integer
def start_character_column
fetch_value(:start_character_column)
end
# Fetch the end character column of the value.
+ #--
+ #: () -> Integer
def end_character_column
fetch_value(:end_character_column)
end
# Fetch the start code units column of the value, for the encoding that
# was configured on the repository.
+ #--
+ #: () -> Integer
def start_code_units_column
fetch_value(:start_code_units_column)
end
# Fetch the end code units column of the value, for the encoding that was
# configured on the repository.
+ #--
+ #: () -> Integer
def end_code_units_column
fetch_value(:end_code_units_column)
end
# Fetch the leading comments of the value.
+ #--
+ #: () -> Array[CommentsField::Comment]
def leading_comments
fetch_value(:leading_comments)
end
# Fetch the trailing comments of the value.
+ #--
+ #: () -> Array[CommentsField::Comment]
def trailing_comments
fetch_value(:trailing_comments)
end
# Fetch the leading and trailing comments of the value.
+ #--
+ #: () -> Array[CommentsField::Comment]
def comments
- leading_comments.concat(trailing_comments)
+ [*leading_comments, *trailing_comments]
end
# Reify the values on this entry with the given values. This is an
# internal-only API that is called from the repository when it is time to
# reify the values.
+ #--
+ #: (entry_values values) -> void
def reify!(values) # :nodoc:
@repository = nil
@values = values
@@ -131,6 +204,8 @@ module Prism
private
# Fetch a value from the entry, raising an error if it is missing.
+ #--
+ #: (Symbol name) -> entry_value
def fetch_value(name)
values.fetch(name) do
raise MissingValueError, "No value for #{name}, make sure the " \
@@ -139,27 +214,35 @@ module Prism
end
# Return the values from the repository, reifying them if necessary.
+ #--
+ #: () -> entry_values
def values
- @values || (@repository.reify!; @values)
+ @values || (@repository&.reify!; @values) #: entry_values
end
end
# Represents the source of a repository that will be reparsed.
class Source
# The value that will need to be reparsed.
- attr_reader :value
+ attr_reader :value #: untyped
# Initialize the source with the given value.
+ #--
+ #: (untyped value) -> void
def initialize(value)
@value = value
end
# Reparse the value and return the parse result.
+ #--
+ #: () -> ParseResult
def result
raise NotImplementedError, "Subclasses must implement #result"
end
# Create a code units cache for the given encoding.
+ #--
+ #: (Encoding encoding) -> _CodeUnitsCache
def code_units_cache(encoding)
result.code_units_cache(encoding)
end
@@ -168,6 +251,8 @@ module Prism
# A source that is represented by a file path.
class SourceFilepath < Source
# Reparse the file and return the parse result.
+ #--
+ #: () -> ParseResult
def result
Prism.parse_file(value)
end
@@ -176,6 +261,8 @@ module Prism
# A source that is represented by a string.
class SourceString < Source
# Reparse the string and return the parse result.
+ #--
+ #: () -> ParseResult
def result
Prism.parse(value)
end
@@ -184,14 +271,18 @@ module Prism
# A field that represents the file path.
class FilepathField
# The file path that this field represents.
- attr_reader :value
+ attr_reader :value #: String
# Initialize a new field with the given file path.
+ #--
+ #: (String value) -> void
def initialize(value)
@value = value
end
# Fetch the file path.
+ #--
+ #: (_Value _value) -> entry_values
def fields(_value)
{ filepath: value }
end
@@ -200,6 +291,8 @@ module Prism
# A field representing the start and end lines.
class LinesField
# Fetches the start and end line of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{ start_line: value.start_line, end_line: value.end_line }
end
@@ -208,6 +301,8 @@ module Prism
# A field representing the start and end byte offsets.
class OffsetsField
# Fetches the start and end byte offset of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{ start_offset: value.start_offset, end_offset: value.end_offset }
end
@@ -216,6 +311,8 @@ module Prism
# A field representing the start and end character offsets.
class CharacterOffsetsField
# Fetches the start and end character offset of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{
start_character_offset: value.start_character_offset,
@@ -228,12 +325,16 @@ module Prism
class CodeUnitOffsetsField
# A pointer to the repository object that is used for lazily creating a
# code units cache.
- attr_reader :repository
+ attr_reader :repository #: Repository
# The associated encoding for the code units.
- attr_reader :encoding
+ attr_reader :encoding #: Encoding
+
+ # @rbs @cache: _CodeUnitsCache?
# Initialize a new field with the associated repository and encoding.
+ #--
+ #: (Repository repository, Encoding encoding) -> void
def initialize(repository, encoding)
@repository = repository
@encoding = encoding
@@ -242,6 +343,8 @@ module Prism
# Fetches the start and end code units offset of a value for a particular
# encoding.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{
start_code_units_offset: value.cached_start_code_units_offset(cache),
@@ -252,6 +355,8 @@ module Prism
private
# Lazily create a code units cache for the associated encoding.
+ #--
+ #: () -> _CodeUnitsCache
def cache
@cache ||= repository.code_units_cache(encoding)
end
@@ -260,6 +365,8 @@ module Prism
# A field representing the start and end byte columns.
class ColumnsField
# Fetches the start and end byte column of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{ start_column: value.start_column, end_column: value.end_column }
end
@@ -268,6 +375,8 @@ module Prism
# A field representing the start and end character columns.
class CharacterColumnsField
# Fetches the start and end character column of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{
start_character_column: value.start_character_column,
@@ -281,12 +390,16 @@ module Prism
class CodeUnitColumnsField
# The repository object that is used for lazily creating a code units
# cache.
- attr_reader :repository
+ attr_reader :repository #: Repository
# The associated encoding for the code units.
- attr_reader :encoding
+ attr_reader :encoding #: Encoding
+
+ # @rbs @cache: _CodeUnitsCache?
# Initialize a new field with the associated repository and encoding.
+ #--
+ #: (Repository repository, Encoding encoding) -> void
def initialize(repository, encoding)
@repository = repository
@encoding = encoding
@@ -295,6 +408,8 @@ module Prism
# Fetches the start and end code units column of a value for a particular
# encoding.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{
start_code_units_column: value.cached_start_code_units_column(cache),
@@ -305,6 +420,8 @@ module Prism
private
# Lazily create a code units cache for the associated encoding.
+ #--
+ #: () -> _CodeUnitsCache
def cache
@cache ||= repository.code_units_cache(encoding)
end
@@ -315,9 +432,11 @@ module Prism
# An object that represents a slice of a comment.
class Comment
# The slice of the comment.
- attr_reader :slice
+ attr_reader :slice #: String
# Initialize a new comment with the given slice.
+ #--
+ #: (String slice) -> void
def initialize(slice)
@slice = slice
end
@@ -326,6 +445,8 @@ module Prism
private
# Create comment objects from the given values.
+ #--
+ #: (entry_value values) -> Array[Comment]
def comments(values)
values.map { |value| Comment.new(value.slice) }
end
@@ -334,6 +455,8 @@ module Prism
# A field representing the leading comments.
class LeadingCommentsField < CommentsField
# Fetches the leading comments of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{ leading_comments: comments(value.leading_comments) }
end
@@ -342,6 +465,8 @@ module Prism
# A field representing the trailing comments.
class TrailingCommentsField < CommentsField
# Fetches the trailing comments of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{ trailing_comments: comments(value.trailing_comments) }
end
@@ -357,15 +482,17 @@ module Prism
# The source associated with this repository. This will be either a
# SourceFilepath (the most common use case) or a SourceString.
- attr_reader :source
+ attr_reader :source #: Source
# The fields that have been configured on this repository.
- attr_reader :fields
+ attr_reader :fields #: Hash[Symbol, _Field]
# The entries that have been saved on this repository.
- attr_reader :entries
+ attr_reader :entries #: Hash[Integer, Hash[Symbol, Entry]]
# Initialize a new repository with the given source.
+ #--
+ #: (Source source) -> void
def initialize(source)
@source = source
@fields = {}
@@ -373,69 +500,93 @@ module Prism
end
# Create a code units cache for the given encoding from the source.
+ #--
+ #: (Encoding encoding) -> _CodeUnitsCache
def code_units_cache(encoding)
source.code_units_cache(encoding)
end
# Configure the filepath field for this repository and return self.
+ #--
+ #: () -> self
def filepath
raise ConfigurationError, "Can only specify filepath for a filepath source" unless source.is_a?(SourceFilepath)
field(:filepath, FilepathField.new(source.value))
end
# Configure the lines field for this repository and return self.
+ #--
+ #: () -> self
def lines
field(:lines, LinesField.new)
end
# Configure the offsets field for this repository and return self.
+ #--
+ #: () -> self
def offsets
field(:offsets, OffsetsField.new)
end
# Configure the character offsets field for this repository and return
# self.
+ #--
+ #: () -> self
def character_offsets
field(:character_offsets, CharacterOffsetsField.new)
end
# Configure the code unit offsets field for this repository for a specific
# encoding and return self.
+ #--
+ #: (Encoding encoding) -> self
def code_unit_offsets(encoding)
field(:code_unit_offsets, CodeUnitOffsetsField.new(self, encoding))
end
# Configure the columns field for this repository and return self.
+ #--
+ #: () -> self
def columns
field(:columns, ColumnsField.new)
end
# Configure the character columns field for this repository and return
# self.
+ #--
+ #: () -> self
def character_columns
field(:character_columns, CharacterColumnsField.new)
end
# Configure the code unit columns field for this repository for a specific
# encoding and return self.
+ #--
+ #: (Encoding encoding) -> self
def code_unit_columns(encoding)
field(:code_unit_columns, CodeUnitColumnsField.new(self, encoding))
end
# Configure the leading comments field for this repository and return
# self.
+ #--
+ #: () -> self
def leading_comments
field(:leading_comments, LeadingCommentsField.new)
end
# Configure the trailing comments field for this repository and return
# self.
+ #--
+ #: () -> self
def trailing_comments
field(:trailing_comments, TrailingCommentsField.new)
end
# Configure both the leading and trailing comment fields for this
# repository and return self.
+ #--
+ #: () -> self
def comments
leading_comments.trailing_comments
end
@@ -443,6 +594,8 @@ module Prism
# This method is called from nodes and locations when they want to enter
# themselves into the repository. It is internal-only and meant to be
# called from the #save* APIs.
+ #--
+ #: (Integer node_id, Symbol field_name) -> Entry
def enter(node_id, field_name) # :nodoc:
entry = Entry.new(self)
@entries[node_id][field_name] = entry
@@ -452,6 +605,8 @@ module Prism
# This method is called from the entries in the repository when they need
# to reify their values. It is internal-only and meant to be called from
# the various value APIs.
+ #--
+ #: () -> void
def reify! # :nodoc:
result = source.result
@@ -465,7 +620,7 @@ module Prism
while (node = queue.shift)
@entries[node.node_id].each do |field_name, entry|
value = node.public_send(field_name)
- values = {} #: Hash[Symbol, untyped]
+ values = {} #: entry_values
fields.each_value do |field|
values.merge!(field.fields(value))
@@ -484,6 +639,8 @@ module Prism
# Append the given field to the repository and return the repository so
# that these calls can be chained.
+ #--
+ #: (Symbol name, _Field value) -> self
def field(name, value)
raise ConfigurationError, "Cannot specify multiple #{name} fields" if @fields.key?(name)
@fields[name] = value
@@ -492,11 +649,15 @@ module Prism
end
# Create a new repository for the given filepath.
+ #--
+ #: (String value) -> Repository
def self.filepath(value)
Repository.new(SourceFilepath.new(value))
end
# Create a new repository for the given string.
+ #--
+ #: (String value) -> Repository
def self.string(value)
Repository.new(SourceString.new(value))
end
diff --git a/lib/prism/string_query.rb b/lib/prism/string_query.rb
index 9011051d2b..99ce57e5fe 100644
--- a/lib/prism/string_query.rb
+++ b/lib/prism/string_query.rb
@@ -1,28 +1,44 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
# Query methods that allow categorizing strings based on their context for
# where they could be valid in a Ruby syntax tree.
class StringQuery
+ # @rbs!
+ # def self.local?: (String string) -> bool
+ # def self.constant?: (String string) -> bool
+ # def self.method_name?: (String string) -> bool
+
# The string that this query is wrapping.
- attr_reader :string
+ attr_reader :string #: String
# Initialize a new query with the given string.
+ #--
+ #: (String string) -> void
def initialize(string)
@string = string
end
# Whether or not this string is a valid local variable name.
+ #--
+ #: () -> bool
def local?
StringQuery.local?(string)
end
# Whether or not this string is a valid constant name.
+ #--
+ #: () -> bool
def constant?
StringQuery.constant?(string)
end
# Whether or not this string is a valid method name.
+ #--
+ #: () -> bool
def method_name?
StringQuery.method_name?(string)
end
diff --git a/lib/prism/translation.rb b/lib/prism/translation.rb
index 511c80febc..5a086a7542 100644
--- a/lib/prism/translation.rb
+++ b/lib/prism/translation.rb
@@ -1,4 +1,7 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
# This module is responsible for converting the prism syntax tree into other
@@ -6,9 +9,11 @@ module Prism
module Translation # steep:ignore
autoload :Parser, "prism/translation/parser"
autoload :ParserCurrent, "prism/translation/parser_current"
- autoload :Parser33, "prism/translation/parser33"
- autoload :Parser34, "prism/translation/parser34"
- autoload :Parser35, "prism/translation/parser35"
+ autoload :Parser33, "prism/translation/parser_versions"
+ autoload :Parser34, "prism/translation/parser_versions"
+ autoload :Parser35, "prism/translation/parser_versions"
+ autoload :Parser40, "prism/translation/parser_versions"
+ autoload :Parser41, "prism/translation/parser_versions"
autoload :Ripper, "prism/translation/ripper"
autoload :RubyParser, "prism/translation/ruby_parser"
end
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb
index d43ad7c1e4..70031f133a 100644
--- a/lib/prism/translation/parser.rb
+++ b/lib/prism/translation/parser.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: true
+# :markup: markdown
begin
required_version = ">= 3.3.7.2"
@@ -18,6 +19,13 @@ module Prism
# whitequark/parser gem's syntax tree. It inherits from the base parser for
# the parser gem, and overrides the parse* methods to parse with prism and
# then translate.
+ #
+ # Note that this version of the parser always parses using the latest
+ # version of Ruby syntax supported by Prism. If you want specific version
+ # support, use one of the version-specific subclasses, such as
+ # `Prism::Translation::Parser34`. If you want to parse using the same
+ # version of Ruby syntax as the currently running version of Ruby, use
+ # `Prism::Translation::ParserCurrent`.
class Parser < ::Parser::Base
Diagnostic = ::Parser::Diagnostic # :nodoc:
private_constant :Diagnostic
@@ -25,7 +33,7 @@ module Prism
# The parser gem has a list of diagnostics with a hard-coded set of error
# messages. We create our own diagnostic class in order to set our own
# error messages.
- class PrismDiagnostic < Diagnostic
+ class PrismDiagnostic < Diagnostic # :nodoc:
# This is the cached message coming from prism.
attr_reader :message
@@ -76,7 +84,7 @@ module Prism
end
def version # :nodoc:
- 34
+ 41
end
# The default encoding for Ruby files is UTF-8.
@@ -348,8 +356,10 @@ module Prism
"3.3.1"
when 34
"3.4.0"
- when 35
- "3.5.0"
+ when 35, 40
+ "4.0.0"
+ when 41
+ "4.1.0"
else
"latest"
end
diff --git a/lib/prism/translation/parser/builder.rb b/lib/prism/translation/parser/builder.rb
index d3b51f4275..7fc3bba6b7 100644
--- a/lib/prism/translation/parser/builder.rb
+++ b/lib/prism/translation/parser/builder.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: true
+# :markup: markdown
module Prism
module Translation
@@ -6,12 +7,14 @@ module Prism
# A builder that knows how to convert more modern Ruby syntax
# into whitequark/parser gem's syntax tree.
class Builder < ::Parser::Builders::Default
- # It represents the `it` block argument, which is not yet implemented in the Parser gem.
+ # It represents the `it` block argument, which is not yet implemented in
+ # the Parser gem.
def itarg
n(:itarg, [:it], nil)
end
- # The following three lines have been added to support the `it` block parameter syntax in the source code below.
+ # The following three lines have been added to support the `it` block
+ # parameter syntax in the source code below.
#
# if args.type == :itarg
# block_type = :itblock
@@ -55,6 +58,12 @@ module Prism
method_call.loc.with_expression(join_exprs(method_call, block)))
end
end
+
+ # def foo(&nil); end
+ # ^^^^
+ def blocknilarg(amper_t, nil_t)
+ n0(:blocknilarg, arg_prefix_map(amper_t, nil_t))
+ end
end
end
end
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
index 0bd9d74f93..d11db12ae6 100644
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
@@ -1,13 +1,14 @@
# frozen_string_literal: true
+# :markup: markdown
module Prism
module Translation
class Parser
# A visitor that knows how to convert a prism syntax tree into the
# whitequark/parser gem's syntax tree.
- class Compiler < ::Prism::Compiler
+ class Compiler < ::Prism::Compiler # :nodoc:
# Raised when the tree is malformed or there is a bug in the compiler.
- class CompilationError < StandardError
+ class CompilationError < StandardError # :nodoc:
end
# The Parser::Base instance that is being used to build the AST.
@@ -216,7 +217,7 @@ module Prism
rescue_clause.exceptions.any? ? builder.array(nil, visit_all(rescue_clause.exceptions), nil) : nil,
token(rescue_clause.operator_loc),
visit(rescue_clause.reference),
- srange_find(find_start_offset, find_end_offset, ";"),
+ srange_semicolon(find_start_offset, find_end_offset),
visit(rescue_clause.statements)
)
end until (rescue_clause = rescue_clause.subsequent).nil?
@@ -296,11 +297,6 @@ module Prism
if node.call_operator_loc.nil?
case name
- when :-@
- case (receiver = node.receiver).type
- when :integer_node, :float_node, :rational_node, :imaginary_node
- return visit(numeric_negate(node.message_loc, receiver))
- end
when :!
return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
when :=~
@@ -322,7 +318,7 @@ module Prism
visit_all(arguments),
token(node.closing_loc),
),
- srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, "="),
+ token(node.equal_loc),
visit(node.arguments.arguments.last)
),
block
@@ -339,7 +335,7 @@ module Prism
if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
builder.assign(
builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
- srange_find(message_loc.end_offset, node.arguments.location.start_offset, "="),
+ token(node.equal_loc),
visit(node.arguments.arguments.last)
)
else
@@ -788,7 +784,7 @@ module Prism
if (do_keyword_loc = node.do_keyword_loc)
token(do_keyword_loc)
else
- srange_find(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset, ";")
+ srange_semicolon(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset)
end,
visit(node.statements),
token(node.end_keyword_loc)
@@ -920,7 +916,7 @@ module Prism
if (then_keyword_loc = node.then_keyword_loc)
token(then_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset, ";")
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset)
end,
visit(node.statements),
case node.subsequent
@@ -986,7 +982,7 @@ module Prism
if (then_loc = node.then_loc)
token(then_loc)
else
- srange_find(node.pattern.location.end_offset, node.statements&.location&.start_offset, ";")
+ srange_semicolon(node.pattern.location.end_offset, node.statements&.location&.start_offset)
end,
visit(node.statements)
)
@@ -1323,7 +1319,7 @@ module Prism
# A node that is missing from the syntax tree. This is only used in the
# case of a syntax error. The parser gem doesn't have such a concept, so
# we invent our own here.
- def visit_missing_node(node)
+ def visit_error_recovery_node(node)
::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location)))
end
@@ -1389,6 +1385,12 @@ module Prism
builder.nil(token(node.location))
end
+ # def foo(&nil); end
+ # ^^^^
+ def visit_no_block_parameter_node(node)
+ builder.blocknilarg(token(node.operator_loc), token(node.keyword_loc))
+ end
+
# def foo(**nil); end
# ^^^^^
def visit_no_keywords_parameter_node(node)
@@ -1766,7 +1768,7 @@ module Prism
end
else
parts =
- if node.value == ""
+ if node.value_loc.nil?
[]
elsif node.value.include?("\n")
string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
@@ -1807,7 +1809,7 @@ module Prism
if (then_keyword_loc = node.then_keyword_loc)
token(then_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset, ";")
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset)
end,
visit(node.else_clause),
token(node.else_clause&.else_keyword_loc),
@@ -1838,7 +1840,7 @@ module Prism
if (do_keyword_loc = node.do_keyword_loc)
token(do_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";")
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset)
end,
visit(node.statements),
token(node.closing_loc)
@@ -1862,7 +1864,7 @@ module Prism
if (then_keyword_loc = node.then_keyword_loc)
token(then_keyword_loc)
else
- srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset, ";")
+ srange_semicolon(node.conditions.last.location.end_offset, node.statements&.location&.start_offset)
end,
visit(node.statements)
)
@@ -1882,7 +1884,7 @@ module Prism
if (do_keyword_loc = node.do_keyword_loc)
token(do_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";")
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset)
end,
visit(node.statements),
token(node.closing_loc)
@@ -1966,22 +1968,6 @@ module Prism
elements
end
- # Negate the value of a numeric node. This is a special case where you
- # have a negative sign on one line and then a number on the next line.
- # In normal Ruby, this will always be a method call. The parser gem,
- # however, marks this as a numeric literal. We have to massage the tree
- # here to get it into the correct form.
- def numeric_negate(message_loc, receiver)
- case receiver.type
- when :integer_node, :float_node
- receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location))
- when :rational_node
- receiver.copy(numerator: -receiver.numerator, location: message_loc.join(receiver.location))
- when :imaginary_node
- receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location))
- end
- end
-
# Blocks can have a special set of parameters that automatically expand
# when given arrays if they have a single required parameter and no
# other parameters.
@@ -2011,16 +1997,16 @@ module Prism
Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
end
- # Constructs a new source range by finding the given character between
- # the given start offset and end offset. If the needle is not found, it
- # returns nil. Importantly it does not search past newlines or comments.
+ # Constructs a new source range by finding a semicolon between the given
+ # start offset and end offset. If the semicolon is not found, it returns
+ # nil. Importantly it does not search past newlines or comments.
#
# Note that end_offset is allowed to be nil, in which case this will
# search until the end of the string.
- def srange_find(start_offset, end_offset, character)
- if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*#{character}/])
+ def srange_semicolon(start_offset, end_offset)
+ if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*;/])
final_offset = start_offset + match.bytesize
- [character, Range.new(source_buffer, offset_cache[final_offset - character.bytesize], offset_cache[final_offset])]
+ [";", Range.new(source_buffer, offset_cache[final_offset - 1], offset_cache[final_offset])]
end
end
@@ -2192,7 +2178,7 @@ module Prism
else
lines.sum do |line|
count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
- count -= 1 if !line.end_with?("\n") && count > 0
+ count -= 1 if line.match?(/(?:\A|[^\\])(?:\\\\)*\\n\z/) && count > 0
count
end
end
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index 8f2d065b73..e82042867f 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -1,14 +1,16 @@
# frozen_string_literal: true
+# :markup: markdown
require "strscan"
require_relative "../../polyfill/append_as_bytes"
+require_relative "../../polyfill/scan_byte"
module Prism
module Translation
class Parser
# Accepts a list of prism tokens and converts them into the expected
# format for the parser gem.
- class Lexer
+ class Lexer # :nodoc:
# These tokens are always skipped
TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF])
private_constant :TYPES_ALWAYS_SKIP
@@ -16,8 +18,6 @@ module Prism
# The direct translating of types between the two lexers.
TYPES = {
# These tokens should never appear in the output of the lexer.
- MISSING: nil,
- NOT_PROVIDED: nil,
EMBDOC_END: nil,
EMBDOC_LINE: nil,
@@ -87,6 +87,7 @@ module Prism
KEYWORD_DEF: :kDEF,
KEYWORD_DEFINED: :kDEFINED,
KEYWORD_DO: :kDO,
+ KEYWORD_DO_BLOCK: :kDO_BLOCK,
KEYWORD_DO_LOOP: :kDO_COND,
KEYWORD_END: :kEND,
KEYWORD_END_UPCASE: :klEND,
@@ -188,8 +189,8 @@ module Prism
# without them. We should find another way to do this, but in the
# meantime we'll hide them from the documentation and mark them as
# private constants.
- EXPR_BEG = 0x1 # :nodoc:
- EXPR_LABEL = 0x400 # :nodoc:
+ EXPR_BEG = 0x1
+ EXPR_LABEL = 0x400
# It is used to determine whether `do` is of the token type `kDO` or `kDO_LAMBDA`.
#
@@ -201,7 +202,7 @@ module Prism
# The following token types are listed as those classified as `tLPAREN`.
LPAREN_CONVERSION_TOKEN_TYPES = Set.new([
:kBREAK, :tCARET, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
- :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
+ :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS, :tLCURLY
])
# Types of tokens that are allowed to continue a method call with comments in-between.
@@ -232,7 +233,7 @@ module Prism
@offset_cache = offset_cache
end
- Range = ::Parser::Source::Range # :nodoc:
+ Range = ::Parser::Source::Range
private_constant :Range
# Convert the prism tokens into the expected format for the parser gem.
@@ -275,20 +276,20 @@ module Prism
when :tCOMMENT
if token.type == :EMBDOC_BEGIN
- while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
+ while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1)
value += next_token.value
index += 1
end
value += next_token.value
- location = range(token.location.start_offset, lexed[index][0].location.end_offset)
+ location = range(token.location.start_offset, next_token.location.end_offset)
index += 1
else
is_at_eol = value.chomp!.nil?
location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1))
- prev_token = lexed[index - 2][0] if index - 2 >= 0
- next_token = lexed[index][0]
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ next_token, _ = lexed[index]
is_inline_comment = prev_token&.location&.start_line == token.location.start_line
if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
@@ -307,7 +308,7 @@ module Prism
end
end
when :tNL
- next_token = next_token = lexed[index][0]
+ next_token, _ = lexed[index]
# Newlines after comments are emitted out of order.
if next_token&.type == :COMMENT
comment_newline_location = location
@@ -344,8 +345,8 @@ module Prism
location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
value = nil
when :tSTRING_BEG
- next_token = lexed[index][0]
- next_next_token = lexed[index + 1][0]
+ next_token, _ = lexed[index]
+ next_next_token, _ = lexed[index + 1]
basic_quotes = value == '"' || value == "'"
if basic_quotes && next_token&.type == :STRING_END
@@ -413,7 +414,8 @@ module Prism
while token.type == :STRING_CONTENT
current_length += token.value.bytesize
# Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
- is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line
# The parser gem only removes indentation when the heredoc is not nested
not_nested = heredoc_stack.size == 1
if is_percent_array
@@ -423,11 +425,16 @@ module Prism
end
current_string << unescape_string(value, quote_stack.last)
- if (backslash_count = token.value[/(\\{1,})\n/, 1]&.length).nil? || backslash_count.even? || !interpolation?(quote_stack.last)
+ relevant_backslash_count = if quote_stack.last.start_with?("%W", "%I")
+ 0 # the last backslash escapes the newline
+ else
+ token.value[/(\\{1,})\n/, 1]&.length || 0
+ end
+ if relevant_backslash_count.even? || !interpolation?(quote_stack.last)
tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
break
end
- token = lexed[index][0]
+ token, _ = lexed[index]
index += 1
end
else
@@ -482,7 +489,7 @@ module Prism
end
if percent_array?(quote_stack.pop)
- prev_token = lexed[index - 2][0] if index - 2 >= 0
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type)
ends_with_whitespace = prev_token&.type == :WORDS_SEP
# parser always emits a space token after content in a percent array, even if no actual whitespace is present.
@@ -491,7 +498,7 @@ module Prism
end
end
when :tSYMBEG
- if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
+ if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
next_location = token.location.join(next_token.location)
type = :tSYMBOL
value = next_token.value
@@ -506,13 +513,13 @@ module Prism
type = :tIDENTIFIER
end
when :tXSTRING_BEG
- if (next_token = lexed[index][0]) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
+ if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
# self.`()
type = :tBACK_REF2
end
quote_stack.push(value)
when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG
- if (next_token = lexed[index][0]) && next_token.type == :WORDS_SEP
+ if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP
index += 1
end
@@ -588,9 +595,9 @@ module Prism
previous_line = -1
result = Float::MAX
- while (lexed[next_token_index] && next_token = lexed[next_token_index][0])
+ while (next_token = lexed[next_token_index]&.first)
next_token_index += 1
- next_next_token = lexed[next_token_index] && lexed[next_token_index][0]
+ next_next_token, _ = lexed[next_token_index]
first_token_on_line = next_token.location.start_column == 0
# String content inside nested heredocs and interpolation is ignored
@@ -761,12 +768,12 @@ module Prism
elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/))
# \M-x where x is an ASCII printable character
escape_read(result, scanner, control, true)
- elsif (byte = scanner.get_byte)
+ elsif (byte = scanner.scan_byte)
# Something else after an escape.
- if control && byte == "?"
+ if control && byte == 0x3f # ASCII '?'
result.append_as_bytes(escape_build(0x7f, false, meta))
else
- result.append_as_bytes(escape_build(byte.ord, control, meta))
+ result.append_as_bytes(escape_build(byte, control, meta))
end
end
end
diff --git a/lib/prism/translation/parser33.rb b/lib/prism/translation/parser33.rb
deleted file mode 100644
index b09266e06a..0000000000
--- a/lib/prism/translation/parser33.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-
-module Prism
- module Translation
- # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`.
- class Parser33 < Parser
- def version # :nodoc:
- 33
- end
- end
- end
-end
diff --git a/lib/prism/translation/parser34.rb b/lib/prism/translation/parser34.rb
deleted file mode 100644
index 0ead70ad3c..0000000000
--- a/lib/prism/translation/parser34.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-
-module Prism
- module Translation
- # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`.
- class Parser34 < Parser
- def version # :nodoc:
- 34
- end
- end
- end
-end
diff --git a/lib/prism/translation/parser35.rb b/lib/prism/translation/parser35.rb
deleted file mode 100644
index a6abc12589..0000000000
--- a/lib/prism/translation/parser35.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-
-module Prism
- module Translation
- # This class is the entry-point for Ruby 3.5 of `Prism::Translation::Parser`.
- class Parser35 < Parser
- def version # :nodoc:
- 35
- end
- end
- end
-end
diff --git a/lib/prism/translation/parser_current.rb b/lib/prism/translation/parser_current.rb
index b44769fde7..f7c1070e30 100644
--- a/lib/prism/translation/parser_current.rb
+++ b/lib/prism/translation/parser_current.rb
@@ -1,4 +1,6 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
# typed: ignore
module Prism
@@ -8,11 +10,13 @@ module Prism
ParserCurrent = Parser33
when /^3\.4\./
ParserCurrent = Parser34
- when /^3\.5\./
- ParserCurrent = Parser35
+ when /^3\.5\./, /^4\.0\./
+ ParserCurrent = Parser40
+ when /^4\.1\./
+ ParserCurrent = Parser41
else
# Keep this in sync with released Ruby.
- parser = Parser34
+ parser = Parser40
major, minor, _patch = Gem::Version.new(RUBY_VERSION).segments
warn "warning: `Prism::Translation::Current` is loading #{parser.name}, " \
"but you are running #{major}.#{minor}."
diff --git a/lib/prism/translation/parser_versions.rb b/lib/prism/translation/parser_versions.rb
new file mode 100644
index 0000000000..720c7d548c
--- /dev/null
+++ b/lib/prism/translation/parser_versions.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+module Prism
+ module Translation
+ # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`.
+ class Parser33 < Parser
+ def version # :nodoc:
+ 33
+ end
+ end
+
+ # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`.
+ class Parser34 < Parser
+ def version # :nodoc:
+ 34
+ end
+ end
+
+ # This class is the entry-point for Ruby 4.0 of `Prism::Translation::Parser`.
+ class Parser40 < Parser
+ def version # :nodoc:
+ 40
+ end
+ end
+
+ Parser35 = Parser40 # :nodoc:
+
+ # This class is the entry-point for Ruby 4.1 of `Prism::Translation::Parser`.
+ class Parser41 < Parser
+ def version # :nodoc:
+ 41
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index 95f366ac91..f179a149a1 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -1,6 +1,5 @@
# frozen_string_literal: true
-
-require "ripper"
+# :markup: markdown
module Prism
module Translation
@@ -23,22 +22,10 @@ module Prism
# - on_comma
# - on_ignored_nl
# - on_ignored_sp
- # - on_kw
- # - on_label_end
- # - on_lbrace
- # - on_lbracket
- # - on_lparen
# - on_nl
- # - on_op
# - on_operator_ambiguous
- # - on_rbrace
- # - on_rbracket
- # - on_rparen
# - on_semicolon
# - on_sp
- # - on_symbeg
- # - on_tstring_beg
- # - on_tstring_end
#
class Ripper < Compiler
# Parses the given Ruby program read from +src+.
@@ -70,7 +57,8 @@ module Prism
# [[1, 13], :on_kw, "end", END ]]
#
def self.lex(src, filename = "-", lineno = 1, raise_errors: false)
- result = Prism.lex_compat(src, filepath: filename, line: lineno)
+ coerced = coerce_source(src)
+ result = Prism.lex_compat(coerced, filepath: filename, line: lineno, version: "current", encoding: coerced.encoding)
if result.failure? && raise_errors
raise SyntaxError, result.errors.first.message
@@ -79,6 +67,34 @@ module Prism
end
end
+ # Tokenizes the Ruby program and returns an array of strings.
+ # The +filename+ and +lineno+ arguments are mostly ignored, since the
+ # return value is just the tokenized input.
+ # By default, this method does not handle syntax errors in +src+,
+ # use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+.
+ #
+ # p Ripper.tokenize("def m(a) nil end")
+ # # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]
+ #
+ def self.tokenize(...)
+ lex(...).map { |token| token[2] }
+ end
+
+ # Mirrors the various lex_types that ripper supports
+ def self.coerce_source(source) # :nodoc:
+ if source.is_a?(IO)
+ source.read
+ elsif source.respond_to?(:gets)
+ src = +""
+ while line = source.gets
+ src << line
+ end
+ src
+ else
+ source.to_str
+ end
+ end
+
# This contains a table of all of the parser events and their
# corresponding arity.
PARSER_EVENT_TABLE = {
@@ -331,7 +347,7 @@ module Prism
"__ENCODING__",
"__FILE__",
"__LINE__"
- ]
+ ].to_set
# A list of all of the Ruby binary operators.
BINARY_OPERATORS = [
@@ -356,7 +372,7 @@ module Prism
:/,
:*,
:**
- ]
+ ].to_set
private_constant :KEYWORDS, :BINARY_OPERATORS
@@ -425,9 +441,93 @@ module Prism
end
end
+ autoload :Filter, "prism/translation/ripper/filter"
+ autoload :Lexer, "prism/translation/ripper/lexer"
autoload :SexpBuilder, "prism/translation/ripper/sexp"
autoload :SexpBuilderPP, "prism/translation/ripper/sexp"
+ # Provides optimized access to line and column information.
+ # Ripper bounds are mostly accessed in a linear fashion, so
+ # we can try a linear scan first and fall back to binary search.
+ class LineAndColumnCache # :nodoc:
+ # How many lines to look ahead/behind before falling back to a binary search.
+ WINDOW = 8
+ private_constant :WINDOW
+
+ #: (Source source) -> void
+ def initialize(source)
+ @source = source
+ @offsets = source.offsets
+ @hint = 0
+ end
+
+ #: (Integer byte_offset) -> [Integer, Integer]
+ def line_and_column(byte_offset)
+ @hint = new_hint(byte_offset) || @source.find_line(byte_offset)
+ return [@hint + @source.start_line, byte_offset - @offsets[@hint]]
+ end
+
+ private
+
+ def new_hint(byte_offset)
+ if @offsets[@hint] <= byte_offset
+ # Same line?
+ if (@hint + 1 >= @offsets.size || @offsets[@hint + 1] > byte_offset)
+ return @hint
+ end
+
+ # Scan forwards
+ limit = [@hint + WINDOW + 1, @offsets.size].min
+ idx = @hint + 1
+ while idx < limit
+ if @offsets[idx] > byte_offset
+ return idx - 1
+ end
+ if @offsets[idx] == byte_offset
+ return idx
+ end
+ idx += 1
+ end
+ else
+ # Scan backwards
+ limit = @hint > WINDOW ? @hint - WINDOW : 0
+ idx = @hint
+ while idx >= limit + 1
+ if @offsets[idx - 1] <= byte_offset
+ return idx - 1
+ end
+ idx -= 1
+ end
+ end
+
+ nil
+ end
+ end
+
+ # :stopdoc:
+ # This is not part of the public API but used by some gems.
+
+ # Ripper-internal bitflags.
+ LEX_STATE_NAMES = %i[
+ BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM
+ ].map.with_index.to_h { |name, i| [2 ** i, name] }.freeze
+ private_constant :LEX_STATE_NAMES
+
+ LEX_STATE_NAMES.each do |value, key|
+ const_set("EXPR_#{key}", value)
+ end
+ EXPR_NONE = 0
+ EXPR_VALUE = EXPR_BEG
+ EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
+ EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
+ EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
+
+ def self.lex_state_name(state)
+ LEX_STATE_NAMES.filter_map { |flag, name| name if state & flag != 0 }.join("|")
+ end
+
+ # :startdoc:
+
# The source that is being parsed.
attr_reader :source
@@ -437,16 +537,17 @@ module Prism
# The current line number of the parser.
attr_reader :lineno
- # The current column number of the parser.
+ # The current column in bytes of the parser.
attr_reader :column
# Create a new Translation::Ripper object with the given source.
def initialize(source, filename = "(ripper)", lineno = 1)
- @source = source
+ @source = Ripper.coerce_source(source)
@filename = filename
@lineno = lineno
@column = 0
@result = nil
+ @line_and_column_cache = nil
end
##########################################################################
@@ -465,7 +566,12 @@ module Prism
bounds(location)
if comment.is_a?(InlineComment)
- on_comment(comment.slice)
+ # Inline comments always contain a newline if the line itself contains it
+ if result.source.source.bytesize > comment.location.end_offset
+ on_comment("#{comment.slice}\n")
+ else
+ on_comment(comment.slice)
+ end
else
offset = location.start_offset
lines = comment.slice.lines
@@ -546,9 +652,14 @@ module Prism
# Visitor methods
##########################################################################
+ # :stopdoc:
+
# alias foo bar
# ^^^^^^^^^^^^^
def visit_alias_method_node(node)
+ bounds(node.keyword_loc)
+ on_kw("alias")
+
new_name = visit(node.new_name)
old_name = visit(node.old_name)
@@ -559,6 +670,9 @@ module Prism
# alias $foo $bar
# ^^^^^^^^^^^^^^^
def visit_alias_global_variable_node(node)
+ bounds(node.keyword_loc)
+ on_kw("alias")
+
new_name = visit_alias_global_variable_node_value(node.new_name)
old_name = visit_alias_global_variable_node_value(node.old_name)
@@ -584,6 +698,10 @@ module Prism
# ^^^^^^^^^
def visit_alternation_pattern_node(node)
left = visit_pattern_node(node.left)
+
+ bounds(node.operator_loc)
+ on_op("|")
+
right = visit_pattern_node(node.right)
bounds(node.location)
@@ -594,7 +712,13 @@ module Prism
# parenthesis node that can be used to wrap patterns.
private def visit_pattern_node(node)
if node.is_a?(ParenthesesNode)
- visit(node.body)
+ bounds(node.opening_loc)
+ on_lparen("(")
+ result = visit(node.body)
+ bounds(node.closing_loc)
+ on_rparen(")")
+
+ result
else
visit(node)
end
@@ -604,6 +728,14 @@ module Prism
# ^^^^^^^
def visit_and_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ if node.operator == "and"
+ on_kw("and")
+ else
+ on_op("&&")
+ end
+
right = visit(node.right)
bounds(node.location)
@@ -631,6 +763,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
when /^%i/
@@ -650,6 +784,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
when /^%W/
@@ -687,6 +823,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
when /^%I/
@@ -724,6 +862,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
else
@@ -740,15 +880,21 @@ module Prism
on_array(elements)
end
- # Dispatch a words_sep event that contains the space between the elements
+ # Dispatch words_sep events that contain the whitespace between the elements
# of list literals.
private def visit_words_sep(opening_loc, previous, current)
- end_offset = (previous.nil? ? opening_loc : previous.location).end_offset
- start_offset = current.location.start_offset
-
- if end_offset != start_offset
- bounds(current.location.copy(start_offset: end_offset))
- on_words_sep(source.byteslice(end_offset...start_offset))
+ start_offset = (previous.nil? ? opening_loc : previous.location).end_offset
+ end_offset = current.start_offset
+ length = end_offset - start_offset
+
+ if length > 0
+ whitespace = source.byteslice(start_offset, length)
+ current_offset = start_offset
+ whitespace.each_line do |part|
+ bounds(opening_loc.copy(start_offset: current_offset, length: part.bytesize))
+ on_words_sep(part)
+ current_offset += part.bytesize
+ end
end
end
@@ -774,9 +920,18 @@ module Prism
# ^^^^^
def visit_array_pattern_node(node)
constant = visit(node.constant)
+
+ if node.opening_loc
+ bounds(node.opening_loc)
+ node.opening == "[" ? on_lbracket("[") : on_lparen("(")
+ end
+
requireds = visit_all(node.requireds) if node.requireds.any?
rest =
if (rest_node = node.rest).is_a?(SplatNode)
+ bounds(rest_node.operator_loc)
+ on_op("*")
+
if rest_node.expression.nil?
bounds(rest_node.location)
on_var_field(nil)
@@ -787,6 +942,10 @@ module Prism
posts = visit_all(node.posts) if node.posts.any?
+ if node.closing_loc
+ bounds(node.closing_loc)
+ node.closing == "]" ? on_rbracket("]") : on_rparen(")")
+ end
bounds(node.location)
on_aryptn(constant, requireds, rest, posts)
end
@@ -802,6 +961,12 @@ module Prism
# ^^^^
def visit_assoc_node(node)
key = visit(node.key)
+
+ if node.operator_loc
+ bounds(node.operator_loc)
+ on_op("=>")
+ end
+
value = visit(node.value)
bounds(node.location)
@@ -814,6 +979,9 @@ module Prism
# { **foo }
# ^^^^^
def visit_assoc_splat_node(node)
+ bounds(node.operator_loc)
+ on_op("**")
+
value = visit(node.value)
bounds(node.location)
@@ -830,8 +998,18 @@ module Prism
# begin end
# ^^^^^^^^^
def visit_begin_node(node)
+ if node.begin_keyword_loc
+ bounds(node.begin_keyword_loc)
+ on_kw("begin")
+ end
+
clauses = visit_begin_node_clauses(node.begin_keyword_loc, node, false)
+ if node.end_keyword_loc
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
on_begin(clauses)
end
@@ -843,7 +1021,7 @@ module Prism
on_stmts_add(on_stmts_new, on_void_stmt)
else
body = node.statements.body
- body.unshift(nil) if void_stmt?(location, node.statements.body[0].location, allow_newline)
+ body = [nil, *body] if void_stmt?(location, node.statements.body[0].location, allow_newline)
bounds(node.statements.location)
visit_statements_node_body(body)
@@ -852,12 +1030,15 @@ module Prism
rescue_clause = visit(node.rescue_clause)
else_clause =
unless (else_clause_node = node.else_clause).nil?
+ bounds(else_clause_node.else_keyword_loc)
+ on_kw("else")
+
else_statements =
if else_clause_node.statements.nil?
[nil]
else
body = else_clause_node.statements.body
- body.unshift(nil) if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline)
+ body = [nil, *body] if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline)
body
end
@@ -879,7 +1060,7 @@ module Prism
on_bodystmt(visit_statements_node_body([nil]), nil, nil, nil)
when StatementsNode
body = [*node.body]
- body.unshift(nil) if void_stmt?(location, body[0].location, allow_newline)
+ body = [nil, *body] if void_stmt?(location, body[0].location, allow_newline)
stmts = visit_statements_node_body(body)
bounds(node.body.first.location)
@@ -894,6 +1075,8 @@ module Prism
# foo(&bar)
# ^^^^
# Emits an on_op("&") event bounded by the operator location, then returns
# the result of visiting the argument's expression.
def visit_block_argument_node(node)
+ bounds(node.operator_loc)
+ on_op("&")
visit(node.expression)
end
@@ -907,6 +1090,13 @@ module Prism
# Visit a BlockNode.
def visit_block_node(node)
braces = node.opening == "{"
+ bounds(node.opening_loc)
+ if braces
+ on_lbrace("{")
+ else
+ on_kw("do")
+ end
+
parameters = visit(node.parameters)
body =
@@ -919,7 +1109,7 @@ module Prism
braces ? stmts : on_bodystmt(stmts, nil, nil, nil)
when StatementsNode
stmts = node.body.body
- stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
+ stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
stmts = visit_statements_node_body(stmts)
bounds(node.body.location)
@@ -931,6 +1121,14 @@ module Prism
end
if braces
+ bounds(node.closing_loc)
+ on_rbrace("}")
+ else
+ bounds(node.closing_loc)
+ on_kw("end")
+ end
+
+ if braces
bounds(node.location)
on_brace_block(parameters, body)
else
@@ -942,12 +1140,15 @@ module Prism
# def foo(&bar); end
# ^^^^
def visit_block_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("&")
+
if node.name_loc.nil?
bounds(node.location)
on_blockarg(nil)
else
bounds(node.name_loc)
- name = visit_token(node.name.to_s)
+ name = on_ident(node.name.to_s)
bounds(node.location)
on_blockarg(name)
@@ -956,6 +1157,9 @@ module Prism
# A block's parameters.
def visit_block_parameters_node(node)
+ bounds(node.opening_loc)
+ on_op("|")
+
parameters =
if node.parameters.nil?
on_params(nil, nil, nil, nil, nil, nil, nil)
@@ -970,6 +1174,9 @@ module Prism
false
end
+ bounds(node.closing_loc)
+ on_op("|")
+
bounds(node.location)
on_block_var(parameters, locals)
end
@@ -980,6 +1187,9 @@ module Prism
# break foo
# ^^^^^^^^^
def visit_break_node(node)
+ bounds(node.keyword_loc)
+ on_kw("break")
+
if node.arguments.nil?
bounds(node.location)
on_break(on_args_new)
@@ -1004,20 +1214,32 @@ module Prism
case node.name
when :[]
receiver = visit(node.receiver)
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
+ block = visit(block_node)
bounds(node.location)
call = on_aref(receiver, arguments)
- if block.nil?
- call
- else
+ if block_node
bounds(node.location)
on_method_add_block(call, block)
+ else
+ call
end
when :[]=
receiver = visit(node.receiver)
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
*arguments, last_argument = node.arguments.arguments
arguments << node.block if !node.block.nil?
@@ -1033,6 +1255,11 @@ module Prism
end
end
+ bounds(node.closing_loc)
+ on_rbracket("]")
+ bounds(node.equal_loc)
+ on_op("=")
+
bounds(node.location)
call = on_aref_field(receiver, arguments)
value = visit_write_value(last_argument)
@@ -1040,27 +1267,54 @@ module Prism
bounds(last_argument.location)
on_assign(call, value)
when :-@, :+@, :~
- receiver = visit(node.receiver)
+ bounds(node.message_loc)
+ on_op(node.message)
+ receiver = visit(node.receiver)
bounds(node.location)
on_unary(node.name, receiver)
when :!
+ bounds(node.message_loc)
if node.message == "not"
+ on_kw("not")
+
+ if node.opening_loc
+ bounds(node.opening_loc)
+ on_lparen("(")
+ end
+
receiver =
- if !node.receiver.is_a?(ParenthesesNode) || !node.receiver.body.nil?
+ if node.receiver.is_a?(ParenthesesNode) && node.receiver.body.nil?
+ # The parens in `not()` just emit parens and nothing else.
+ bounds(node.receiver.opening_loc)
+ on_lparen("(")
+ bounds(node.receiver.closing_loc)
+ on_rparen(")")
+ nil
+ else
visit(node.receiver)
end
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_rparen(")")
+ end
bounds(node.location)
on_unary(:not, receiver)
else
+ on_op("!")
+
receiver = visit(node.receiver)
bounds(node.location)
on_unary(:!, receiver)
end
- when *BINARY_OPERATORS
+ when BINARY_OPERATORS
receiver = visit(node.receiver)
+
+ bounds(node.message_loc)
+ on_op(node.message)
+
value = visit(node.arguments.arguments.first)
bounds(node.location)
@@ -1072,9 +1326,21 @@ module Prism
if node.variable_call?
on_vcall(message)
else
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+ if node.opening_loc
+ bounds(node.opening_loc)
+ on_lparen("(")
+ end
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_rparen(")")
+ end
+
+ block = visit(block_node)
call =
- if node.opening_loc.nil? && arguments&.any?
+ if node.opening_loc.nil? && get_arguments_and_block(node.arguments, node.block).first.any?
bounds(node.location)
on_command(message, arguments)
elsif !node.opening_loc.nil?
@@ -1085,11 +1351,11 @@ module Prism
on_method_add_arg(on_fcall(message), on_args_new)
end
- if block.nil?
- call
- else
+ if block_node
bounds(node.block.location)
on_method_add_block(call, block)
+ else
+ call
end
end
end
@@ -1097,7 +1363,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
message =
if node.message_loc.nil?
@@ -1107,13 +1373,30 @@ module Prism
visit_token(node.message, false)
end
+ if node.equal_loc
+ bounds(node.equal_loc)
+ on_op("=")
+ end
+
if node.name.end_with?("=") && !node.message.end_with?("=") && !node.arguments.nil? && node.block.nil?
value = visit_write_value(node.arguments.arguments.first)
bounds(node.location)
on_assign(on_field(receiver, call_operator, message), value)
else
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+ if node.opening_loc
+ bounds(node.opening_loc)
+ on_lparen("(")
+ end
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_rparen(")")
+ end
+
+ block = visit(block_node)
call =
if node.opening_loc.nil?
bounds(node.location)
@@ -1131,27 +1414,35 @@ module Prism
on_method_add_arg(on_call(receiver, call_operator, message), arguments)
end
- if block.nil?
- call
- else
+ if block_node
bounds(node.block.location)
on_method_add_block(call, block)
+ else
+ call
end
end
end
end
- # Visit the arguments and block of a call node and return the arguments
- # and block as they should be used.
- private def visit_call_node_arguments(arguments_node, block_node, trailing_comma)
+ # Extract the arguments and block Ripper-style, which means if the block
+ # is like `&b` then it's moved to arguments.
# Returns a two-element array [arguments, block]. Neither element is visited
# here; block is nil when it was a BlockArgumentNode.
+ private def get_arguments_and_block(arguments_node, block_node)
arguments = arguments_node&.arguments || []
block = block_node
if block.is_a?(BlockArgumentNode)
# `+=` builds a new array, so unlike the removed `<<` the array returned by
# arguments_node.arguments is left unmodified.
- arguments << block
+ arguments += [block]
block = nil
end
+ [arguments, block]
+ end
+
+ # Visit the arguments and block of a call node and return the arguments
+ # and block as they should be used.
+ private def visit_call_node_arguments(arguments_node, block_node, trailing_comma)
+ arguments, block = get_arguments_and_block(arguments_node, block_node)
+
[
if arguments.length == 1 && arguments.first.is_a?(ForwardingArgumentsNode)
visit(arguments.first)
@@ -1165,7 +1456,7 @@ module Prism
on_args_add_block(args, false)
end
end,
- visit(block)
+ block,
]
end
@@ -1183,7 +1474,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1205,7 +1496,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1227,7 +1518,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1249,6 +1540,9 @@ module Prism
if node.call_operator == "::"
receiver = visit(node.receiver)
+ bounds(node.call_operator_loc)
+ on_op("::")
+
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1258,7 +1552,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1272,6 +1566,10 @@ module Prism
# ^^^^^^^^^^
def visit_capture_pattern_node(node)
value = visit(node.value)
+
+ bounds(node.operator_loc)
+ on_op("=>")
+
target = visit(node.target)
bounds(node.location)
@@ -1281,10 +1579,21 @@ module Prism
# case foo; when bar; end
# ^^^^^^^^^^^^^^^^^^^^^^^
def visit_case_node(node)
+ bounds(node.case_keyword_loc)
+ on_kw("case")
+
predicate = visit(node.predicate)
+ visited_conditions = node.conditions.map { |condition| visit(condition) }
+ visited_else_clause = visit(node.else_clause)
+
+ if !node.else_clause
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
clauses =
- node.conditions.reverse_each.inject(visit(node.else_clause)) do |current, condition|
- on_when(*visit(condition), current)
+ visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition|
+ on_when(*condition, current)
end
bounds(node.location)
@@ -1294,10 +1603,23 @@ module Prism
# case foo; in bar; end
# ^^^^^^^^^^^^^^^^^^^^^
def visit_case_match_node(node)
+ bounds(node.case_keyword_loc)
+ on_kw("case")
+
predicate = visit(node.predicate)
+ visited_conditions = node.conditions.map do | condition|
+ visit(condition)
+ end
+ visited_else_clause = visit(node.else_clause)
+
+ if !node.else_clause
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
clauses =
- node.conditions.reverse_each.inject(visit(node.else_clause)) do |current, condition|
- on_in(*visit(condition), current)
+ visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition|
+ on_in(*condition, current)
end
bounds(node.location)
@@ -1307,6 +1629,9 @@ module Prism
# class Foo; end
# ^^^^^^^^^^^^^^
def visit_class_node(node)
+ bounds(node.class_keyword_loc)
+ on_kw("class")
+
constant_path =
if node.constant_path.is_a?(ConstantReadNode)
bounds(node.constant_path.location)
@@ -1315,9 +1640,17 @@ module Prism
visit(node.constant_path)
end
+ if node.inheritance_operator_loc
+ bounds(node.inheritance_operator_loc)
+ on_op("<")
+ end
+
superclass = visit(node.superclass)
bodystmt = visit_body_node(node.superclass&.location || node.constant_path.location, node.body, node.superclass.nil?)
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_class(constant_path, superclass, bodystmt)
end
@@ -1331,12 +1664,13 @@ module Prism
# @@foo = 1
# ^^^^^^^^^
- #
- # @@foo, @@bar = 1
- # ^^^^^ ^^^^^
def visit_class_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_cvar(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1401,12 +1735,13 @@ module Prism
# Foo = 1
# ^^^^^^^
- #
- # Foo, Bar = 1
- # ^^^ ^^^
def visit_constant_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_const(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1466,6 +1801,11 @@ module Prism
# ^^^^^^^^
def visit_constant_path_node(node)
if node.parent.nil?
+ if node.delimiter_loc
+ bounds(node.delimiter_loc)
+ on_op("::")
+ end
+
bounds(node.name_loc)
child = on_const(node.name.to_s)
@@ -1474,6 +1814,9 @@ module Prism
else
parent = visit(node.parent)
+ bounds(node.delimiter_loc)
+ on_op("::")
+
bounds(node.name_loc)
child = on_const(node.name.to_s)
@@ -1484,11 +1827,12 @@ module Prism
# Foo::Bar = 1
# ^^^^^^^^^^^^
- #
- # Foo::Foo, Bar::Bar = 1
- # ^^^^^^^^ ^^^^^^^^
def visit_constant_path_write_node(node)
target = visit_constant_path_write_node_target(node.target)
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1498,6 +1842,11 @@ module Prism
# Visit a constant path that is part of a write node.
private def visit_constant_path_write_node_target(node)
if node.parent.nil?
+ if node.delimiter_loc
+ bounds(node.delimiter_loc)
+ on_op("::")
+ end
+
bounds(node.name_loc)
child = on_const(node.name.to_s)
@@ -1506,6 +1855,9 @@ module Prism
else
parent = visit(node.parent)
+ bounds(node.delimiter_loc)
+ on_op("::")
+
bounds(node.name_loc)
child = on_const(node.name.to_s)
@@ -1518,7 +1870,6 @@ module Prism
# ^^^^^^^^^^^^^^^
def visit_constant_path_operator_write_node(node)
target = visit_constant_path_write_node_target(node.target)
- value = visit(node.value)
bounds(node.binary_operator_loc)
operator = on_op("#{node.binary_operator}=")
@@ -1532,7 +1883,6 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_constant_path_and_write_node(node)
target = visit_constant_path_write_node_target(node.target)
- value = visit(node.value)
bounds(node.operator_loc)
operator = on_op("&&=")
@@ -1546,7 +1896,6 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_constant_path_or_write_node(node)
target = visit_constant_path_write_node_target(node.target)
- value = visit(node.value)
bounds(node.operator_loc)
operator = on_op("||=")
@@ -1568,16 +1917,24 @@ module Prism
# def self.foo; end
# ^^^^^^^^^^^^^^^^^
def visit_def_node(node)
+ bounds(node.def_keyword_loc)
+ on_kw("def")
+
receiver = visit(node.receiver)
operator =
if !node.operator_loc.nil?
bounds(node.operator_loc)
- visit_token(node.operator)
+ node.operator == "." ? on_period(".") : on_op("::")
end
bounds(node.name_loc)
name = visit_token(node.name_loc.slice)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
parameters =
if node.parameters.nil?
bounds(node.location)
@@ -1587,10 +1944,17 @@ module Prism
end
if !node.lparen_loc.nil?
+ bounds(node.rparen_loc)
+ on_rparen(")")
bounds(node.lparen_loc)
parameters = on_paren(parameters)
end
+ if node.equal_loc
+ bounds(node.equal_loc)
+ on_op("=")
+ end
+
bodystmt =
if node.equal_loc.nil?
visit_body_node(node.rparen_loc || node.end_keyword_loc, node.body)
@@ -1601,11 +1965,16 @@ module Prism
on_bodystmt(body, nil, nil, nil)
end
+ if node.end_keyword_loc
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
- if receiver.nil?
- on_def(name, parameters, bodystmt)
- else
+ if receiver
on_defs(receiver, operator, name, parameters, bodystmt)
+ else
+ on_def(name, parameters, bodystmt)
end
end
@@ -1615,8 +1984,21 @@ module Prism
# defined?(a)
# ^^^^^^^^^^^
def visit_defined_node(node)
+ bounds(node.keyword_loc)
+ on_kw("defined?")
+
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
expression = visit(node.value)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
# Very weird circumstances here where something like:
#
# defined?
@@ -1637,17 +2019,24 @@ module Prism
# Visits an ElseNode. Emits on_kw("else"), visits the statements (a lone nil
# placeholder when there are none), emits on_kw("end"), and then dispatches
# on_else with the visited statements, bounded by the whole node.
#
# if foo then bar else baz end
# ^^^^^^^^^^^^
def visit_else_node(node)
+ bounds(node.else_keyword_loc)
+ on_kw("else")
+
statements =
if node.statements.nil?
[nil]
else
body = node.statements.body
# The replacement builds a new array with a leading nil rather than
# unshifting into the array returned by node.statements.body.
- body.unshift(nil) if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false)
+ body = [nil, *body] if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false)
body
end
# The statements are visited before the `end` keyword event is emitted.
+ else_statements = visit_statements_node_body(statements)
+
# NOTE(review): end_keyword_loc is used without a nil check here, unlike the
# guarded use in visit_begin_node -- confirm it is always present for the
# nodes that reach this visitor.
+ bounds(node.end_keyword_loc)
+ on_kw("end")
bounds(node.location)
- on_else(visit_statements_node_body(statements))
+ on_else(else_statements)
end
# "foo #{bar}"
@@ -1685,12 +2074,15 @@ module Prism
# Visit an EnsureNode node.
def visit_ensure_node(node)
+ bounds(node.ensure_keyword_loc)
+ on_kw("ensure")
+
statements =
if node.statements.nil?
[nil]
else
body = node.statements.body
- body.unshift(nil) if void_stmt?(node.ensure_keyword_loc, body[0].location, false)
+ body = [nil, *body] if void_stmt?(node.ensure_keyword_loc, body[0].location, false)
body
end
@@ -1711,6 +2103,14 @@ module Prism
# ^^^^^^^^^^^
def visit_find_pattern_node(node)
constant = visit(node.constant)
+
+ if node.opening_loc
+ bounds(node.opening_loc)
+ node.opening == "[" ? on_lbracket("[") : on_lparen("(")
+ end
+ bounds(node.left.operator_loc)
+ on_op("*")
+
left =
if node.left.expression.nil?
bounds(node.left.location)
@@ -1720,6 +2120,10 @@ module Prism
end
requireds = visit_all(node.requireds) if node.requireds.any?
+
+ bounds(node.right.operator_loc)
+ on_op("*")
+
right =
if node.right.expression.nil?
bounds(node.right.location)
@@ -1728,6 +2132,10 @@ module Prism
visit(node.right.expression)
end
+ if node.closing_loc
+ bounds(node.closing_loc)
+ node.closing == "]" ? on_rbracket("]") : on_rparen(")")
+ end
bounds(node.location)
on_fndptn(constant, left, requireds, right)
end
@@ -1736,6 +2144,10 @@ module Prism
# ^^^^^^^^^^
def visit_flip_flop_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ on_op(node.operator)
+
right = visit(node.right)
bounds(node.location)
@@ -1755,8 +2167,18 @@ module Prism
# for foo in bar do end
# ^^^^^^^^^^^^^^^^^^^^^
def visit_for_node(node)
+ bounds(node.for_keyword_loc)
+ on_kw("for")
+
index = visit(node.index)
+ bounds(node.in_keyword_loc)
+ on_kw("in")
+
collection = visit(node.collection)
+ if node.do_keyword_loc
+ bounds(node.do_keyword_loc)
+ on_kw("do")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -1765,6 +2187,9 @@ module Prism
visit(node.statements)
end
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_for(index, collection, statements)
end
@@ -1773,6 +2198,7 @@ module Prism
# ^^^
# Emits on_op("...") and then on_args_forward, both bounded by the node's
# full location, and returns the result of on_args_forward.
def visit_forwarding_arguments_node(node)
bounds(node.location)
+ on_op("...")
on_args_forward
end
@@ -1780,6 +2206,7 @@ module Prism
# ^^^
# Emits on_op("...") and then on_args_forward, both bounded by the node's
# full location, and returns the result of on_args_forward.
def visit_forwarding_parameter_node(node)
bounds(node.location)
+ on_op("...")
on_args_forward
end
@@ -1789,6 +2216,9 @@ module Prism
# super {}
# ^^^^^^^^
def visit_forwarding_super_node(node)
+ bounds(node.keyword_loc)
+ on_kw("super")
+
if node.block.nil?
bounds(node.location)
on_zsuper
@@ -1809,12 +2239,13 @@ module Prism
# $foo = 1
# ^^^^^^^^
- #
- # $foo, $bar = 1
- # ^^^^ ^^^^
def visit_global_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_gvar(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1873,6 +2304,9 @@ module Prism
# {}
# ^^
def visit_hash_node(node)
+ bounds(node.opening_loc)
+ on_lbrace("{")
+
elements =
if node.elements.any?
args = visit_all(node.elements)
@@ -1881,6 +2315,8 @@ module Prism
on_assoclist_from_args(args)
end
+ bounds(node.closing_loc)
+ on_rbrace("}")
bounds(node.location)
on_hash(elements)
end
@@ -1889,6 +2325,15 @@ module Prism
# ^^
def visit_hash_pattern_node(node)
constant = visit(node.constant)
+
+ if node.constant
+ bounds(node.opening_loc)
+ node.opening == "[" ? on_lbracket("[") : on_lparen("(")
+ elsif node.opening_loc
+ bounds(node.opening_loc)
+ on_lbrace("{")
+ end
+
elements =
if node.elements.any? || !node.rest.nil?
node.elements.map do |element|
@@ -1911,12 +2356,21 @@ module Prism
rest =
case node.rest
when AssocSplatNode
+ bounds(node.rest.operator_loc)
+ on_op("**")
visit(node.rest.value)
when NoKeywordsParameterNode
bounds(node.rest.location)
on_var_field(visit(node.rest))
end
+ if node.constant
+ bounds(node.closing_loc)
+ node.closing == "]" ? on_rbracket("]") : on_rparen(")")
+ elsif node.closing_loc
+ bounds(node.closing_loc)
+ on_rbrace("}")
+ end
bounds(node.location)
on_hshptn(constant, elements, rest)
end
@@ -1932,13 +2386,27 @@ module Prism
def visit_if_node(node)
if node.then_keyword == "?"
predicate = visit(node.predicate)
+
+ bounds(node.then_keyword_loc)
+ on_op("?")
+
truthy = visit(node.statements.body.first)
+
+ bounds(node.subsequent.else_keyword_loc)
+ on_op(":")
+
falsy = visit(node.subsequent.statements.body.first)
bounds(node.location)
on_ifop(predicate, truthy, falsy)
elsif node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ bounds(node.if_keyword_loc)
+ on_kw(node.if_keyword)
predicate = visit(node.predicate)
+ if node.then_keyword_loc && node.then_keyword != "?"
+ bounds(node.then_keyword_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -1948,6 +2416,11 @@ module Prism
end
subsequent = visit(node.subsequent)
+ if node.end_keyword_loc && !node.subsequent
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
if node.if_keyword == "if"
on_if(predicate, statements, subsequent)
@@ -1956,6 +2429,8 @@ module Prism
end
else
statements = visit(node.statements.body.first)
+ bounds(node.if_keyword_loc)
+ on_kw(node.if_keyword)
predicate = visit(node.predicate)
bounds(node.location)
@@ -1987,7 +2462,14 @@ module Prism
# This is a special case where we're not going to call on_in directly
# because we don't have access to the subsequent. Instead, we'll return
# the component parts and let the parent node handle it.
+ bounds(node.in_loc)
+ on_kw("in")
+
pattern = visit_pattern_node(node.pattern)
+ if node.then_loc
+ bounds(node.then_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -2003,8 +2485,15 @@ module Prism
# ^^^^^^^^^^^^^^^
def visit_index_operator_write_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
target = on_aref_field(receiver, arguments)
@@ -2020,8 +2509,15 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_index_and_write_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
target = on_aref_field(receiver, arguments)
@@ -2037,8 +2533,15 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_index_or_write_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
target = on_aref_field(receiver, arguments)
@@ -2054,8 +2557,15 @@ module Prism
# ^^^^^^^^
# Visits the receiver, emits on_lbracket("["), visits the index arguments,
# emits on_rbracket("]"), and finally dispatches on_aref_field with the
# visited receiver and arguments, bounded by the whole node.
def visit_index_target_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
# The second element of the returned pair (the block) is discarded here.
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
on_aref_field(receiver, arguments)
end
@@ -2072,6 +2582,10 @@ module Prism
def visit_instance_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_ivar(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -2174,20 +2688,37 @@ module Prism
# "foo #{bar}"
# ^^^^^^^^^^^^
# Visits an interpolated string, taking one of three paths:
# * opening starts with "<<~": delegate to visit_heredoc_string_node and wrap
#   the result with on_string_literal;
# * not a heredoc, more than one part, and at least one StringNode or
#   InterpolatedStringNode part that has its own opening: fold the visited
#   parts together with on_string_concat;
# * otherwise: fold the parts with on_string_add starting from
#   on_string_content, then wrap with on_string_literal.
# The patch wraps the whole body in with_string_bounds(node).
# NOTE(review): with_string_bounds is defined outside this excerpt;
# presumably it emits the string's delimiter events -- confirm.
def visit_interpolated_string_node(node)
- if node.opening&.start_with?("<<~")
- heredoc = visit_heredoc_string_node(node)
+ with_string_bounds(node) do
+ if node.opening&.start_with?("<<~")
+ heredoc = visit_heredoc_string_node(node)
- bounds(node.location)
- on_string_literal(heredoc)
- elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? }
- first, *rest = node.parts
- rest.inject(visit(first)) do |content, part|
- concat = visit(part)
+ bounds(node.location)
+ on_string_literal(heredoc)
+ elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? }
+ first, *rest = node.parts
+ rest.inject(visit(first)) do |content, part|
+ concat = visit(part)
+
+ bounds(part.location)
+ on_string_concat(content, concat)
+ end
+ else
+ bounds(node.parts.first.location)
+ parts =
+ node.parts.inject(on_string_content) do |content, part|
+ on_string_add(content, visit_string_content(part))
+ end
- bounds(part.location)
- on_string_concat(content, concat)
+ bounds(node.location)
+ on_string_literal(parts)
end
- else
+ end
+ end
+
+ # :"foo #{bar}"
+ # ^^^^^^^^^^^^^
+ def visit_interpolated_symbol_node(node)
+ with_string_bounds(node) do
bounds(node.parts.first.location)
parts =
node.parts.inject(on_string_content) do |content, part|
@@ -2195,40 +2726,29 @@ module Prism
end
bounds(node.location)
- on_string_literal(parts)
+ on_dyna_symbol(parts)
end
end
- # :"foo #{bar}"
- # ^^^^^^^^^^^^^
- def visit_interpolated_symbol_node(node)
- bounds(node.parts.first.location)
- parts =
- node.parts.inject(on_string_content) do |content, part|
- on_string_add(content, visit_string_content(part))
- end
-
- bounds(node.location)
- on_dyna_symbol(parts)
- end
-
# `foo #{bar}`
# ^^^^^^^^^^^^
# Visits an interpolated backtick string. When the opening starts with "<<~"
# the work is delegated to visit_heredoc_x_string_node; otherwise the parts
# are folded with on_xstring_add starting from on_xstring_new. In both cases
# the result is passed to on_xstring_literal, bounded by the whole node.
# The patch wraps the whole body in with_string_bounds(node).
# NOTE(review): with_string_bounds is defined outside this excerpt;
# presumably it emits the string's delimiter events -- confirm.
def visit_interpolated_x_string_node(node)
- if node.opening.start_with?("<<~")
- heredoc = visit_heredoc_x_string_node(node)
+ with_string_bounds(node) do
+ if node.opening.start_with?("<<~")
+ heredoc = visit_heredoc_x_string_node(node)
- bounds(node.location)
- on_xstring_literal(heredoc)
- else
- bounds(node.parts.first.location)
- parts =
- node.parts.inject(on_xstring_new) do |content, part|
- on_xstring_add(content, visit_string_content(part))
- end
+ bounds(node.location)
+ on_xstring_literal(heredoc)
+ else
+ bounds(node.parts.first.location)
+ parts =
+ node.parts.inject(on_xstring_new) do |content, part|
+ on_xstring_add(content, visit_string_content(part))
+ end
- bounds(node.location)
- on_xstring_literal(parts)
+ bounds(node.location)
+ on_xstring_literal(parts)
+ end
end
end
@@ -2269,6 +2789,9 @@ module Prism
# def foo(**); end
# ^^
def visit_keyword_rest_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("**")
+
if node.name_loc.nil?
bounds(node.location)
on_kwrest_param(nil)
@@ -2288,6 +2811,11 @@ module Prism
parameters =
if node.parameters.is_a?(BlockParametersNode)
+ if node.parameters.opening_loc
+ bounds(node.parameters.opening_loc)
+ on_lparen("(")
+ end
+
# Ripper does not track block-locals within lambdas, so we skip
# directly to the parameters here.
params =
@@ -2298,6 +2826,13 @@ module Prism
visit(node.parameters.parameters)
end
+ visit_all(node.parameters.locals)
+
+ if node.parameters.closing_loc
+ bounds(node.parameters.closing_loc)
+ on_rparen(")")
+ end
+
if node.parameters.opening_loc.nil?
params
else
@@ -2310,9 +2845,11 @@ module Prism
end
braces = node.opening == "{"
+ bounds(node.opening_loc)
if braces
- bounds(node.opening_loc)
on_tlambeg(node.opening)
+ else
+ on_kw("do")
end
body =
@@ -2325,7 +2862,7 @@ module Prism
braces ? stmts : on_bodystmt(stmts, nil, nil, nil)
when StatementsNode
stmts = node.body.body
- stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
+ stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
stmts = visit_statements_node_body(stmts)
bounds(node.body.location)
@@ -2336,6 +2873,13 @@ module Prism
raise
end
+ bounds(node.closing_loc)
+ if braces
+ on_rbrace("}")
+ else
+ on_kw("end")
+ end
+
bounds(node.location)
on_lambda(parameters, body)
end
@@ -2352,6 +2896,10 @@ module Prism
def visit_local_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_ident(node.name_loc.slice))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -2426,6 +2974,8 @@ module Prism
# ^^^^^^^^^^
def visit_match_predicate_node(node)
value = visit(node.value)
+ bounds(node.operator_loc)
+ on_kw("in")
pattern = on_in(visit_pattern_node(node.pattern), nil, nil)
on_case(value, pattern)
@@ -2435,6 +2985,10 @@ module Prism
# ^^^^^^^^^^
def visit_match_required_node(node)
value = visit(node.value)
+
+ bounds(node.operator_loc)
+ on_op("=>")
+
pattern = on_in(visit_pattern_node(node.pattern), nil, nil)
on_case(value, pattern)
@@ -2448,13 +3002,16 @@ module Prism
# A node that is missing from the syntax tree. This is only used in the
# case of a syntax error.
#
# This visitor always raises a RuntimeError. The patch renames it from
# visit_missing_node to visit_error_recovery_node and updates the message to
# match.
- def visit_missing_node(node)
- raise "Cannot visit missing nodes directly."
+ def visit_error_recovery_node(node)
+ raise "Cannot visit error recovery nodes directly."
end
# module Foo; end
# ^^^^^^^^^^^^^^^
def visit_module_node(node)
+ bounds(node.module_keyword_loc)
+ on_kw("module")
+
constant_path =
if node.constant_path.is_a?(ConstantReadNode)
bounds(node.constant_path.location)
@@ -2465,6 +3022,9 @@ module Prism
bodystmt = visit_body_node(node.constant_path.location, node.body, true)
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_module(constant_path, bodystmt)
end
@@ -2472,9 +3032,19 @@ module Prism
# (foo, bar), bar = qux
# ^^^^^^^^^^
def visit_multi_target_node(node)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
bounds(node.location)
targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
if node.lparen_loc.nil?
targets
else
@@ -2526,9 +3096,22 @@ module Prism
# foo, bar = baz
# ^^^^^^^^^^^^^^
def visit_multi_write_node(node)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
bounds(node.location)
targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
+ bounds(node.operator_loc)
+ on_op("=")
+
unless node.lparen_loc.nil?
bounds(node.lparen_loc)
targets = on_mlhs_paren(targets)
@@ -2546,6 +3129,9 @@ module Prism
# next foo
# ^^^^^^^^
def visit_next_node(node)
+ bounds(node.keyword_loc)
+ on_kw("next")
+
if node.arguments.nil?
bounds(node.location)
on_next(on_args_new)
@@ -2564,9 +3150,24 @@ module Prism
on_var_ref(on_kw("nil"))
end
+ # def foo(&nil); end
+ # ^^^^
# Emits on_op("&") at the operator location and on_kw("nil") at the keyword
# location, then dispatches on_blockarg bounded by the whole node. The
# argument passed to on_blockarg is the Symbol :nil, not the nil object.
+ def visit_no_block_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("&")
+ bounds(node.keyword_loc)
+ on_kw("nil")
+ bounds(node.location)
+ on_blockarg(:nil)
+ end
+
# def foo(**nil); end
# ^^^^^
def visit_no_keywords_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("**")
+ bounds(node.keyword_loc)
+ on_kw("nil")
bounds(node.location)
on_nokw_param(nil)
@@ -2599,7 +3200,11 @@ module Prism
# ^^^^^^^
def visit_optional_parameter_node(node)
bounds(node.name_loc)
- name = visit_token(node.name.to_s)
+ name = on_ident(node.name.to_s)
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit(node.value)
[name, value]
@@ -2609,6 +3214,14 @@ module Prism
# ^^^^^^
def visit_or_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ if node.operator == "or"
+ on_kw("or")
+ else
+ on_op("||")
+ end
+
right = visit(node.right)
bounds(node.location)
@@ -2632,9 +3245,19 @@ module Prism
# Visit a destructured positional parameter node.
private def visit_destructured_parameter_node(node)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
bounds(node.location)
targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, false)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
bounds(node.lparen_loc)
on_mlhs_paren(targets)
end
@@ -2645,6 +3268,9 @@ module Prism
# (1)
# ^^^
def visit_parentheses_node(node)
+ bounds(node.opening_loc)
+ on_lparen("(")
+
body =
if node.body.nil?
on_stmts_add(on_stmts_new, on_void_stmt)
@@ -2652,6 +3278,8 @@ module Prism
visit(node.body)
end
+ bounds(node.closing_loc)
+ on_rparen(")")
bounds(node.location)
on_paren(body)
end
@@ -2659,8 +3287,15 @@ module Prism
# foo => ^(bar)
# ^^^^^^
def visit_pinned_expression_node(node)
+ bounds(node.operator_loc)
+ on_op("^")
+ bounds(node.lparen_loc)
+ on_lparen("(")
+
expression = visit(node.expression)
+ bounds(node.rparen_loc)
+ on_rparen(")")
bounds(node.location)
on_begin(expression)
end
@@ -2668,12 +3303,20 @@ module Prism
# foo = 1 and bar => ^foo
# ^^^^
def visit_pinned_variable_node(node)
+ bounds(node.operator_loc)
+ on_op("^")
+
visit(node.variable)
end
# END {}
# ^^^^^^
def visit_post_execution_node(node)
+ bounds(node.keyword_loc)
+ on_kw("END")
+ bounds(node.opening_loc)
+ on_lbrace("{")
+
statements =
if node.statements.nil?
bounds(node.location)
@@ -2682,6 +3325,8 @@ module Prism
visit(node.statements)
end
+ bounds(node.closing_loc)
+ on_rbrace("}")
bounds(node.location)
on_END(statements)
end
@@ -2689,6 +3334,11 @@ module Prism
# BEGIN {}
# ^^^^^^^^
def visit_pre_execution_node(node)
+ bounds(node.keyword_loc)
+ on_kw("BEGIN")
+ bounds(node.opening_loc)
+ on_lbrace("{")
+
statements =
if node.statements.nil?
bounds(node.location)
@@ -2697,6 +3347,8 @@ module Prism
visit(node.statements)
end
+ bounds(node.closing_loc)
+ on_rbrace("}")
bounds(node.location)
on_BEGIN(statements)
end
@@ -2704,7 +3356,7 @@ module Prism
# The top-level program node.
def visit_program_node(node)
body = node.statements.body
- body << nil if body.empty?
+ body = [nil] if body.empty?
statements = visit_statements_node_body(body)
bounds(node.location)
@@ -2715,6 +3367,10 @@ module Prism
# ^^^^
def visit_range_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ on_op(node.operator)
+
right = visit(node.right)
bounds(node.location)
@@ -2735,6 +3391,7 @@ module Prism
# ^^^^
def visit_redo_node(node)
bounds(node.location)
+ on_kw("redo")
on_redo
end
@@ -2777,6 +3434,9 @@ module Prism
# foo rescue bar
# ^^^^^^^^^^^^^^
def visit_rescue_modifier_node(node)
+ bounds(node.keyword_loc)
+ on_kw("rescue")
+
expression = visit_write_value(node.expression)
rescue_expression = visit(node.rescue_expression)
@@ -2787,6 +3447,9 @@ module Prism
# begin; rescue; end
# ^^^^^^^
def visit_rescue_node(node)
+ bounds(node.keyword_loc)
+ on_kw("rescue")
+
exceptions =
case node.exceptions.length
when 0
@@ -2824,6 +3487,11 @@ module Prism
end
end
+ if node.operator_loc
+ bounds(node.operator_loc)
+ on_op("=>")
+ end
+
reference = visit(node.reference)
statements =
if node.statements.nil?
@@ -2845,12 +3513,15 @@ module Prism
# def foo(*); end
# ^
def visit_rest_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("*")
+
if node.name_loc.nil?
bounds(node.location)
on_rest_param(nil)
else
bounds(node.name_loc)
- on_rest_param(visit_token(node.name.to_s))
+ on_rest_param(on_ident(node.name.to_s))
end
end
@@ -2858,6 +3529,7 @@ module Prism
# ^^^^^
def visit_retry_node(node)
bounds(node.location)
+ on_kw("retry")
on_retry
end
@@ -2867,6 +3539,9 @@ module Prism
# return 1
# ^^^^^^^^
def visit_return_node(node)
+ bounds(node.keyword_loc)
+ on_kw("return")
+
if node.arguments.nil?
bounds(node.location)
on_return0
@@ -2893,9 +3568,17 @@ module Prism
# class << self; end
# ^^^^^^^^^^^^^^^^^^
def visit_singleton_class_node(node)
+ bounds(node.class_keyword_loc)
+ on_kw("class")
+ bounds(node.operator_loc)
+ on_op("<<")
+
expression = visit(node.expression)
bodystmt = visit_body_node(node.body&.location || node.end_keyword_loc, node.body)
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_sclass(expression, bodystmt)
end
@@ -2930,6 +3613,8 @@ module Prism
# def foo(*); bar(*); end
# ^
def visit_splat_node(node)
+ bounds(node.operator_loc)
+ on_op("*")
visit(node.expression)
end
@@ -2952,26 +3637,68 @@ module Prism
# "foo"
# ^^^^^
def visit_string_node(node)
- if (content = node.content).empty?
- bounds(node.location)
- on_string_literal(on_string_content)
- elsif (opening = node.opening) == "?"
- bounds(node.location)
- on_CHAR("?#{node.content}")
- elsif opening.start_with?("<<~")
- heredoc = visit_heredoc_string_node(node.to_interpolated)
+ with_string_bounds(node) do
+ if (content = node.content).empty?
+ bounds(node.location)
+ on_string_literal(on_string_content)
+ elsif (opening = node.opening) == "?"
+ bounds(node.location)
+ on_CHAR("?#{node.content}")
+ elsif opening.start_with?("<<~")
+ heredoc = visit_heredoc_string_node(node.to_interpolated)
- bounds(node.location)
- on_string_literal(heredoc)
- else
- bounds(node.content_loc)
- tstring_content = on_tstring_content(content)
+ bounds(node.location)
+ on_string_literal(heredoc)
+ else
+ bounds(node.content_loc)
+ tstring_content = on_tstring_content(content)
- bounds(node.location)
- on_string_literal(on_string_add(on_string_content, tstring_content))
+ bounds(node.location)
+ on_string_literal(on_string_add(on_string_content, tstring_content))
+ end
end
end
+ # Responsible for emitting the various string-like begin/end events
+ private def with_string_bounds(node)
+ # `foo "bar": baz` doesn't emit the closing location
+ assoc = !(opening = node.opening)&.include?(":") && node.closing&.end_with?(":")
+
+ is_heredoc = opening&.start_with?("<<")
+ if is_heredoc
+ bounds(node.opening_loc)
+ on_heredoc_beg(node.opening)
+ elsif opening&.start_with?(":", "%s")
+ bounds(node.opening_loc)
+ on_symbeg(node.opening)
+ elsif opening&.start_with?("`", "%x")
+ bounds(node.opening_loc)
+ on_backtick(node.opening)
+ elsif opening && !opening.start_with?("?")
+ bounds(node.opening_loc)
+ on_tstring_beg(opening)
+ end
+
+ result = yield
+ if assoc
+ if node.closing != ":"
+ bounds(node.closing_loc)
+ on_label_end(node.closing)
+ end
+ return result
+ end
+
+ if is_heredoc
+ bounds(node.closing_loc)
+ on_heredoc_end(node.closing)
+ elsif node.closing_loc
+ bounds(node.closing_loc)
+ on_tstring_end(node.closing)
+ end
+
+ result
+ end
+
# Ripper gives back the escaped string content but strips out the common
# leading whitespace. Prism gives back the unescaped string content and
# a location for the escaped string content. Unfortunately these don't
@@ -3049,42 +3776,39 @@ module Prism
# Visit a heredoc node that is representing a string.
private def visit_heredoc_string_node(node)
- bounds(node.opening_loc)
- on_heredoc_beg(node.opening)
-
bounds(node.location)
- result =
- visit_heredoc_node(node.parts, on_string_content) do |parts, part|
- on_string_add(parts, part)
- end
-
- bounds(node.closing_loc)
- on_heredoc_end(node.closing)
-
- result
+ visit_heredoc_node(node.parts, on_string_content) do |parts, part|
+ on_string_add(parts, part)
+ end
end
# Visit a heredoc node that is representing an xstring.
private def visit_heredoc_x_string_node(node)
- bounds(node.opening_loc)
- on_heredoc_beg(node.opening)
-
bounds(node.location)
- result =
- visit_heredoc_node(node.parts, on_xstring_new) do |parts, part|
- on_xstring_add(parts, part)
- end
-
- bounds(node.closing_loc)
- on_heredoc_end(node.closing)
-
- result
+ visit_heredoc_node(node.parts, on_xstring_new) do |parts, part|
+ on_xstring_add(parts, part)
+ end
end
# super(foo)
# ^^^^^^^^^^
def visit_super_node(node)
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location))
+ bounds(node.keyword_loc)
+ on_kw("super")
+
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location))
+
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
+ block = visit(block_node)
if !node.lparen_loc.nil?
bounds(node.lparen_loc)
@@ -3094,35 +3818,36 @@ module Prism
bounds(node.location)
call = on_super(arguments)
- if block.nil?
- call
- else
+ if block_node
bounds(node.block.location)
on_method_add_block(call, block)
+ else
+ call
end
end
# :foo
# ^^^^
def visit_symbol_node(node)
- if (opening = node.opening)&.match?(/^%s|['"]:?$/)
- bounds(node.value_loc)
- content = on_string_content
-
- if !(value = node.value).empty?
- content = on_string_add(content, on_tstring_content(value))
+ with_string_bounds(node) do
+ if node.value_loc.nil?
+ bounds(node.location)
+ on_dyna_symbol(on_string_content)
+ elsif (opening = node.opening)&.match?(/^%s|['"]:?$/)
+ bounds(node.value_loc)
+ content = on_string_add(on_string_content, on_tstring_content(node.value))
+ bounds(node.location)
+ on_dyna_symbol(content)
+ elsif (closing = node.closing) == ":"
+ bounds(node.location)
+ on_label("#{node.value}:")
+ elsif opening.nil? && node.closing_loc.nil?
+ bounds(node.value_loc)
+ on_symbol_literal(visit_token(node.value))
+ else
+ bounds(node.value_loc)
+ on_symbol_literal(on_symbol(visit_token(node.value)))
end
-
- on_dyna_symbol(content)
- elsif (closing = node.closing) == ":"
- bounds(node.location)
- on_label("#{node.value}:")
- elsif opening.nil? && node.closing_loc.nil?
- bounds(node.value_loc)
- on_symbol_literal(visit_token(node.value))
- else
- bounds(node.value_loc)
- on_symbol_literal(on_symbol(visit_token(node.value)))
end
end
@@ -3136,6 +3861,9 @@ module Prism
# undef foo
# ^^^^^^^^^
def visit_undef_node(node)
+ bounds(node.keyword_loc)
+ on_kw("undef")
+
names = visit_all(node.names)
bounds(node.location)
@@ -3149,7 +3877,13 @@ module Prism
# ^^^^^^^^^^^^^^
def visit_unless_node(node)
if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ bounds(node.keyword_loc)
+ on_kw("unless")
predicate = visit(node.predicate)
+ if node.then_keyword_loc
+ bounds(node.then_keyword_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -3159,10 +3893,17 @@ module Prism
end
else_clause = visit(node.else_clause)
+ if node.end_keyword_loc && !node.else_clause
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
on_unless(predicate, statements, else_clause)
else
statements = visit(node.statements.body.first)
+ bounds(node.keyword_loc)
+ on_kw("unless")
predicate = visit(node.predicate)
bounds(node.location)
@@ -3176,7 +3917,14 @@ module Prism
# bar until foo
# ^^^^^^^^^^^^^
def visit_until_node(node)
+ bounds(node.keyword_loc)
+ on_kw("until")
+
if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ if node.do_keyword_loc
+ bounds(node.do_keyword_loc)
+ on_kw("do")
+ end
predicate = visit(node.predicate)
statements =
if node.statements.nil?
@@ -3186,6 +3934,11 @@ module Prism
visit(node.statements)
end
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
on_until(predicate, statements)
else
@@ -3203,7 +3956,14 @@ module Prism
# This is a special case where we're not going to call on_when directly
# because we don't have access to the subsequent. Instead, we'll return
# the component parts and let the parent node handle it.
+ bounds(node.keyword_loc)
+ on_kw("when")
+
conditions = visit_arguments(node.conditions)
+ if node.then_keyword_loc
+ bounds(node.then_keyword_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -3222,7 +3982,17 @@ module Prism
# ^^^^^^^^^^^^^
def visit_while_node(node)
if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ bounds(node.keyword_loc)
+ on_kw("while")
+ if node.do_keyword_loc
+ bounds(node.do_keyword_loc)
+ on_kw("do")
+ end
predicate = visit(node.predicate)
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_kw("end")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -3235,6 +4005,8 @@ module Prism
on_while(predicate, statements)
else
statements = visit(node.statements.body.first)
+ bounds(node.keyword_loc)
+ on_kw("while")
predicate = visit(node.predicate)
bounds(node.location)
@@ -3245,20 +4017,22 @@ module Prism
# `foo`
# ^^^^^
def visit_x_string_node(node)
- if node.unescaped.empty?
- bounds(node.location)
- on_xstring_literal(on_xstring_new)
- elsif node.opening.start_with?("<<~")
- heredoc = visit_heredoc_x_string_node(node.to_interpolated)
+ with_string_bounds(node) do
+ if node.unescaped.empty?
+ bounds(node.location)
+ on_xstring_literal(on_xstring_new)
+ elsif node.opening.start_with?("<<~")
+ heredoc = visit_heredoc_x_string_node(node.to_interpolated)
- bounds(node.location)
- on_xstring_literal(heredoc)
- else
- bounds(node.content_loc)
- content = on_tstring_content(node.content)
+ bounds(node.location)
+ on_xstring_literal(heredoc)
+ else
+ bounds(node.content_loc)
+ content = on_tstring_content(node.content)
- bounds(node.location)
- on_xstring_literal(on_xstring_add(on_xstring_new, content))
+ bounds(node.location)
+ on_xstring_literal(on_xstring_add(on_xstring_new, content))
+ end
end
end
@@ -3268,10 +4042,18 @@ module Prism
# yield 1
# ^^^^^^^
def visit_yield_node(node)
+ bounds(node.keyword_loc)
+ on_kw("yield")
+
if node.arguments.nil? && node.lparen_loc.nil?
bounds(node.location)
on_yield0
else
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
arguments =
if node.arguments.nil?
bounds(node.location)
@@ -3281,6 +4063,8 @@ module Prism
end
unless node.lparen_loc.nil?
+ bounds(node.rparen_loc)
+ on_rparen(")")
bounds(node.lparen_loc)
arguments = on_paren(arguments)
end
@@ -3294,7 +4078,11 @@ module Prism
# Lazily initialize the parse result.
def result
- @result ||= Prism.parse(source, partial_script: true)
+ @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true, encoding: source.encoding)
+ end
+
+ def line_and_column_cache
+ @line_and_column_cache ||= LineAndColumnCache.new(result.source)
end
##########################################################################
@@ -3315,30 +4103,34 @@ module Prism
# Visit the string content of a particular node. This method is used to
# split into the various token types.
def visit_token(token, allow_keywords = true)
- case token
- when "."
+ if token == "."
on_period(token)
- when "`"
+ elsif token == "`"
on_backtick(token)
- when *(allow_keywords ? KEYWORDS : [])
+ elsif allow_keywords && KEYWORDS.include?(token)
on_kw(token)
- when /^_/
+ elsif token.start_with?("_")
on_ident(token)
- when /^[[:upper:]]\w*$/
+ elsif token.match?(/^[[:upper:]]\w*$/)
on_const(token)
- when /^@@/
+ elsif token.start_with?("@@")
on_cvar(token)
- when /^@/
+ elsif token.start_with?("@")
on_ivar(token)
- when /^\$/
+ elsif token.start_with?("$")
on_gvar(token)
- when /^[[:punct:]]/
+ elsif token.match?(/^[[:punct:]]/)
on_op(token)
else
on_ident(token)
end
end
+ # Visit either `.`, `&.`, or `::`.
+ def visit_call_operator(token)
+ token == "." ? on_period(token) : on_op(token)
+ end
+
# Visit a node that represents a number. We need to explicitly handle the
# unary - operator.
def visit_number_node(node)
@@ -3346,6 +4138,9 @@ module Prism
location = node.location
if slice[0] == "-"
+ bounds(location.copy(length: 1))
+ on_op("-")
+
bounds(location.copy(start_offset: location.start_offset + 1))
value = yield slice[1..-1]
@@ -3394,26 +4189,24 @@ module Prism
# This method is responsible for updating lineno and column information
# to reflect the current node.
- #
- # This method could be drastically improved with some caching on the start
- # of every line, but for now it's good enough.
def bounds(location)
- @lineno = location.start_line
- @column = location.start_column
+ @lineno, @column = line_and_column_cache.line_and_column(location.start_offset)
end
+ # :startdoc:
+
##########################################################################
# Ripper interface
##########################################################################
# :stopdoc:
def _dispatch_0; end
- def _dispatch_1(_); end
- def _dispatch_2(_, _); end
- def _dispatch_3(_, _, _); end
- def _dispatch_4(_, _, _, _); end
- def _dispatch_5(_, _, _, _, _); end
- def _dispatch_7(_, _, _, _, _, _, _); end
+ def _dispatch_1(arg); arg end
+ def _dispatch_2(arg, _); arg end
+ def _dispatch_3(arg, _, _); arg end
+ def _dispatch_4(arg, _, _, _); arg end
+ def _dispatch_5(arg, _, _, _, _); arg end
+ def _dispatch_7(arg, _, _, _, _, _, _); arg end
# :startdoc:
#
diff --git a/lib/prism/translation/ripper/filter.rb b/lib/prism/translation/ripper/filter.rb
new file mode 100644
index 0000000000..19deef2d37
--- /dev/null
+++ b/lib/prism/translation/ripper/filter.rb
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module Prism
+ module Translation
+ class Ripper
+ class Filter # :nodoc:
+ # :stopdoc:
+ def initialize(src, filename = '-', lineno = 1)
+ @__lexer = Lexer.new(src, filename, lineno)
+ @__line = nil
+ @__col = nil
+ @__state = nil
+ end
+
+ def filename
+ @__lexer.filename
+ end
+
+ def lineno
+ @__line
+ end
+
+ def column
+ @__col
+ end
+
+ def state
+ @__state
+ end
+
+ def parse(init = nil)
+ data = init
+ @__lexer.lex.each do |pos, event, tok, state|
+ @__line, @__col = *pos
+ @__state = state
+ data = if respond_to?(event, true)
+ then __send__(event, tok, data)
+ else on_default(event, tok, data)
+ end
+ end
+ data
+ end
+
+ private
+
+ def on_default(event, token, data)
+ data
+ end
+ # :startdoc:
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb
new file mode 100644
index 0000000000..c6aeae4bd7
--- /dev/null
+++ b/lib/prism/translation/ripper/lexer.rb
@@ -0,0 +1,133 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+require_relative "../ripper"
+
+module Prism
+ module Translation
+ class Ripper
+ class Lexer < Ripper # :nodoc:
+ class State # :nodoc:
+ attr_reader :to_int, :to_s
+
+ def initialize(i)
+ @to_int = i
+ @to_s = Ripper.lex_state_name(i)
+ freeze
+ end
+
+ def [](index)
+ case index
+ when 0, :to_int
+ @to_int
+ when 1, :to_s
+ @to_s
+ else
+ nil
+ end
+ end
+
+ alias to_i to_int
+ alias inspect to_s
+ def pretty_print(q) q.text(to_s) end
+ def ==(i) super or to_int == i end
+ def &(i) self.class.new(to_int & i) end
+ def |(i) self.class.new(to_int | i) end
+ def allbits?(i) to_int.allbits?(i) end
+ def anybits?(i) to_int.anybits?(i) end
+ def nobits?(i) to_int.nobits?(i) end
+
+ # Instances are frozen and there are only a handful of them so we
+ # cache them here.
+ STATES = Hash.new { |hash, key| hash[key] = State.new(key) }
+ private_constant :STATES
+
+ def self.[](i)
+ STATES[i]
+ end
+ end
+
+ class Elem # :nodoc:
+ attr_accessor :pos, :event, :tok, :state, :message
+
+ def initialize(pos, event, tok, state, message = nil)
+ @pos = pos
+ @event = event
+ @tok = tok
+ @state = State[state]
+ @message = message
+ end
+
+ def [](index)
+ case index
+ when 0, :pos
+ @pos
+ when 1, :event
+ @event
+ when 2, :tok
+ @tok
+ when 3, :state
+ @state
+ when 4, :message
+ @message
+ else
+ nil
+ end
+ end
+
+ def inspect
+ "#<#{self.class}: #{event}@#{pos[0]}:#{pos[1]}:#{state}: #{tok.inspect}#{": " if message}#{message}>"
+ end
+
+ alias to_s inspect
+
+ def pretty_print(q)
+ q.group(2, "#<#{self.class}:", ">") {
+ q.breakable
+ q.text("#{event}@#{pos[0]}:#{pos[1]}")
+ q.breakable
+ state.pretty_print(q)
+ q.breakable
+ q.text("token: ")
+ tok.pretty_print(q)
+ if message
+ q.breakable
+ q.text("message: ")
+ q.text(message)
+ end
+ }
+ end
+
+ def to_a
+ if @message
+ [@pos, @event, @tok, @state, @message]
+ else
+ [@pos, @event, @tok, @state]
+ end
+ end
+ end
+
+ # Pretty much just the same as Prism.lex_compat.
+ def lex(raise_errors: false)
+ Ripper.lex(@source, filename, lineno, raise_errors: raise_errors)
+ end
+
+ # Returns the lex_compat result wrapped in `Elem`. Errors are omitted.
+ # Since ripper is a streaming parser, tokens are expected to be emitted in the order
+ # that the parser encounters them. This is not implemented.
+ def parse(...)
+ lex(...).map do |position, event, token, state|
+ Elem.new(position, event, token, state.to_int)
+ end
+ end
+
+ # Similar to parse but ripper sorts the elements by position in the source. Also
+ # includes errors. Since prism does error recovery, in cases of syntax errors
+ # the result may differ greatly compared to ripper.
+ def scan(...)
+ parse(...)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/ripper/sexp.rb b/lib/prism/translation/ripper/sexp.rb
index dc26a639a3..46c0333544 100644
--- a/lib/prism/translation/ripper/sexp.rb
+++ b/lib/prism/translation/ripper/sexp.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: true
+# :markup: markdown
require_relative "../ripper"
@@ -7,9 +8,7 @@ module Prism
class Ripper
# This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
# returns the arrays of [type, *children].
- class SexpBuilder < Ripper
- # :stopdoc:
-
+ class SexpBuilder < Ripper # :nodoc:
attr_reader :error
private
@@ -64,16 +63,12 @@ module Prism
remove_method :on_parse_error
alias on_parse_error on_error
alias compile_error on_error
-
- # :startdoc:
end
# This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
# returns the same values as ::Ripper::SexpBuilder except with a couple of
# niceties that flatten linked lists into arrays.
- class SexpBuilderPP < SexpBuilder
- # :stopdoc:
-
+ class SexpBuilderPP < SexpBuilder # :nodoc:
private
def on_heredoc_dedent(val, width)
@@ -117,8 +112,6 @@ module Prism
alias_method "on_#{event}", :_dispatch_event_push
end
end
-
- # :startdoc:
end
end
end
diff --git a/lib/prism/translation/ripper/shim.rb b/lib/prism/translation/ripper/shim.rb
index 10e21cd16a..00ed625da3 100644
--- a/lib/prism/translation/ripper/shim.rb
+++ b/lib/prism/translation/ripper/shim.rb
@@ -2,4 +2,6 @@
# This writes the prism ripper translation into the Ripper constant so that
# users can transparently use Ripper without any changes.
+# :stopdoc:
Ripper = Prism::Translation::Ripper
+# :startdoc:
diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb
index 8784e22d10..42bc5ee658 100644
--- a/lib/prism/translation/ruby_parser.rb
+++ b/lib/prism/translation/ruby_parser.rb
@@ -1,21 +1,27 @@
# frozen_string_literal: true
+# :markup: markdown
begin
- require "ruby_parser"
+ require "sexp"
rescue LoadError
- warn(%q{Error: Unable to load ruby_parser. Add `gem "ruby_parser"` to your Gemfile.})
+ warn(%q{Error: Unable to load sexp. Add `gem "sexp_processor"` to your Gemfile.})
exit(1)
end
+class RubyParser # :nodoc:
+ class SyntaxError < RuntimeError # :nodoc:
+ end
+end
+
module Prism
module Translation
# This module is the entry-point for converting a prism syntax tree into the
# seattlerb/ruby_parser gem's syntax tree.
class RubyParser
# A prism visitor that builds Sexp objects.
- class Compiler < ::Prism::Compiler
+ class Compiler < ::Prism::Compiler # :nodoc:
# This is the name of the file that we are compiling. We set it on every
- # Sexp object that is generated, and also use it to compile __FILE__
+ # Sexp object that is generated, and also use it to compile `__FILE__`
# nodes.
attr_reader :file
@@ -131,7 +137,7 @@ module Prism
# $+
# ^^
def visit_back_reference_read_node(node)
- s(node, :back_ref, node.name.name.delete_prefix("$").to_sym)
+ s(node, :back_ref, node.name.to_s.delete_prefix("$").to_sym)
end
# begin end
@@ -366,14 +372,18 @@ module Prism
visit(node.constant_path)
end
- if node.body.nil?
- s(node, :class, name, visit(node.superclass))
- elsif node.body.is_a?(StatementsNode)
- compiler = copy_compiler(in_def: false)
- s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) })
- else
- s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false)))
- end
+ result =
+ if node.body.nil?
+ s(node, :class, name, visit(node.superclass))
+ elsif node.body.is_a?(StatementsNode)
+ compiler = copy_compiler(in_def: false)
+ s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) })
+ else
+ s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false)))
+ end
+
+ attach_comments(result, node)
+ result
end
# @@foo
@@ -384,9 +394,6 @@ module Prism
# @@foo = 1
# ^^^^^^^^^
- #
- # @@foo, @@bar = 1
- # ^^^^^ ^^^^^
def visit_class_variable_write_node(node)
s(node, class_variable_write_type, node.name, visit_write_value(node.value))
end
@@ -524,7 +531,9 @@ module Prism
s(node, :defs, visit(node.receiver), name)
end
+ attach_comments(result, node)
result.line(node.name_loc.start_line)
+
if node.parameters.nil?
result << s(node, :args).line(node.name_loc.start_line)
else
@@ -639,9 +648,6 @@ module Prism
# $foo = 1
# ^^^^^^^^
- #
- # $foo, $bar = 1
- # ^^^^ ^^^^
def visit_global_variable_write_node(node)
s(node, :gasgn, node.name, visit_write_value(node.value))
end
@@ -787,9 +793,6 @@ module Prism
# @foo = 1
# ^^^^^^^^
- #
- # @foo, @bar = 1
- # ^^^^ ^^^^
def visit_instance_variable_write_node(node)
s(node, :iasgn, node.name, visit_write_value(node.value))
end
@@ -976,8 +979,8 @@ module Prism
def visit_lambda_node(node)
parameters =
case node.parameters
- when nil, NumberedParametersNode
- s(node, :args)
+ when nil, ItParametersNode, NumberedParametersNode
+ 0
else
visit(node.parameters)
end
@@ -1001,9 +1004,6 @@ module Prism
# foo = 1
# ^^^^^^^
- #
- # foo, bar = 1
- # ^^^ ^^^
def visit_local_variable_write_node(node)
s(node, :lasgn, node.name, visit_write_value(node.value))
end
@@ -1059,8 +1059,8 @@ module Prism
# A node that is missing from the syntax tree. This is only used in the
# case of a syntax error. The parser gem doesn't have such a concept, so
# we invent our own here.
- def visit_missing_node(node)
- raise "Cannot visit missing node directly"
+ def visit_error_recovery_node(node)
+ raise "Cannot visit error recovery node directly"
end
# module Foo; end
@@ -1073,14 +1073,18 @@ module Prism
visit(node.constant_path)
end
- if node.body.nil?
- s(node, :module, name)
- elsif node.body.is_a?(StatementsNode)
- compiler = copy_compiler(in_def: false)
- s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) })
- else
- s(node, :module, name, node.body.accept(copy_compiler(in_def: false)))
- end
+ result =
+ if node.body.nil?
+ s(node, :module, name)
+ elsif node.body.is_a?(StatementsNode)
+ compiler = copy_compiler(in_def: false)
+ s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) })
+ else
+ s(node, :module, name, node.body.accept(copy_compiler(in_def: false)))
+ end
+
+ attach_comments(result, node)
+ result
end
# foo, bar = baz
@@ -1136,6 +1140,12 @@ module Prism
s(node, :nil)
end
+ # def foo(&nil); end
+ # ^^^^
+ def visit_no_block_parameter_node(node)
+ :"&nil"
+ end
+
# def foo(**nil); end
# ^^^^^
def visit_no_keywords_parameter_node(node)
@@ -1188,7 +1198,7 @@ module Prism
# ^^^^^^^^^
def visit_parameters_node(node)
children =
- node.compact_child_nodes.map do |element|
+ node.each_child_node.map do |element|
if element.is_a?(MultiTargetNode)
visit_destructured_parameter(element)
else
@@ -1537,6 +1547,17 @@ module Prism
private
+ # Attach prism comments to the given sexp.
+ def attach_comments(sexp, node)
+ return unless node.comments
+ return if node.comments.empty?
+
+ extra = node.location.start_line - node.comments.last.location.start_line
+ comments = node.comments.map(&:slice)
+ comments.concat([nil] * [0, extra].max)
+ sexp.comments = comments.join("\n")
+ end
+
# Create a new compiler with the given options.
def copy_compiler(in_def: self.in_def, in_pattern: self.in_pattern)
Compiler.new(file, in_def: in_def, in_pattern: in_pattern)
@@ -1615,6 +1636,14 @@ module Prism
translate(Prism.parse_file(filepath, partial_script: true), filepath)
end
+ # Parse the given file and translate it into the
+ # seattlerb/ruby_parser gem's Sexp format. This method is
+ # provided for API compatibility to RubyParser and takes an
+ # optional +timeout+ argument.
+ def process(ruby, file = "(string)", timeout = nil)
+ Timeout.timeout(timeout) { parse(ruby, file) }
+ end
+
class << self
# Parse the given source and translate it into the seattlerb/ruby_parser
# gem's Sexp format.
@@ -1639,6 +1668,7 @@ module Prism
raise ::RubyParser::SyntaxError, "#{filepath}:#{error.location.start_line} :: #{error.message}"
end
+ result.attach_comments!
result.value.accept(Compiler.new(filepath))
end
end