summaryrefslogtreecommitdiff
path: root/lib/prism
diff options
context:
space:
mode:
Diffstat (limited to 'lib/prism')
-rw-r--r--lib/prism/desugar_compiler.rb140
-rw-r--r--lib/prism/ffi.rb259
-rw-r--r--lib/prism/lex_compat.rb469
-rw-r--r--lib/prism/node_ext.rb350
-rw-r--r--lib/prism/node_find.rb185
-rw-r--r--lib/prism/pack.rb228
-rw-r--r--lib/prism/parse_result.rb557
-rw-r--r--lib/prism/parse_result/comments.rb44
-rw-r--r--lib/prism/parse_result/errors.rb9
-rw-r--r--lib/prism/parse_result/newlines.rb62
-rw-r--r--lib/prism/pattern.rb84
-rw-r--r--lib/prism/polyfill/append_as_bytes.rb15
-rw-r--r--lib/prism/polyfill/scan_byte.rb14
-rw-r--r--lib/prism/polyfill/warn.rb36
-rw-r--r--lib/prism/prism.gemspec190
-rw-r--r--lib/prism/relocation.rb187
-rw-r--r--lib/prism/string_query.rb18
-rw-r--r--lib/prism/translation.rb11
-rw-r--r--lib/prism/translation/parser.rb81
-rw-r--r--lib/prism/translation/parser/builder.rb70
-rw-r--r--lib/prism/translation/parser/compiler.rb447
-rw-r--r--lib/prism/translation/parser/lexer.rb521
-rw-r--r--lib/prism/translation/parser33.rb12
-rw-r--r--lib/prism/translation/parser34.rb12
-rw-r--r--lib/prism/translation/parser_current.rb26
-rw-r--r--lib/prism/translation/parser_versions.rb36
-rw-r--r--lib/prism/translation/ripper.rb1270
-rw-r--r--lib/prism/translation/ripper/filter.rb53
-rw-r--r--lib/prism/translation/ripper/lexer.rb133
-rw-r--r--lib/prism/translation/ripper/sexp.rb13
-rw-r--r--lib/prism/translation/ripper/shim.rb2
-rw-r--r--lib/prism/translation/ruby_parser.rb115
32 files changed, 4030 insertions, 1619 deletions
diff --git a/lib/prism/desugar_compiler.rb b/lib/prism/desugar_compiler.rb
index e3b15fc3b0..c64d03f64a 100644
--- a/lib/prism/desugar_compiler.rb
+++ b/lib/prism/desugar_compiler.rb
@@ -1,11 +1,18 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
class DesugarAndWriteNode # :nodoc:
include DSL
- attr_reader :node, :default_source, :read_class, :write_class, :arguments
+ attr_reader :node #: ClassVariableAndWriteNode | ConstantAndWriteNode | GlobalVariableAndWriteNode | InstanceVariableAndWriteNode | LocalVariableAndWriteNode
+ attr_reader :default_source #: Source
+ attr_reader :read_class, :write_class #: Symbol
+ attr_reader :arguments #: Hash[Symbol, untyped]
+ #: ((ClassVariableAndWriteNode | ConstantAndWriteNode | GlobalVariableAndWriteNode | InstanceVariableAndWriteNode | LocalVariableAndWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void
def initialize(node, default_source, read_class, write_class, **arguments)
@node = node
@default_source = default_source
@@ -15,6 +22,8 @@ module Prism
end
# Desugar `x &&= y` to `x && x = y`
+ #--
+ #: () -> node
def compile
and_node(
location: node.location,
@@ -35,8 +44,12 @@ module Prism
class DesugarOrWriteDefinedNode # :nodoc:
include DSL
- attr_reader :node, :default_source, :read_class, :write_class, :arguments
+ attr_reader :node #: ClassVariableOrWriteNode | ConstantOrWriteNode | GlobalVariableOrWriteNode
+ attr_reader :default_source #: Source
+ attr_reader :read_class, :write_class #: Symbol
+ attr_reader :arguments #: Hash[Symbol, untyped]
+ #: ((ClassVariableOrWriteNode | ConstantOrWriteNode | GlobalVariableOrWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void
def initialize(node, default_source, read_class, write_class, **arguments)
@node = node
@default_source = default_source
@@ -46,6 +59,8 @@ module Prism
end
# Desugar `x ||= y` to `defined?(x) ? x : x = y`
+ #--
+ #: () -> node
def compile
if_node(
location: node.location,
@@ -86,8 +101,12 @@ module Prism
class DesugarOperatorWriteNode # :nodoc:
include DSL
- attr_reader :node, :default_source, :read_class, :write_class, :arguments
+ attr_reader :node #: ClassVariableOperatorWriteNode | ConstantOperatorWriteNode | GlobalVariableOperatorWriteNode | InstanceVariableOperatorWriteNode | LocalVariableOperatorWriteNode
+ attr_reader :default_source #: Source
+ attr_reader :read_class, :write_class #: Symbol
+ attr_reader :arguments #: Hash[Symbol, untyped]
+ #: ((ClassVariableOperatorWriteNode | ConstantOperatorWriteNode | GlobalVariableOperatorWriteNode | InstanceVariableOperatorWriteNode | LocalVariableOperatorWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void
def initialize(node, default_source, read_class, write_class, **arguments)
@node = node
@default_source = default_source
@@ -97,6 +116,8 @@ module Prism
end
# Desugar `x += y` to `x = x + y`
+ #--
+ #: () -> node
def compile
binary_operator_loc = node.binary_operator_loc.chop
@@ -130,8 +151,12 @@ module Prism
class DesugarOrWriteNode # :nodoc:
include DSL
- attr_reader :node, :default_source, :read_class, :write_class, :arguments
+ attr_reader :node #: InstanceVariableOrWriteNode | LocalVariableOrWriteNode
+ attr_reader :default_source #: Source
+ attr_reader :read_class, :write_class #: Symbol
+ attr_reader :arguments #: Hash[Symbol, untyped]
+ #: ((InstanceVariableOrWriteNode | LocalVariableOrWriteNode) node, Source default_source, Symbol read_class, Symbol write_class, **untyped arguments) -> void
def initialize(node, default_source, read_class, write_class, **arguments)
@node = node
@default_source = default_source
@@ -141,6 +166,8 @@ module Prism
end
# Desugar `x ||= y` to `x || x = y`
+ #--
+ #: () -> node
def compile
or_node(
location: node.location,
@@ -161,90 +188,105 @@ module Prism
private_constant :DesugarAndWriteNode, :DesugarOrWriteNode, :DesugarOrWriteDefinedNode, :DesugarOperatorWriteNode
class ClassVariableAndWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarAndWriteNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile
end
end
class ClassVariableOrWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOrWriteDefinedNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile
end
end
class ClassVariableOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOperatorWriteNode.new(self, source, :class_variable_read_node, :class_variable_write_node, name: name).compile
end
end
class ConstantAndWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarAndWriteNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile
end
end
class ConstantOrWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOrWriteDefinedNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile
end
end
class ConstantOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOperatorWriteNode.new(self, source, :constant_read_node, :constant_write_node, name: name).compile
end
end
class GlobalVariableAndWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarAndWriteNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile
end
end
class GlobalVariableOrWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOrWriteDefinedNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile
end
end
class GlobalVariableOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOperatorWriteNode.new(self, source, :global_variable_read_node, :global_variable_write_node, name: name).compile
end
end
class InstanceVariableAndWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarAndWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile
end
end
class InstanceVariableOrWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOrWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile
end
end
class InstanceVariableOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOperatorWriteNode.new(self, source, :instance_variable_read_node, :instance_variable_write_node, name: name).compile
end
end
class LocalVariableAndWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarAndWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile
end
end
class LocalVariableOrWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOrWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile
end
end
class LocalVariableOperatorWriteNode
+ #: () -> node
def desugar # :nodoc:
DesugarOperatorWriteNode.new(self, source, :local_variable_read_node, :local_variable_write_node, name: name, depth: depth).compile
end
@@ -253,137 +295,167 @@ module Prism
# DesugarCompiler is a compiler that desugars Ruby code into a more primitive
# form. This is useful for consumers that want to deal with fewer node types.
class DesugarCompiler < MutationCompiler
- # @@foo &&= bar
+ # `@@foo &&= bar`
#
# becomes
#
- # @@foo && @@foo = bar
+ # `@@foo && @@foo = bar`
+ #--
+ #: (ClassVariableAndWriteNode node) -> node
def visit_class_variable_and_write_node(node)
node.desugar
end
- # @@foo ||= bar
+ # `@@foo ||= bar`
#
# becomes
#
- # defined?(@@foo) ? @@foo : @@foo = bar
+ # `defined?(@@foo) ? @@foo : @@foo = bar`
+ #--
+ #: (ClassVariableOrWriteNode node) -> node
def visit_class_variable_or_write_node(node)
node.desugar
end
- # @@foo += bar
+ # `@@foo += bar`
#
# becomes
#
- # @@foo = @@foo + bar
+ # `@@foo = @@foo + bar`
+ #--
+ #: (ClassVariableOperatorWriteNode node) -> node
def visit_class_variable_operator_write_node(node)
node.desugar
end
- # Foo &&= bar
+ # `Foo &&= bar`
#
# becomes
#
- # Foo && Foo = bar
+ # `Foo && Foo = bar`
+ #--
+ #: (ConstantAndWriteNode node) -> node
def visit_constant_and_write_node(node)
node.desugar
end
- # Foo ||= bar
+ # `Foo ||= bar`
#
# becomes
#
- # defined?(Foo) ? Foo : Foo = bar
+ # `defined?(Foo) ? Foo : Foo = bar`
+ #--
+ #: (ConstantOrWriteNode node) -> node
def visit_constant_or_write_node(node)
node.desugar
end
- # Foo += bar
+ # `Foo += bar`
#
# becomes
#
- # Foo = Foo + bar
+ # `Foo = Foo + bar`
+ #--
+ #: (ConstantOperatorWriteNode node) -> node
def visit_constant_operator_write_node(node)
node.desugar
end
- # $foo &&= bar
+ # `$foo &&= bar`
#
# becomes
#
- # $foo && $foo = bar
+ # `$foo && $foo = bar`
+ #--
+ #: (GlobalVariableAndWriteNode node) -> node
def visit_global_variable_and_write_node(node)
node.desugar
end
- # $foo ||= bar
+ # `$foo ||= bar`
#
# becomes
#
- # defined?($foo) ? $foo : $foo = bar
+ # `defined?($foo) ? $foo : $foo = bar`
+ #--
+ #: (GlobalVariableOrWriteNode node) -> node
def visit_global_variable_or_write_node(node)
node.desugar
end
- # $foo += bar
+ # `$foo += bar`
#
# becomes
#
- # $foo = $foo + bar
+ # `$foo = $foo + bar`
+ #--
+ #: (GlobalVariableOperatorWriteNode node) -> node
def visit_global_variable_operator_write_node(node)
node.desugar
end
- # @foo &&= bar
+ # `@foo &&= bar`
#
# becomes
#
- # @foo && @foo = bar
+ # `@foo && @foo = bar`
+ #--
+ #: (InstanceVariableAndWriteNode node) -> node
def visit_instance_variable_and_write_node(node)
node.desugar
end
- # @foo ||= bar
+ # `@foo ||= bar`
#
# becomes
#
- # @foo || @foo = bar
+ # `@foo || @foo = bar`
+ #--
+ #: (InstanceVariableOrWriteNode node) -> node
def visit_instance_variable_or_write_node(node)
node.desugar
end
- # @foo += bar
+ # `@foo += bar`
#
# becomes
#
- # @foo = @foo + bar
+ # `@foo = @foo + bar`
+ #--
+ #: (InstanceVariableOperatorWriteNode node) -> node
def visit_instance_variable_operator_write_node(node)
node.desugar
end
- # foo &&= bar
+ # `foo &&= bar`
#
# becomes
#
- # foo && foo = bar
+ # `foo && foo = bar`
+ #--
+ #: (LocalVariableAndWriteNode node) -> node
def visit_local_variable_and_write_node(node)
node.desugar
end
- # foo ||= bar
+ # `foo ||= bar`
#
# becomes
#
- # foo || foo = bar
+ # `foo || foo = bar`
+ #--
+ #: (LocalVariableOrWriteNode node) -> node
def visit_local_variable_or_write_node(node)
node.desugar
end
- # foo += bar
+ # `foo += bar`
#
# becomes
#
- # foo = foo + bar
+ # `foo = foo + bar`
+ #--
+ #: (LocalVariableOperatorWriteNode node) -> node
def visit_local_variable_operator_write_node(node)
node.desugar
end
diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb
index 29f312e479..6b9bde51ea 100644
--- a/lib/prism/ffi.rb
+++ b/lib/prism/ffi.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: true
+# :markup: markdown
# typed: ignore
# This file is responsible for mirroring the API provided by the C extension by
@@ -7,7 +8,11 @@
require "rbconfig"
require "ffi"
-module Prism
+# We want to eagerly load this file if there are Ractors so that it does not get
+# autoloaded from within a non-main Ractor.
+require "prism/serialize" if defined?(Ractor)
+
+module Prism # :nodoc:
module LibRubyParser # :nodoc:
extend FFI::Library
@@ -15,7 +20,8 @@ module Prism
# must align with the build shared library from make/rake.
libprism_in_build = File.expand_path("../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__)
libprism_in_libdir = "#{RbConfig::CONFIG["libdir"]}/prism/libprism.#{RbConfig::CONFIG["SOEXT"]}"
- if File.exist? libprism_in_build
+
+ if File.exist?(libprism_in_build)
INCLUDE_DIR = File.expand_path("../../include", __dir__)
ffi_lib libprism_in_build
else
@@ -53,6 +59,9 @@ module Prism
# We only want to load the functions that we are interested in.
next unless functions.any? { |function| line.include?(function) }
+ # Strip trailing attributes (PRISM_NODISCARD, PRISM_NONNULL(...), etc.)
+ line = line.sub(/\)(\s+PRISM_\w+(?:\([^)]*\))?)+\s*;/, ");")
+
# Parse the function declaration.
unless /^PRISM_EXPORTED_FUNCTION (?<return_type>.+) (?<name>\w+)\((?<arg_types>.+)\);$/ =~ line
raise "Could not parse #{line}"
@@ -79,29 +88,44 @@ module Prism
raise "Could not find functions #{functions.inspect}" unless functions.empty?
end
- callback :pm_parse_stream_fgets_t, [:pointer, :int, :pointer], :pointer
- enum :pm_string_init_result_t, %i[PM_STRING_INIT_SUCCESS PM_STRING_INIT_ERROR_GENERIC PM_STRING_INIT_ERROR_DIRECTORY]
+ callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer
+ callback :pm_source_stream_feof_t, [:pointer], :int
+ pm_source_init_result_values = %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR]
+ enum :pm_source_init_result_t, pm_source_init_result_values
enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE]
+ # Ractor-safe lookup table for pm_source_init_result_t, since FFI's
+ # enum_type accesses module instance variables that are not shareable.
+ SOURCE_INIT_RESULT = pm_source_init_result_values.freeze
+
load_exported_functions_from(
- "prism.h",
+ "prism/version.h",
"pm_version",
+ []
+ )
+
+ load_exported_functions_from(
+ "prism/serialize.h",
"pm_serialize_parse",
"pm_serialize_parse_stream",
"pm_serialize_parse_comments",
"pm_serialize_lex",
"pm_serialize_parse_lex",
- "pm_parse_success_p",
+ "pm_serialize_parse_success_p",
+ []
+ )
+
+ load_exported_functions_from(
+ "prism/string_query.h",
"pm_string_query_local",
"pm_string_query_constant",
"pm_string_query_method_name",
- [:pm_parse_stream_fgets_t]
+ []
)
load_exported_functions_from(
- "prism/util/pm_buffer.h",
- "pm_buffer_sizeof",
- "pm_buffer_init",
+ "prism/buffer.h",
+ "pm_buffer_new",
"pm_buffer_value",
"pm_buffer_length",
"pm_buffer_free",
@@ -109,20 +133,19 @@ module Prism
)
load_exported_functions_from(
- "prism/util/pm_string.h",
- "pm_string_mapped_init",
- "pm_string_free",
- "pm_string_source",
- "pm_string_length",
- "pm_string_sizeof",
- []
+ "prism/source.h",
+ "pm_source_file_new",
+ "pm_source_mapped_new",
+ "pm_source_stream_new",
+ "pm_source_free",
+ "pm_source_source",
+ "pm_source_length",
+ [:pm_source_stream_fgets_t, :pm_source_stream_feof_t]
)
# This object represents a pm_buffer_t. We only use it as an opaque pointer,
# so it doesn't need to know the fields of pm_buffer_t.
class PrismBuffer # :nodoc:
- SIZEOF = LibRubyParser.pm_buffer_sizeof
-
attr_reader :pointer
def initialize(pointer)
@@ -144,19 +167,22 @@ module Prism
# Initialize a new buffer and yield it to the block. The buffer will be
# automatically freed when the block returns.
def self.with
- FFI::MemoryPointer.new(SIZEOF) do |pointer|
- raise unless LibRubyParser.pm_buffer_init(pointer)
- return yield new(pointer)
+ buffer = LibRubyParser.pm_buffer_new
+ raise unless buffer
+
+ begin
+ yield new(buffer)
ensure
- LibRubyParser.pm_buffer_free(pointer)
+ LibRubyParser.pm_buffer_free(buffer)
end
end
end
- # This object represents a pm_string_t. We only use it as an opaque pointer,
- # so it doesn't have to be an FFI::Struct.
- class PrismString # :nodoc:
- SIZEOF = LibRubyParser.pm_string_sizeof
+ # This object represents source code to be parsed. For strings it wraps a
+ # pointer directly; for files it uses a pm_source_t under the hood.
+ class PrismSource # :nodoc:
+ PLATFORM_EXPECTS_UTF8 =
+ RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i)
attr_reader :pointer, :length
@@ -171,7 +197,7 @@ module Prism
@pointer.read_string(@length)
end
- # Yields a pm_string_t pointer to the given block.
+ # Yields a PrismSource backed by the given string to the block.
def self.with_string(string)
raise TypeError unless string.is_a?(String)
@@ -185,33 +211,38 @@ module Prism
end
end
- # Yields a pm_string_t pointer to the given block.
+ # Yields a PrismSource to the given block, backed by a pm_source_t.
def self.with_file(filepath)
raise TypeError unless filepath.is_a?(String)
# On Windows and Mac, it's expected that filepaths will be encoded in
# UTF-8. If they are not, we need to convert them to UTF-8 before
- # passing them into pm_string_mapped_init.
- if RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i) &&
- (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8
+ # passing them into pm_source_mapped_new.
+ if PLATFORM_EXPECTS_UTF8 && (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8
filepath = filepath.encode(Encoding::UTF_8)
end
- FFI::MemoryPointer.new(SIZEOF) do |pm_string|
- case (result = LibRubyParser.pm_string_mapped_init(pm_string, filepath))
- when :PM_STRING_INIT_SUCCESS
- pointer = LibRubyParser.pm_string_source(pm_string)
- length = LibRubyParser.pm_string_length(pm_string)
+ FFI::MemoryPointer.new(:int) do |result_ptr|
+ pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr)
+
+ case SOURCE_INIT_RESULT[result_ptr.read_int]
+ when :PM_SOURCE_INIT_SUCCESS
+ pointer = LibRubyParser.pm_source_source(pm_source)
+ length = LibRubyParser.pm_source_length(pm_source)
return yield new(pointer, length, false)
- when :PM_STRING_INIT_ERROR_GENERIC
+ when :PM_SOURCE_INIT_ERROR_GENERIC
raise SystemCallError.new(filepath, FFI.errno)
- when :PM_STRING_INIT_ERROR_DIRECTORY
+ when :PM_SOURCE_INIT_ERROR_DIRECTORY
raise Errno::EISDIR.new(filepath)
+ when :PM_SOURCE_INIT_ERROR_NON_REGULAR
+ # Fall back to reading the file through Ruby IO for non-regular
+ # files (pipes, character devices, etc.)
+ return with_string(File.read(filepath)) { |string| yield string }
else
- raise "Unknown error initializing pm_string_t: #{result.inspect}"
+ raise "Unknown error initializing pm_source_t: #{result_ptr.read_int}"
end
ensure
- LibRubyParser.pm_string_free(pm_string)
+ LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null?
end
end
end
@@ -222,34 +253,34 @@ module Prism
private_constant :LibRubyParser
# The version constant is set by reading the result of calling pm_version.
- VERSION = LibRubyParser.pm_version.read_string
+ VERSION = LibRubyParser.pm_version.read_string.freeze
class << self
# Mirror the Prism.dump API by using the serialization API.
def dump(source, **options)
- LibRubyParser::PrismString.with_string(source) { |string| dump_common(string, options) }
+ LibRubyParser::PrismSource.with_string(source) { |string| dump_common(string, options) }
end
# Mirror the Prism.dump_file API by using the serialization API.
def dump_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| dump_common(string, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| dump_common(string, options) }
end
# Mirror the Prism.lex API by using the serialization API.
def lex(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| lex_common(string, code, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| lex_common(string, code, options) }
end
# Mirror the Prism.lex_file API by using the serialization API.
def lex_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| lex_common(string, string.read, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| lex_common(string, string.read, options) }
end
# Mirror the Prism.parse API by using the serialization API.
def parse(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| parse_common(string, code, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| parse_common(string, code, options) }
end
# Mirror the Prism.parse_file API by using the serialization API. This uses
@@ -257,7 +288,7 @@ module Prism
# when it is available.
def parse_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| parse_common(string, string.read, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| parse_common(string, string.read, options) }
end
# Mirror the Prism.parse_stream API by using the serialization API.
@@ -273,19 +304,21 @@ module Prism
end
}
- # In the pm_serialize_parse_stream function it accepts a pointer to the
- # IO object as a void* and then passes it through to the callback as the
- # third argument, but it never touches it itself. As such, since we have
- # access to the IO object already through the closure of the lambda, we
- # can pass a null pointer here and not worry.
- LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, dump_options(options))
- Prism.load(source, buffer.read)
+ eof_callback = -> (_) { stream.eof? }
+
+ pm_source = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback)
+ begin
+ LibRubyParser.pm_serialize_parse_stream(buffer.pointer, pm_source, dump_options(options))
+ Prism.load(source, buffer.read, options.fetch(:freeze, false))
+ ensure
+ LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null?
+ end
end
end
# Mirror the Prism.parse_comments API by using the serialization API.
def parse_comments(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| parse_comments_common(string, code, options) }
end
# Mirror the Prism.parse_file_comments API by using the serialization
@@ -293,23 +326,23 @@ module Prism
# to use mmap when it is available.
def parse_file_comments(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| parse_comments_common(string, string.read, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| parse_comments_common(string, string.read, options) }
end
# Mirror the Prism.parse_lex API by using the serialization API.
def parse_lex(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| parse_lex_common(string, code, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| parse_lex_common(string, code, options) }
end
# Mirror the Prism.parse_lex_file API by using the serialization API.
def parse_lex_file(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| parse_lex_common(string, string.read, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| parse_lex_common(string, string.read, options) }
end
# Mirror the Prism.parse_success? API by using the serialization API.
def parse_success?(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| parse_file_success_common(string, options) }
+ LibRubyParser::PrismSource.with_string(code) { |string| parse_file_success_common(string, options) }
end
# Mirror the Prism.parse_failure? API by using the serialization API.
@@ -320,7 +353,7 @@ module Prism
# Mirror the Prism.parse_file_success? API by using the serialization API.
def parse_file_success?(filepath, **options)
options[:filepath] = filepath
- LibRubyParser::PrismString.with_file(filepath) { |string| parse_file_success_common(string, options) }
+ LibRubyParser::PrismSource.with_file(filepath) { |string| parse_file_success_common(string, options) }
end
# Mirror the Prism.parse_file_failure? API by using the serialization API.
@@ -330,7 +363,7 @@ module Prism
# Mirror the Prism.profile API by using the serialization API.
def profile(source, **options)
- LibRubyParser::PrismString.with_string(source) do |string|
+ LibRubyParser::PrismSource.with_string(source) do |string|
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
nil
@@ -340,7 +373,7 @@ module Prism
# Mirror the Prism.profile_file API by using the serialization API.
def profile_file(filepath, **options)
- LibRubyParser::PrismString.with_file(filepath) do |string|
+ LibRubyParser::PrismSource.with_file(filepath) do |string|
LibRubyParser::PrismBuffer.with do |buffer|
options[:filepath] = filepath
LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
@@ -354,55 +387,42 @@ module Prism
def dump_common(string, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
- buffer.read
+
+ dumped = buffer.read
+ dumped.freeze if options.fetch(:freeze, false)
+
+ dumped
end
end
def lex_common(string, code, options) # :nodoc:
- serialized = LibRubyParser::PrismBuffer.with do |buffer|
+ LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
- buffer.read
+ Serialize.load_lex(code, buffer.read, options.fetch(:freeze, false))
end
-
- Serialize.load_tokens(Source.for(code), serialized)
end
def parse_common(string, code, options) # :nodoc:
serialized = dump_common(string, options)
- Prism.load(code, serialized)
+ Serialize.load_parse(code, serialized, options.fetch(:freeze, false))
end
def parse_comments_common(string, code, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options))
-
- source = Source.for(code)
- loader = Serialize::Loader.new(source, buffer.read)
-
- loader.load_header
- loader.load_encoding
- loader.load_start_line
- loader.load_comments
+ Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false))
end
end
def parse_lex_common(string, code, options) # :nodoc:
LibRubyParser::PrismBuffer.with do |buffer|
LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options))
-
- source = Source.for(code)
- loader = Serialize::Loader.new(source, buffer.read)
-
- tokens = loader.load_tokens
- node, comments, magic_comments, data_loc, errors, warnings = loader.load_nodes
- tokens.each { |token,| token.value.force_encoding(loader.encoding) }
-
- ParseLexResult.new([node, tokens], comments, magic_comments, data_loc, errors, warnings, source)
+ Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false))
end
end
def parse_file_success_common(string, options) # :nodoc:
- LibRubyParser.pm_parse_success_p(string.pointer, string.length, dump_options(options))
+ LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options))
end
# Return the value that should be dumped for the command_line option.
@@ -426,14 +446,35 @@ module Prism
# Return the value that should be dumped for the version option.
def dump_options_version(version)
case version
- when nil, "latest"
- 0
+ when "current"
+ version_string_to_number(RUBY_VERSION) || raise(CurrentVersionError, RUBY_VERSION)
+ when "latest", nil
+ 0 # Handled in pm_parser_init
+ when "nearest"
+ dump = version_string_to_number(RUBY_VERSION)
+ return dump if dump
+ if RUBY_VERSION < "3.3"
+ version_string_to_number("3.3")
+ else
+ 0 # Handled in pm_parser_init
+ end
+ else
+ version_string_to_number(version) || raise(ArgumentError, "invalid version: #{version}")
+ end
+ end
+
+ # Converts a version string like "4.0.0" or "4.0" into a number.
+ # Returns nil if the version is unknown.
+ def version_string_to_number(version)
+ case version
when /\A3\.3(\.\d+)?\z/
1
when /\A3\.4(\.\d+)?\z/
- 0
- else
- raise ArgumentError, "invalid version: #{version}"
+ 2
+ when /\A3\.5(\.\d+)?\z/, /\A4\.0(\.\d+)?\z/
+ 3
+ when /\A4\.1(\.\d+)?\z/
+ 4
end
end
@@ -480,15 +521,43 @@ module Prism
template << "C"
values << (options.fetch(:partial_script, false) ? 1 : 0)
+ template << "C"
+ values << (options.fetch(:freeze, false) ? 1 : 0)
+
template << "L"
if (scopes = options[:scopes])
values << scopes.length
scopes.each do |scope|
+ locals = nil
+ forwarding = 0
+
+ case scope
+ when Array
+ locals = scope
+ when Scope
+ locals = scope.locals
+
+ scope.forwarding.each do |forward|
+ case forward
+ when :* then forwarding |= 0x1
+ when :** then forwarding |= 0x2
+ when :& then forwarding |= 0x4
+ when :"..." then forwarding |= 0x8
+ else raise ArgumentError, "invalid forwarding value: #{forward}"
+ end
+ end
+ else
+ raise TypeError, "wrong argument type #{scope.class.inspect} (expected Array or Prism::Scope)"
+ end
+
template << "L"
- values << scope.length
+ values << locals.length
+
+ template << "C"
+ values << forwarding
- scope.each do |local|
+ locals.each do |local|
name = local.name
template << "L"
values << name.bytesize
@@ -507,7 +576,7 @@ module Prism
# Here we are going to patch StringQuery to put in the class-level methods so
# that it can maintain a consistent interface
- class StringQuery
+ class StringQuery # :nodoc:
class << self
# Mirrors the C extension's StringQuery::local? method.
def local?(string)
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index a83c24cb41..7aacec037d 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -1,28 +1,64 @@
# frozen_string_literal: true
-
-require "delegate"
-require "ripper"
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
+ # @rbs!
+ # module Translation
+ # class Ripper
+ # EXPR_NONE: Integer
+ # EXPR_BEG: Integer
+ # EXPR_MID: Integer
+ # EXPR_END: Integer
+ # EXPR_CLASS: Integer
+ # EXPR_VALUE: Integer
+ # EXPR_ARG: Integer
+ # EXPR_CMDARG: Integer
+ # EXPR_ENDARG: Integer
+ # EXPR_ENDFN: Integer
+ #
+ # class Lexer < Ripper
+ # class State
+ # def self.[]: (Integer value) -> State
+ # end
+ # end
+ #
+ # class LineAndColumnCache
+ # def initialize: (Source source) -> void
+ #
+ # def line_and_column: (Integer byte_offset) -> [Integer, Integer]
+ # end
+ # end
+ # end
+
# This class is responsible for lexing the source using prism and then
# converting those tokens to be compatible with Ripper. In the vast majority
# of cases, this is a one-to-one mapping of the token type. Everything else
# generally lines up. However, there are a few cases that require special
# handling.
class LexCompat # :nodoc:
+ # @rbs!
+ # # A token produced by the Ripper lexer that Prism is replicating.
+ # type lex_compat_token = [[Integer, Integer], Symbol, String, untyped]
+
# A result class specialized for holding tokens produced by the lexer.
class Result < Prism::Result
# The list of tokens that were produced by the lexer.
- attr_reader :value
+ attr_reader :value #: Array[lex_compat_token]
# Create a new lex compat result object with the given values.
- def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+ #--
+ #: (Array[lex_compat_token] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
@value = value
- super(comments, magic_comments, data_loc, errors, warnings, source)
+ super(comments, magic_comments, data_loc, errors, warnings, continuable, source)
end
# Implement the hash pattern matching interface for Result.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
super.merge!(value: value)
end
end
@@ -104,6 +140,7 @@ module Prism
KEYWORD_DEF: :on_kw,
KEYWORD_DEFINED: :on_kw,
KEYWORD_DO: :on_kw,
+ KEYWORD_DO_BLOCK: :on_kw,
KEYWORD_DO_LOOP: :on_kw,
KEYWORD_ELSE: :on_kw,
KEYWORD_ELSIF: :on_kw,
@@ -198,93 +235,6 @@ module Prism
"__END__": :on___end__
}.freeze
- # When we produce tokens, we produce the same arrays that Ripper does.
- # However, we add a couple of convenience methods onto them to make them a
- # little easier to work with. We delegate all other methods to the array.
- class Token < SimpleDelegator
- # @dynamic initialize, each, []
-
- # The location of the token in the source.
- def location
- self[0]
- end
-
- # The type of the token.
- def event
- self[1]
- end
-
- # The slice of the source that this token represents.
- def value
- self[2]
- end
-
- # The state of the lexer when this token was produced.
- def state
- self[3]
- end
- end
-
- # Ripper doesn't include the rest of the token in the event, so we need to
- # trim it down to just the content on the first line when comparing.
- class EndContentToken < Token
- def ==(other) # :nodoc:
- [self[0], self[1], self[2][0..self[2].index("\n")], self[3]] == other
- end
- end
-
- # Tokens where state should be ignored
- # used for :on_comment, :on_heredoc_end, :on_embexpr_end
- class IgnoreStateToken < Token
- def ==(other) # :nodoc:
- self[0...-1] == other[0...-1]
- end
- end
-
- # Ident tokens for the most part are exactly the same, except sometimes we
- # know an ident is a local when ripper doesn't (when they are introduced
- # through named captures in regular expressions). In that case we don't
- # compare the state.
- class IdentToken < Token
- def ==(other) # :nodoc:
- (self[0...-1] == other[0...-1]) && (
- (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) ||
- (other[3] & Ripper::EXPR_ARG_ANY != 0)
- )
- end
- end
-
- # Ignored newlines can occasionally have a LABEL state attached to them, so
- # we compare the state differently here.
- class IgnoredNewlineToken < Token
- def ==(other) # :nodoc:
- return false unless self[0...-1] == other[0...-1]
-
- if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED
- other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0
- else
- self[3] == other[3]
- end
- end
- end
-
- # If we have an identifier that follows a method name like:
- #
- # def foo bar
- #
- # then Ripper will mark bar as END|LABEL if there is a local in a parent
- # scope named bar because it hasn't pushed the local table yet. We do this
- # more accurately, so we need to allow comparing against both END and
- # END|LABEL.
- class ParamToken < Token
- def ==(other) # :nodoc:
- (self[0...-1] == other[0...-1]) && (
- (other[3] == Ripper::EXPR_END) ||
- (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
- )
- end
- end
-
# A heredoc in this case is a list of tokens that belong to the body of the
# heredoc that should be appended onto the list of tokens when the heredoc
# closes.
@@ -294,16 +244,19 @@ module Prism
# order back into the token stream and set the state of the last token to
# the state that the heredoc was opened in.
class PlainHeredoc # :nodoc:
- attr_reader :tokens
+ attr_reader :tokens #: Array[lex_compat_token]
+ #: () -> void
def initialize
@tokens = []
end
+ #: (lex_compat_token token) -> void
def <<(token)
tokens << token
end
+ #: () -> Array[lex_compat_token]
def to_a
tokens
end
@@ -313,22 +266,26 @@ module Prism
# that need to be split on "\\\n" to mimic Ripper's behavior. We also need
# to keep track of the state that the heredoc was opened in.
class DashHeredoc # :nodoc:
- attr_reader :split, :tokens
+ attr_reader :split #: bool
+ attr_reader :tokens #: Array[lex_compat_token]
+ #: (bool split) -> void
def initialize(split)
@split = split
@tokens = []
end
+ #: (lex_compat_token token) -> void
def <<(token)
tokens << token
end
+ #: () -> Array[lex_compat_token]
def to_a
embexpr_balance = 0
- tokens.each_with_object([]) do |token, results| #$ Array[Token]
- case token.event
+ tokens.each_with_object([]) do |token, results| #$ Array[lex_compat_token]
+ case token[1]
when :on_embexpr_beg
embexpr_balance += 1
results << token
@@ -343,9 +300,9 @@ module Prism
if split
# Split on "\\\n" to mimic Ripper's behavior. Use a lookbehind
# to keep the delimiter in the result.
- token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
+ token[2].split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/).each_with_index do |value, index|
column = 0 if index > 0
- results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
+ results << [[lineno, column], :on_tstring_content, value, token[3]]
lineno += value.count("\n")
end
else
@@ -374,8 +331,13 @@ module Prism
class DedentingHeredoc # :nodoc:
TAB_WIDTH = 8
- attr_reader :tokens, :dedent_next, :dedent, :embexpr_balance
+ attr_reader :tokens #: Array[lex_compat_token]
+ attr_reader :dedent_next #: bool
+ attr_reader :dedent #: Integer?
+ attr_reader :embexpr_balance #: Integer
+ # @rbs @ended_on_newline: bool
+ #: () -> void
def initialize
@tokens = []
@dedent_next = true
@@ -387,8 +349,10 @@ module Prism
# As tokens are coming in, we track the minimum amount of common leading
# whitespace on plain string content tokens. This allows us to later
# remove that amount of whitespace from the beginning of each line.
+ #
+ #: (lex_compat_token token) -> void
def <<(token)
- case token.event
+ case token[1]
when :on_embexpr_beg, :on_heredoc_beg
@embexpr_balance += 1
@dedent = 0 if @dedent_next && @ended_on_newline
@@ -396,10 +360,10 @@ module Prism
@embexpr_balance -= 1
when :on_tstring_content
if embexpr_balance == 0
- line = token.value
+ line = token[2]
if dedent_next && !(line.strip.empty? && line.end_with?("\n"))
- leading = line[/\A(\s*)\n?/, 1]
+ leading = line[/\A(\s*)\n?/, 1] #: String
next_dedent = 0
leading.each_char do |char|
@@ -419,20 +383,21 @@ module Prism
end
end
- @dedent_next = token.event == :on_tstring_content && embexpr_balance == 0
+ @dedent_next = token[1] == :on_tstring_content && embexpr_balance == 0
@ended_on_newline = false
tokens << token
end
+ #: () -> Array[lex_compat_token]
def to_a
# If every line in the heredoc is blank, we still need to split up the
# string content token into multiple tokens.
if dedent.nil?
- results = [] #: Array[Token]
+ results = [] #: Array[lex_compat_token]
embexpr_balance = 0
tokens.each do |token|
- case token.event
+ case token[1]
when :on_embexpr_beg, :on_heredoc_beg
embexpr_balance += 1
results << token
@@ -444,9 +409,9 @@ module Prism
lineno = token[0][0]
column = token[0][1]
- token.value.split(/(?<=\n)/).each_with_index do |value, index|
+ token[2].split(/(?<=\n)/).each_with_index do |value, index|
column = 0 if index > 0
- results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
+ results << [[lineno, column], :on_tstring_content, value, token[3]]
lineno += 1
end
else
@@ -463,7 +428,7 @@ module Prism
# If the minimum common whitespace is 0, then we need to concatenate
# string nodes together that are immediately adjacent.
if dedent == 0
- results = [] #: Array[Token]
+ results = [] #: Array[lex_compat_token]
embexpr_balance = 0
index = 0
@@ -474,15 +439,15 @@ module Prism
results << token
index += 1
- case token.event
+ case token[1]
when :on_embexpr_beg, :on_heredoc_beg
embexpr_balance += 1
when :on_embexpr_end, :on_heredoc_end
embexpr_balance -= 1
when :on_tstring_content
if embexpr_balance == 0
- while index < max_index && tokens[index].event == :on_tstring_content && !token.value.match?(/\\\r?\n\z/)
- token.value << tokens[index].value
+ while index < max_index && tokens[index][1] == :on_tstring_content && !token[2].match?(/\\\r?\n\z/)
+ token[2] << tokens[index][2]
index += 1
end
end
@@ -496,7 +461,7 @@ module Prism
# insert on_ignored_sp tokens for the amount of dedent that we need to
# perform. We also need to remove the dedent from the beginning of
# each line of plain string content tokens.
- results = [] #: Array[Token]
+ results = [] #: Array[lex_compat_token]
dedent_next = true
embexpr_balance = 0
@@ -505,7 +470,7 @@ module Prism
# whitespace calculation we performed above. This is because
# checking if the subsequent token needs to be dedented is common to
# both the dedent calculation and the ignored_sp insertion.
- case token.event
+ case token[1]
when :on_embexpr_beg
embexpr_balance += 1
results << token
@@ -517,7 +482,7 @@ module Prism
# Here we're going to split the string on newlines, but maintain
# the newlines in the resulting array. We'll do that with a look
# behind assertion.
- splits = token.value.split(/(?<=\n)/)
+ splits = token[2].split(/(?<=\n)/)
index = 0
while index < splits.length
@@ -535,7 +500,8 @@ module Prism
# line or this line doesn't start with whitespace, then we
# should concatenate the rest of the string to match ripper.
if dedent == 0 && (!dedent_next || !line.start_with?(/\s/))
- line = splits[index..].join
+ unjoined = splits[index..] #: Array[String]
+ line = unjoined.join
index = splits.length
end
@@ -574,12 +540,12 @@ module Prism
ignored = deleted_chars.join
line.delete_prefix!(ignored)
- results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]])
+ results << [[lineno, 0], :on_ignored_sp, ignored, token[3]]
column = ignored.length
end
end
- results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty?
+ results << [[lineno, column], token[1], line, token[3]] unless line.empty?
index += 1
end
else
@@ -590,7 +556,7 @@ module Prism
end
dedent_next =
- ((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) &&
+ ((token[1] == :on_tstring_content) || (token[1] == :on_heredoc_end)) &&
embexpr_balance == 0
end
@@ -600,12 +566,14 @@ module Prism
# Here we will split between the two types of heredocs and return the
# object that will store their tokens.
+ #--
+ #: (lex_compat_token opening) -> (PlainHeredoc | DashHeredoc | DedentingHeredoc)
def self.build(opening)
- case opening.value[2]
+ case opening[2][2]
when "~"
DedentingHeredoc.new
when "-"
- DashHeredoc.new(opening.value[3] != "'")
+ DashHeredoc.new(opening[2][3] != "'")
else
PlainHeredoc.new
end
@@ -614,33 +582,43 @@ module Prism
private_constant :Heredoc
- attr_reader :source, :options
+ # In previous versions of Ruby, Ripper wouldn't flush the bom before the
+ # first token, so we had to have a hack in place to account for that.
+ BOM_FLUSHED = RUBY_VERSION >= "3.3.0"
+ private_constant :BOM_FLUSHED
+ attr_reader :options #: Hash[Symbol, untyped]
+ # @rbs @source: String
+
+ #: (String source, **untyped options) -> void
def initialize(source, **options)
@source = source
@options = options
end
+ #: () -> Result
def result
- tokens = [] #: Array[LexCompat::Token]
+ tokens = [] #: Array[lex_compat_token]
state = :default
heredoc_stack = [[]] #: Array[Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]]
- result = Prism.lex(source, **options)
+ result = Prism.lex(@source, **options)
+ source = result.source
result_value = result.value
- previous_state = nil #: Ripper::Lexer::State?
+ previous_state = nil #: Translation::Ripper::Lexer::State?
last_heredoc_end = nil #: Integer?
+ eof_token = nil #: Token?
+
+ bom = source.slice(0, 3) == "\xEF\xBB\xBF"
- # In previous versions of Ruby, Ripper wouldn't flush the bom before the
- # first token, so we had to have a hack in place to account for that. This
- # checks for that behavior.
- bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
- bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
+ result_value.each_with_index do |(prism_token, prism_state), index|
+ lineno = prism_token.location.start_line
+ column = prism_token.location.start_column
- result_value.each_with_index do |(token, lex_state), index|
- lineno = token.location.start_line
- column = token.location.start_column
+ event = RIPPER.fetch(prism_token.type)
+ value = prism_token.value
+ lex_state = Translation::Ripper::Lexer::State[prism_state]
# If there's a UTF-8 byte-order mark as the start of the file, then for
# certain tokens ripper sets the first token back by 3 bytes. It also
@@ -650,70 +628,53 @@ module Prism
if bom && lineno == 1
column -= 3
- if index == 0 && column == 0 && !bom_flushed
+ if index == 0 && column == 0 && !BOM_FLUSHED
flushed =
- case token.type
+ case prism_token.type
when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
:GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I,
:PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I,
:PERCENT_UPPER_W, :STRING_BEGIN
true
when :REGEXP_BEGIN, :SYMBOL_BEGIN
- token.value.start_with?("%")
+ value.start_with?("%")
else
false
end
unless flushed
column -= 3
- value = token.value
value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding))
end
end
end
- event = RIPPER.fetch(token.type)
- value = token.value
- lex_state = Ripper::Lexer::State.new(lex_state)
-
- token =
+ lex_compat_token =
case event
when :on___end__
- EndContentToken.new([[lineno, column], event, value, lex_state])
+ # Ripper doesn't include the rest of the token in the event, so we need to
+ # trim it down to just the content on the first line.
+ value = value[0..value.index("\n")] #: String
+ [[lineno, column], event, value, lex_state]
when :on_comment
- IgnoreStateToken.new([[lineno, column], event, value, lex_state])
+ [[lineno, column], event, value, lex_state]
when :on_heredoc_end
# Heredoc end tokens can be emitted in an odd order, so we don't
# want to bother comparing the state on them.
- last_heredoc_end = token.location.end_offset
- IgnoreStateToken.new([[lineno, column], event, value, lex_state])
- when :on_ident
- if lex_state == Ripper::EXPR_END
- # If we have an identifier that follows a method name like:
- #
- # def foo bar
- #
- # then Ripper will mark bar as END|LABEL if there is a local in a
- # parent scope named bar because it hasn't pushed the local table
- # yet. We do this more accurately, so we need to allow comparing
- # against both END and END|LABEL.
- ParamToken.new([[lineno, column], event, value, lex_state])
- elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
- # In the event that we're comparing identifiers, we're going to
- # allow a little divergence. Ripper doesn't account for local
- # variables introduced through named captures in regexes, and we
- # do, which accounts for this difference.
- IdentToken.new([[lineno, column], event, value, lex_state])
- else
- Token.new([[lineno, column], event, value, lex_state])
- end
+ last_heredoc_end = prism_token.location.end_offset
+ [[lineno, column], event, value, lex_state]
when :on_embexpr_end
- IgnoreStateToken.new([[lineno, column], event, value, lex_state])
- when :on_ignored_nl
- # Ignored newlines can occasionally have a LABEL state attached to
- # them which doesn't actually impact anything. We don't mirror that
- # state so we ignored it.
- IgnoredNewlineToken.new([[lineno, column], event, value, lex_state])
+ [[lineno, column], event, value, lex_state]
+ when :on_words_sep
+ # Ripper emits one token each per line.
+ value.each_line.with_index do |line, index|
+ if index > 0
+ lineno += 1
+ column = 0
+ end
+ tokens << [[lineno, column], event, line, lex_state]
+ end
+ tokens.pop #: lex_compat_token
when :on_regexp_end
# On regex end, Ripper scans and then sets end state, so the ripper
# lexed output is begin, when it should be end. prism sets lex state
@@ -738,13 +699,14 @@ module Prism
counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
end
- Ripper::Lexer::State.new(result_value[current_index][1])
+ Translation::Ripper::Lexer::State[result_value[current_index][1]]
else
previous_state
end
- Token.new([[lineno, column], event, value, lex_state])
+ [[lineno, column], event, value, lex_state]
when :on_eof
+ eof_token = prism_token
previous_token = result_value[index - 1][0]
# If we're at the end of the file and the previous token was a
@@ -759,7 +721,7 @@ module Prism
# Use the greater offset of the two to determine the start of
# the trailing whitespace.
start_offset = [previous_token.location.end_offset, last_heredoc_end].compact.max
- end_offset = token.location.start_offset
+ end_offset = prism_token.location.start_offset
if start_offset < end_offset
if bom
@@ -767,14 +729,14 @@ module Prism
end_offset += 3
end
- tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
+ tokens << [[lineno, 0], :on_nl, source.slice(start_offset, end_offset - start_offset), lex_state]
end
end
- Token.new([[lineno, column], event, value, lex_state])
+ [[lineno, column], event, value, lex_state]
else
- Token.new([[lineno, column], event, value, lex_state])
- end
+ [[lineno, column], event, value, lex_state]
+ end #: lex_compat_token
previous_state = lex_state
@@ -791,19 +753,19 @@ module Prism
when :default
# The default state is when there are no heredocs at all. In this
# state we can append the token to the list of tokens and move on.
- tokens << token
+ tokens << lex_compat_token
# If we get the declaration of a heredoc, then we open a new heredoc
# and move into the heredoc_opened state.
if event == :on_heredoc_beg
state = :heredoc_opened
- heredoc_stack.last << Heredoc.build(token)
+ heredoc_stack.last << Heredoc.build(lex_compat_token)
end
when :heredoc_opened
# The heredoc_opened state is when we've seen the declaration of a
# heredoc and are now lexing the body of the heredoc. In this state we
# push tokens onto the most recently created heredoc.
- heredoc_stack.last.last << token
+ heredoc_stack.last.last << lex_compat_token
case event
when :on_heredoc_beg
@@ -811,7 +773,7 @@ module Prism
# heredoc, this means we have nested heredocs. In this case we'll
# push a new heredoc onto the stack and stay in the heredoc_opened
# state since we're now lexing the body of the new heredoc.
- heredoc_stack << [Heredoc.build(token)]
+ heredoc_stack << [Heredoc.build(lex_compat_token)]
when :on_heredoc_end
# If we receive the end of a heredoc, then we're done lexing the
# body of the heredoc. In this case we now have a completed heredoc
@@ -820,10 +782,10 @@ module Prism
state = :heredoc_closed
end
when :heredoc_closed
- if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n"))
+ if %i[on_nl on_ignored_nl on_comment].include?(event) || ((event == :on_tstring_content) && value.end_with?("\n"))
if heredoc_stack.size > 1
- flushing = heredoc_stack.pop
- heredoc_stack.last.last << token
+ flushing = heredoc_stack.pop #: Array[Heredoc::PlainHeredoc | Heredoc::DashHeredoc | Heredoc::DedentingHeredoc]
+ heredoc_stack.last.last << lex_compat_token
flushing.each do |heredoc|
heredoc.to_a.each do |flushed_token|
@@ -835,12 +797,12 @@ module Prism
next
end
elsif event == :on_heredoc_beg
- tokens << token
+ tokens << lex_compat_token
state = :heredoc_opened
- heredoc_stack.last << Heredoc.build(token)
+ heredoc_stack.last << Heredoc.build(lex_compat_token)
next
elsif heredoc_stack.size > 1
- heredoc_stack[-2].last << token
+ heredoc_stack[-2].last << lex_compat_token
next
end
@@ -851,77 +813,94 @@ module Prism
heredoc_stack.last.clear
state = :default
- tokens << token
+ tokens << lex_compat_token
end
end
- # Drop the EOF token from the list
- tokens = tokens[0...-1]
-
- # We sort by location to compare against Ripper's output
- tokens.sort_by!(&:location)
-
- Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.for(source))
- end
- end
+ # Drop the EOF token from the list. The EOF token may not be
+ # present if the source was syntax invalid
+ if tokens.dig(-1, 1) == :on_eof
+ tokens = tokens[0...-1] #: Array[lex_compat_token]
+ end
- private_constant :LexCompat
+ # We sort by location because Ripper.lex sorts.
+ tokens.sort_by! do |token|
+ line, column = token[0]
+ source.byte_offset(line, column)
+ end
- # This is a class that wraps the Ripper lexer to produce almost exactly the
- # same tokens.
- class LexRipper # :nodoc:
- attr_reader :source
+ tokens = post_process_tokens(tokens, source, result.data_loc, bom, eof_token)
- def initialize(source)
- @source = source
+ Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, result.continuable?, source)
end
- def result
- previous = [] #: [[Integer, Integer], Symbol, String, untyped] | []
- results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]]
-
- lex(source).each do |token|
- case token[1]
- when :on_sp
- # skip
- when :on_tstring_content
- if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
- previous[2] << token[2]
- else
- results << token
- previous = token
- end
- when :on_words_sep
- if previous[1] == :on_words_sep
- previous[2] << token[2]
+ private
+
+ #: (Array[lex_compat_token] tokens, Source source, Location? data_loc, bool bom, Token? eof_token) -> Array[lex_compat_token]
+ def post_process_tokens(tokens, source, data_loc, bom, eof_token)
+ new_tokens = [] #: Array[lex_compat_token]
+
+ prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG]
+ prev_token_end = bom ? 3 : 0
+
+ cache = Translation::Ripper::LineAndColumnCache.new(source)
+
+ tokens.each do |token|
+ # Skip missing heredoc ends.
+ next if token[1] == :on_heredoc_end && token[2] == ""
+
+ # Add :on_sp tokens.
+ line, column = token[0]
+ start_offset = source.byte_offset(line, column)
+
+ # Ripper reports columns on line 1 without counting the BOM, so we
+ # adjust to get the real offset
+ start_offset += 3 if line == 1 && bom
+
+ if start_offset > prev_token_end
+ sp_value = source.slice(prev_token_end, start_offset - prev_token_end)
+ sp_line, sp_column = cache.line_and_column(prev_token_end)
+ # Ripper reports columns on line 1 without counting the BOM
+ sp_column -= 3 if sp_line == 1 && bom
+ continuation_index = sp_value.byteindex("\\")
+
+ # ripper emits up to three :on_sp tokens when line continuations are used
+ if continuation_index
+ next_whitespace_index = continuation_index + 1
+ next_whitespace_index += 1 if sp_value.byteslice(next_whitespace_index) == "\r"
+ next_whitespace_index += 1
+ first_whitespace = sp_value[0...continuation_index] #: String
+ continuation = sp_value[continuation_index...next_whitespace_index] #: String
+ second_whitespace = sp_value[next_whitespace_index..] || ""
+
+ new_tokens << [[sp_line, sp_column], :on_sp, first_whitespace, prev_token_state] unless first_whitespace.empty?
+ new_tokens << [[sp_line, sp_column + continuation_index], :on_sp, continuation, prev_token_state]
+ new_tokens << [[sp_line + 1, 0], :on_sp, second_whitespace, prev_token_state] unless second_whitespace.empty?
else
- results << token
- previous = token
+ new_tokens << [[sp_line, sp_column], :on_sp, sp_value, prev_token_state]
end
- else
- results << token
- previous = token
end
- end
-
- results
- end
-
- private
- if Ripper.method(:lex).parameters.assoc(:keyrest)
- def lex(source)
- Ripper.lex(source, raise_errors: true)
+ new_tokens << token
+ prev_token_state = token[3]
+ prev_token_end = start_offset + token[2].bytesize
end
- else
- def lex(source)
- ripper = Ripper::Lexer.new(source)
- ripper.lex.tap do |result|
- raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any?
+
+ if !data_loc && eof_token # no trailing :on_sp with __END__ as it is always preceded by :on_nl
+ end_offset = eof_token.location.end_offset
+ if prev_token_end < end_offset
+ new_tokens << [
+ [source.line(prev_token_end), source.column(prev_token_end)],
+ :on_sp,
+ source.slice(prev_token_end, end_offset - prev_token_end),
+ prev_token_state
+ ]
end
end
+
+ new_tokens
end
end
- private_constant :LexRipper
+ private_constant :LexCompat
end
diff --git a/lib/prism/node_ext.rb b/lib/prism/node_ext.rb
index 4dfcebd638..8a6624e76d 100644
--- a/lib/prism/node_ext.rb
+++ b/lib/prism/node_ext.rb
@@ -1,15 +1,20 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
+#--
# Here we are reopening the prism module to provide methods on nodes that aren't
# templated and are meant as convenience methods.
+#++
module Prism
class Node
+ #: (*String replacements) -> void
def deprecated(*replacements) # :nodoc:
- location = caller_locations(1, 1)
- location = location[0].label if location
+ location = caller_locations(1, 1)&.[](0)&.label
suggest = replacements.map { |replacement| "#{self.class}##{replacement}" }
- warn(<<~MSG, category: :deprecated)
+ warn(<<~MSG, uplevel: 1, category: :deprecated)
[deprecation]: #{self.class}##{location} is deprecated and will be \
removed in the next major version. Use #{suggest.join("/")} instead.
#{(caller(1, 3) || []).join("\n")}
@@ -20,7 +25,9 @@ module Prism
module RegularExpressionOptions # :nodoc:
# Returns a numeric value that represents the flags that were used to create
# the regular expression.
- def options
+ #--
+ #: (Integer flags) -> Integer
+ def self.options(flags)
o = 0
o |= Regexp::IGNORECASE if flags.anybits?(RegularExpressionFlags::IGNORE_CASE)
o |= Regexp::EXTENDED if flags.anybits?(RegularExpressionFlags::EXTENDED)
@@ -32,43 +39,87 @@ module Prism
end
class InterpolatedMatchLastLineNode < Node
- include RegularExpressionOptions
+ # Returns a numeric value that represents the flags that were used to create
+ # the regular expression.
+ #--
+ #: () -> Integer
+ def options
+ RegularExpressionOptions.options(flags)
+ end
end
class InterpolatedRegularExpressionNode < Node
- include RegularExpressionOptions
+ # Returns a numeric value that represents the flags that were used to create
+ # the regular expression.
+ #--
+ #: () -> Integer
+ def options
+ RegularExpressionOptions.options(flags)
+ end
end
class MatchLastLineNode < Node
- include RegularExpressionOptions
+ # Returns a numeric value that represents the flags that were used to create
+ # the regular expression.
+ #--
+ #: () -> Integer
+ def options
+ RegularExpressionOptions.options(flags)
+ end
end
class RegularExpressionNode < Node
- include RegularExpressionOptions
+ # Returns a numeric value that represents the flags that were used to create
+ # the regular expression.
+ #--
+ #: () -> Integer
+ def options
+ RegularExpressionOptions.options(flags)
+ end
end
private_constant :RegularExpressionOptions
module HeredocQuery # :nodoc:
# Returns true if this node was represented as a heredoc in the source code.
- def heredoc?
+ #--
+ #: (String? opening) -> bool?
+ def self.heredoc?(opening)
+ # @type self: InterpolatedStringNode | InterpolatedXStringNode | StringNode | XStringNode
opening&.start_with?("<<")
end
end
class InterpolatedStringNode < Node
- include HeredocQuery
+ # Returns true if this node was represented as a heredoc in the source code.
+ #--
+ #: () -> bool?
+ def heredoc?
+ HeredocQuery.heredoc?(opening)
+ end
end
class InterpolatedXStringNode < Node
- include HeredocQuery
+ # Returns true if this node was represented as a heredoc in the source code.
+ #--
+ #: () -> bool?
+ def heredoc?
+ HeredocQuery.heredoc?(opening)
+ end
end
class StringNode < Node
- include HeredocQuery
+ # Returns true if this node was represented as a heredoc in the source code.
+ #--
+ #: () -> bool?
+ def heredoc?
+ HeredocQuery.heredoc?(opening)
+ end
# Occasionally it's helpful to treat a string as if it were interpolated so
# that there's a consistent interface for working with strings.
+ #--
+ #: () -> InterpolatedStringNode
def to_interpolated
InterpolatedStringNode.new(
source,
@@ -83,10 +134,17 @@ module Prism
end
class XStringNode < Node
- include HeredocQuery
+ # Returns true if this node was represented as a heredoc in the source code.
+ #--
+ #: () -> bool?
+ def heredoc?
+ HeredocQuery.heredoc?(opening)
+ end
# Occasionally it's helpful to treat a string as if it were interpolated so
# that there's a consistent interface for working with strings.
+ #--
+ #: () -> InterpolatedXStringNode
def to_interpolated
InterpolatedXStringNode.new(
source,
@@ -104,6 +162,8 @@ module Prism
class ImaginaryNode < Node
# Returns the value of the node as a Ruby Complex.
+ #--
+ #: () -> Complex
def value
Complex(0, numeric.value)
end
@@ -111,31 +171,25 @@ module Prism
class RationalNode < Node
# Returns the value of the node as a Ruby Rational.
+ #--
+ #: () -> Rational
def value
Rational(numerator, denominator)
end
-
- # Returns the value of the node as an IntegerNode or a FloatNode. This
- # method is deprecated in favor of #value or #numerator/#denominator.
- def numeric
- deprecated("value", "numerator", "denominator")
-
- if denominator == 1
- IntegerNode.new(source, -1, location.chop, flags, numerator)
- else
- FloatNode.new(source, -1, location.chop, 0, numerator.to_f / denominator)
- end
- end
end
class ConstantReadNode < Node
# Returns the list of parts for the full name of this constant.
# For example: [:Foo]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
[name]
end
# Returns the full name of this constant. For example: "Foo"
+ #--
+ #: () -> String
def full_name
name.to_s
end
@@ -144,11 +198,15 @@ module Prism
class ConstantWriteNode < Node
# Returns the list of parts for the full name of this constant.
# For example: [:Foo]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
[name]
end
# Returns the full name of this constant. For example: "Foo"
+ #--
+ #: () -> String
def full_name
name.to_s
end
@@ -163,13 +221,15 @@ module Prism
# local variable
class DynamicPartsInConstantPathError < StandardError; end
- # An error class raised when missing nodes are found while computing a
+ # An error class raised when error recovery nodes are found while computing a
# constant path's full name. For example:
# Foo:: -> raises because the constant path is missing the last part
- class MissingNodesInConstantPathError < StandardError; end
+ class ErrorRecoveryNodesInConstantPathError < StandardError; end
# Returns the list of parts for the full name of this constant path.
# For example: [:Foo, :Bar]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
parts = [] #: Array[Symbol]
current = self #: node?
@@ -177,7 +237,7 @@ module Prism
while current.is_a?(ConstantPathNode)
name = current.name
if name.nil?
- raise MissingNodesInConstantPathError, "Constant path contains missing nodes. Cannot compute full name"
+ raise ErrorRecoveryNodesInConstantPathError, "Constant path contains error recovery nodes. Cannot compute full name"
end
parts.unshift(name)
@@ -192,30 +252,21 @@ module Prism
end
# Returns the full name of this constant path. For example: "Foo::Bar"
+ #--
+ #: () -> String
def full_name
full_name_parts.join("::")
end
-
- # Previously, we had a child node on this class that contained either a
- # constant read or a missing node. To not cause a breaking change, we
- # continue to supply that API.
- def child
- deprecated("name", "name_loc")
-
- if name
- ConstantReadNode.new(source, -1, name_loc, 0, name)
- else
- MissingNode.new(source, -1, location, 0)
- end
- end
end
class ConstantPathTargetNode < Node
# Returns the list of parts for the full name of this constant path.
# For example: [:Foo, :Bar]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
parts =
- case parent
+ case (parent = self.parent)
when ConstantPathNode, ConstantReadNode
parent.full_name_parts
when nil
@@ -225,40 +276,33 @@ module Prism
raise ConstantPathNode::DynamicPartsInConstantPathError, "Constant target path contains dynamic parts. Cannot compute full name"
end
- if name.nil?
- raise ConstantPathNode::MissingNodesInConstantPathError, "Constant target path contains missing nodes. Cannot compute full name"
+ if (name = self.name).nil?
+ raise ConstantPathNode::ErrorRecoveryNodesInConstantPathError, "Constant target path contains error recovery nodes. Cannot compute full name"
end
parts.push(name)
end
# Returns the full name of this constant path. For example: "Foo::Bar"
+ #--
+ #: () -> String
def full_name
full_name_parts.join("::")
end
-
- # Previously, we had a child node on this class that contained either a
- # constant read or a missing node. To not cause a breaking change, we
- # continue to supply that API.
- def child
- deprecated("name", "name_loc")
-
- if name
- ConstantReadNode.new(source, -1, name_loc, 0, name)
- else
- MissingNode.new(source, -1, location, 0)
- end
- end
end
class ConstantTargetNode < Node
# Returns the list of parts for the full name of this constant.
# For example: [:Foo]
+ #--
+ #: () -> Array[Symbol]
def full_name_parts
[name]
end
# Returns the full name of this constant. For example: "Foo"
+ #--
+ #: () -> String
def full_name
name.to_s
end
@@ -266,6 +310,8 @@ module Prism
class ParametersNode < Node
# Mirrors the Method#parameters method.
+ #--
+ #: () -> Array[[Symbol, Symbol] | [Symbol]]
def signature
names = [] #: Array[[Symbol, Symbol] | [Symbol]]
@@ -275,7 +321,7 @@ module Prism
optionals.each { |param| names << [:opt, param.name] }
- if rest && rest.is_a?(RestParameterNode)
+ if (rest = self.rest).is_a?(RestParameterNode)
names << [:rest, rest.name || :*]
end
@@ -283,8 +329,7 @@ module Prism
case param
when MultiTargetNode
names << [:req]
- when NoKeywordsParameterNode, KeywordRestParameterNode, ForwardingParameterNode
- # Invalid syntax, e.g. "def f(**nil, ...)" moves the NoKeywordsParameterNode to posts
+ when ErrorRecoveryNode
raise "Invalid syntax"
else
names << [:req, param.name]
@@ -304,7 +349,7 @@ module Prism
keyopt.each { |param| names << [:key, param.name] }
- case keyword_rest
+ case (keyword_rest = self.keyword_rest)
when ForwardingParameterNode
names.concat([[:rest, :*], [:keyrest, :**], [:block, :&]])
when KeywordRestParameterNode
@@ -313,7 +358,13 @@ module Prism
names << [:nokey]
end
- names << [:block, block.name || :&] if block
+ case (block = self.block)
+ when BlockParameterNode
+ names << [:block, block.name || :&]
+ when NoBlockParameterNode
+ names << [:noblock]
+ end
+
names
end
end
@@ -328,181 +379,10 @@ module Prism
# can be any amount of space between the message and the = sign. However,
# sometimes you want the location of the full message including the inner
# space and the = sign. This method provides that.
+ #--
+ #: () -> Location?
def full_message_loc
attribute_write? ? message_loc&.adjoin("=") : message_loc
end
end
-
- class CallOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class ClassVariableOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class ConstantOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class ConstantPathOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class GlobalVariableOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class IndexOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class InstanceVariableOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class LocalVariableOperatorWriteNode < Node
- # Returns the binary operator used to modify the receiver. This method is
- # deprecated in favor of #binary_operator.
- def operator
- deprecated("binary_operator")
- binary_operator
- end
-
- # Returns the location of the binary operator used to modify the receiver.
- # This method is deprecated in favor of #binary_operator_loc.
- def operator_loc
- deprecated("binary_operator_loc")
- binary_operator_loc
- end
- end
-
- class CaseMatchNode < Node
- # Returns the else clause of the case match node. This method is deprecated
- # in favor of #else_clause.
- def consequent
- deprecated("else_clause")
- else_clause
- end
- end
-
- class CaseNode < Node
- # Returns the else clause of the case node. This method is deprecated in
- # favor of #else_clause.
- def consequent
- deprecated("else_clause")
- else_clause
- end
- end
-
- class IfNode < Node
- # Returns the subsequent if/elsif/else clause of the if node. This method is
- # deprecated in favor of #subsequent.
- def consequent
- deprecated("subsequent")
- subsequent
- end
- end
-
- class RescueNode < Node
- # Returns the subsequent rescue clause of the rescue node. This method is
- # deprecated in favor of #subsequent.
- def consequent
- deprecated("subsequent")
- subsequent
- end
- end
-
- class UnlessNode < Node
- # Returns the else clause of the unless node. This method is deprecated in
- # favor of #else_clause.
- def consequent
- deprecated("else_clause")
- else_clause
- end
- end
end
diff --git a/lib/prism/node_find.rb b/lib/prism/node_find.rb
new file mode 100644
index 0000000000..697ee430e8
--- /dev/null
+++ b/lib/prism/node_find.rb
@@ -0,0 +1,185 @@
+# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
+
+module Prism
+ # Finds the Prism AST node corresponding to a given Method, UnboundMethod,
+ # Proc, or Thread::Backtrace::Location. On CRuby, uses node_id from the
+ # instruction sequence for an exact match. On other implementations, falls
+ # back to best-effort matching by source location line number.
+ #
+ # This module is autoloaded so that programs that don't use Prism.find don't
+ # pay for its definition.
+ module NodeFind # :nodoc:
+ # Find the node for the given callable or backtrace location.
+ #--
+ #: (Method | UnboundMethod | Proc | Thread::Backtrace::Location callable, bool rubyvm) -> Node?
+ def self.find(callable, rubyvm)
+ case callable
+ when Proc
+ if rubyvm
+ RubyVMCallableFind.new.find(callable)
+ elsif callable.lambda?
+ LineLambdaFind.new.find(callable)
+ else
+ LineProcFind.new.find(callable)
+ end
+ when Method, UnboundMethod
+ if rubyvm
+ RubyVMCallableFind.new.find(callable)
+ else
+ LineMethodFind.new.find(callable)
+ end
+ when Thread::Backtrace::Location
+ if rubyvm
+ RubyVMBacktraceLocationFind.new.find(callable)
+ else
+ LineBacktraceLocationFind.new.find(callable)
+ end
+ else
+ raise ArgumentError, "Expected a Method, UnboundMethod, Proc, or Thread::Backtrace::Location, got #{callable.class}"
+ end
+ end
+
+ # Base class that handles parsing a file.
+ class Find
+ private
+
+ # Parse the given file path, returning a ParseResult or nil.
+ #--
+ #: (String? file) -> ParseResult?
+ def parse_file(file)
+ return unless file && File.readable?(file)
+ result = Prism.parse_file(file)
+ result if result.success?
+ end
+ end
+
+ # Finds the AST node for a Method, UnboundMethod, or Proc using the node_id
+ # from the instruction sequence.
+ class RubyVMCallableFind < Find
+ # Find the node for the given callable using the ISeq node_id.
+ #--
+ #: (Method | UnboundMethod | Proc callable) -> Node?
+ def find(callable)
+ return unless (source_location = callable.source_location)
+ return unless (result = parse_file(source_location[0]))
+ return unless (iseq = RubyVM::InstructionSequence.of(callable))
+
+ header = iseq.to_a[4]
+ return unless header[:parser] == :prism
+
+ result.value.find { |node| node.node_id == header[:node_id] }
+ end
+ end
+
+ # Finds the AST node for a Thread::Backtrace::Location using the node_id
+ # from the backtrace location.
+ class RubyVMBacktraceLocationFind < Find
+ # Find the node for the given backtrace location using node_id.
+ #--
+ #: (Thread::Backtrace::Location location) -> Node?
+ def find(location)
+ file = location.absolute_path || location.path
+ return unless (result = parse_file(file))
+ return unless RubyVM::AbstractSyntaxTree.respond_to?(:node_id_for_backtrace_location)
+
+ node_id = RubyVM::AbstractSyntaxTree.node_id_for_backtrace_location(location)
+
+ result.value.find { |node| node.node_id == node_id }
+ end
+ end
+
+ # Finds the AST node for a Method or UnboundMethod using best-effort line
+ # matching. Used on non-CRuby implementations.
+ class LineMethodFind < Find
+ # Find the node for the given method by matching on name and line.
+ #--
+ #: (Method | UnboundMethod callable) -> Node?
+ def find(callable)
+ return unless (source_location = callable.source_location)
+ return unless (result = parse_file(source_location[0]))
+
+ name = callable.name
+ start_line = source_location[1]
+
+ result.value.find do |node|
+ case node
+ when DefNode
+ node.name == name && node.location.start_line == start_line
+ when CallNode
+ node.block.is_a?(BlockNode) && node.location.start_line == start_line
+ else
+ false
+ end
+ end
+ end
+ end
+
+ # Finds the AST node for a lambda using best-effort line matching. Used
+ # on non-CRuby implementations.
+ class LineLambdaFind < Find
+ # Find the node for the given lambda by matching on line.
+ #--
+ #: (Proc callable) -> Node?
+ def find(callable)
+ return unless (source_location = callable.source_location)
+ return unless (result = parse_file(source_location[0]))
+
+ start_line = source_location[1]
+
+ result.value.find do |node|
+ case node
+ when LambdaNode
+ node.location.start_line == start_line
+ when CallNode
+ node.block.is_a?(BlockNode) && node.location.start_line == start_line
+ else
+ false
+ end
+ end
+ end
+ end
+
+ # Finds the AST node for a non-lambda Proc using best-effort line
+ # matching. Used on non-CRuby implementations.
+ class LineProcFind < Find
+ # Find the node for the given proc by matching on line.
+ #--
+ #: (Proc callable) -> Node?
+ def find(callable)
+ return unless (source_location = callable.source_location)
+ return unless (result = parse_file(source_location[0]))
+
+ start_line = source_location[1]
+
+ result.value.find do |node|
+ case node
+ when ForNode
+ node.location.start_line == start_line
+ when CallNode
+ node.block.is_a?(BlockNode) && node.location.start_line == start_line
+ else
+ false
+ end
+ end
+ end
+ end
+
+ # Finds the AST node for a Thread::Backtrace::Location using best-effort
+ # line matching. Used on non-CRuby implementations.
+ class LineBacktraceLocationFind < Find
+ # Find the node for the given backtrace location by matching on line.
+ #--
+ #: (Thread::Backtrace::Location location) -> Node?
+ def find(location)
+ file = location.absolute_path || location.path
+ return unless (result = parse_file(file))
+
+ start_line = location.lineno
+ result.value.find { |node| node.location.start_line == start_line }
+ end
+ end
+ end
+end
diff --git a/lib/prism/pack.rb b/lib/prism/pack.rb
deleted file mode 100644
index c0de8ab8b7..0000000000
--- a/lib/prism/pack.rb
+++ /dev/null
@@ -1,228 +0,0 @@
-# frozen_string_literal: true
-# typed: ignore
-
-module Prism
- # A parser for the pack template language.
- module Pack
- %i[
- SPACE
- COMMENT
- INTEGER
- UTF8
- BER
- FLOAT
- STRING_SPACE_PADDED
- STRING_NULL_PADDED
- STRING_NULL_TERMINATED
- STRING_MSB
- STRING_LSB
- STRING_HEX_HIGH
- STRING_HEX_LOW
- STRING_UU
- STRING_MIME
- STRING_BASE64
- STRING_FIXED
- STRING_POINTER
- MOVE
- BACK
- NULL
-
- UNSIGNED
- SIGNED
- SIGNED_NA
-
- AGNOSTIC_ENDIAN
- LITTLE_ENDIAN
- BIG_ENDIAN
- NATIVE_ENDIAN
- ENDIAN_NA
-
- SIZE_SHORT
- SIZE_INT
- SIZE_LONG
- SIZE_LONG_LONG
- SIZE_8
- SIZE_16
- SIZE_32
- SIZE_64
- SIZE_P
- SIZE_NA
-
- LENGTH_FIXED
- LENGTH_MAX
- LENGTH_RELATIVE
- LENGTH_NA
- ].each do |const|
- const_set(const, const)
- end
-
- # A directive in the pack template language.
- class Directive
- # A symbol representing the version of Ruby.
- attr_reader :version
-
- # A symbol representing whether or not we are packing or unpacking.
- attr_reader :variant
-
- # A byteslice of the source string that this directive represents.
- attr_reader :source
-
- # The type of the directive.
- attr_reader :type
-
- # The type of signedness of the directive.
- attr_reader :signed
-
- # The type of endianness of the directive.
- attr_reader :endian
-
- # The size of the directive.
- attr_reader :size
-
- # The length type of this directive (used for integers).
- attr_reader :length_type
-
- # The length of this directive (used for integers).
- attr_reader :length
-
- # Initialize a new directive with the given values.
- def initialize(version, variant, source, type, signed, endian, size, length_type, length)
- @version = version
- @variant = variant
- @source = source
- @type = type
- @signed = signed
- @endian = endian
- @size = size
- @length_type = length_type
- @length = length
- end
-
- # The descriptions of the various types of endianness.
- ENDIAN_DESCRIPTIONS = {
- AGNOSTIC_ENDIAN: "agnostic",
- LITTLE_ENDIAN: "little-endian (VAX)",
- BIG_ENDIAN: "big-endian (network)",
- NATIVE_ENDIAN: "native-endian",
- ENDIAN_NA: "n/a"
- }
-
- # The descriptions of the various types of signedness.
- SIGNED_DESCRIPTIONS = {
- UNSIGNED: "unsigned",
- SIGNED: "signed",
- SIGNED_NA: "n/a"
- }
-
- # The descriptions of the various types of sizes.
- SIZE_DESCRIPTIONS = {
- SIZE_SHORT: "short",
- SIZE_INT: "int-width",
- SIZE_LONG: "long",
- SIZE_LONG_LONG: "long long",
- SIZE_8: "8-bit",
- SIZE_16: "16-bit",
- SIZE_32: "32-bit",
- SIZE_64: "64-bit",
- SIZE_P: "pointer-width"
- }
-
- # Provide a human-readable description of the directive.
- def describe
- case type
- when SPACE
- "whitespace"
- when COMMENT
- "comment"
- when INTEGER
- if size == SIZE_8
- base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer"
- else
- base = "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer"
- end
- case length_type
- when LENGTH_FIXED
- if length > 1
- base + ", x#{length}"
- else
- base
- end
- when LENGTH_MAX
- base + ", as many as possible"
- else
- raise
- end
- when UTF8
- "UTF-8 character"
- when BER
- "BER-compressed integer"
- when FLOAT
- "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float"
- when STRING_SPACE_PADDED
- "arbitrary binary string (space padded)"
- when STRING_NULL_PADDED
- "arbitrary binary string (null padded, count is width)"
- when STRING_NULL_TERMINATED
- "arbitrary binary string (null padded, count is width), except that null is added with *"
- when STRING_MSB
- "bit string (MSB first)"
- when STRING_LSB
- "bit string (LSB first)"
- when STRING_HEX_HIGH
- "hex string (high nibble first)"
- when STRING_HEX_LOW
- "hex string (low nibble first)"
- when STRING_UU
- "UU-encoded string"
- when STRING_MIME
- "quoted printable, MIME encoding"
- when STRING_BASE64
- "base64 encoded string"
- when STRING_FIXED
- "pointer to a structure (fixed-length string)"
- when STRING_POINTER
- "pointer to a null-terminated string"
- when MOVE
- "move to absolute position"
- when BACK
- "back up a byte"
- when NULL
- "null byte"
- else
- raise
- end
- end
- end
-
- # The result of parsing a pack template.
- class Format
- # A list of the directives in the template.
- attr_reader :directives
-
- # The encoding of the template.
- attr_reader :encoding
-
- # Create a new Format with the given directives and encoding.
- def initialize(directives, encoding)
- @directives = directives
- @encoding = encoding
- end
-
- # Provide a human-readable description of the format.
- def describe
- source_width = directives.map { |d| d.source.inspect.length }.max
- directive_lines = directives.map do |directive|
- if directive.type == SPACE
- source = directive.source.inspect
- else
- source = directive.source
- end
- # @type var source_width: Integer
- " #{source.ljust(source_width)} #{directive.describe}"
- end
-
- (["Directives:"] + directive_lines + ["Encoding:", " #{encoding}"]).join("\n")
- end
- end
- end
-end
diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb
index 7aee20c9de..93d3c006b7 100644
--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@@ -1,6 +1,16 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
+ # @rbs!
+ # # An internal interface for a cache that can be used to compute code
+ # # units from byte offsets.
+ # interface _CodeUnitsCache
+ # def []: (Integer byte_offset) -> Integer
+ # end
+
# This represents a source of Ruby code that has been parsed. It is used in
# conjunction with locations to allow them to resolve line numbers and source
# ranges.
@@ -9,7 +19,18 @@ module Prism
# be used instead of `new` and it will return either a `Source` or a
# specialized and more performant `ASCIISource` if no multibyte characters
# are present in the source code.
- def self.for(source, start_line = 1, offsets = [])
+ #
+ # Note that if you are calling this method manually, you will need to supply
+ # the start_line and offsets parameters. start_line is the line number that
+ # the source starts on, which is typically 1 but can be different if this
+ # source is a subset of a larger source or if this is an eval. offsets is an
+ # array of byte offsets for the start of each line in the source code, which
+ # can be calculated by iterating through the source code and recording the
+ # byte offset whenever a newline character is encountered. The first
+ # element is always 0 to mark the first line.
+ #--
+ #: (String source, Integer start_line, Array[Integer] offsets) -> Source
+ def self.for(source, start_line, offsets)
if source.ascii_only?
ASCIISource.new(source, start_line, offsets)
elsif source.encoding == Encoding::BINARY
@@ -33,67 +54,122 @@ module Prism
end
# The source code that this source object represents.
- attr_reader :source
+ attr_reader :source #: String
# The line number where this source starts.
- attr_reader :start_line
+ attr_reader :start_line #: Integer
+
+ # The list of newline byte offsets in the source code. When initialized from
+ # the C extension, this may be a packed binary string of uint32_t values
+ # that is lazily unpacked on first access.
+ #--
+ #: () -> Array[Integer]
+ def offsets
+ offsets = @offsets
+ return offsets if offsets.is_a?(Array)
+ @offsets = offsets.unpack("L*")
+ end
+
+ # Create a new source object with the given source code. The offsets
+ # parameter can be either an Array of Integer byte offsets or a packed
+ # binary string of uint32_t values (from the C extension).
+ #--
+ #: (String source, Integer start_line, Array[Integer] | String offsets) -> void
+ def initialize(source, start_line, offsets)
+ @source = source
+ @start_line = start_line
+ @offsets = offsets
+ end
- # The list of newline byte offsets in the source code.
- attr_reader :offsets
+ # Replace the value of start_line with the given value.
+ #--
+ #: (Integer start_line) -> void
+ def replace_start_line(start_line)
+ @start_line = start_line
+ end
- # Create a new source object with the given source code.
- def initialize(source, start_line = 1, offsets = [])
- @source = source
- @start_line = start_line # set after parsing is done
- @offsets = offsets # set after parsing is done
+ # Replace the value of offsets with the given value.
+ #--
+ #: (Array[Integer] offsets) -> void
+ def replace_offsets(offsets)
+ @offsets = offsets
end
# Returns the encoding of the source code, which is set by parameters to the
# parser or by the encoding magic comment.
+ #--
+ #: () -> Encoding
def encoding
source.encoding
end
# Returns the lines of the source code as an array of strings.
+ #--
+ #: () -> Array[String]
def lines
source.lines
end
# Perform a byteslice on the source code using the given byte offset and
# byte length.
+ #--
+ #: (Integer byte_offset, Integer length) -> String
def slice(byte_offset, length)
source.byteslice(byte_offset, length) or raise
end
+ # Converts the line number and column in bytes to a byte offset.
+ #--
+ #: (Integer line, Integer column) -> Integer
+ def byte_offset(line, column)
+ normal = line - @start_line
+ raise IndexError if normal < 0
+ offsets.fetch(normal) + column
+ rescue IndexError
+ raise ArgumentError, "line #{line} is out of range"
+ end
+
# Binary search through the offsets to find the line number for the given
# byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def line(byte_offset)
start_line + find_line(byte_offset)
end
# Return the byte offset of the start of the line corresponding to the given
# byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def line_start(byte_offset)
offsets[find_line(byte_offset)]
end
# Returns the byte offset of the end of the line corresponding to the given
# byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def line_end(byte_offset)
offsets[find_line(byte_offset) + 1] || source.bytesize
end
- # Return the column number for the given byte offset.
+ # Return the column in bytes for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def column(byte_offset)
byte_offset - line_start(byte_offset)
end
# Return the character offset for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def character_offset(byte_offset)
(source.byteslice(0, byte_offset) or raise).length
end
- # Return the column number in characters for the given byte offset.
+ # Return the column in characters for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def character_column(byte_offset)
character_offset(byte_offset) - character_offset(line_start(byte_offset))
end
@@ -110,7 +186,11 @@ module Prism
# possible that the given byte offset will not occur on a character
# boundary. Second, it's possible that the source code will contain a
# character that has no equivalent in the given encoding.
+ #--
+ #: (Integer byte_offset, Encoding encoding) -> Integer
def code_units_offset(byte_offset, encoding)
+ return byte_offset if encoding == Encoding::UTF_8
+
byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
@@ -122,36 +202,36 @@ module Prism
# Generate a cache that targets a specific encoding for calculating code
# unit offsets.
+ #--
+ #: (Encoding encoding) -> CodeUnitsCache
def code_units_cache(encoding)
CodeUnitsCache.new(source, encoding)
end
- # Returns the column number in code units for the given encoding for the
+ # Returns the column in code units for the given encoding for the
# given byte offset.
+ #--
+ #: (Integer byte_offset, Encoding encoding) -> Integer
def code_units_column(byte_offset, encoding)
code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
end
- private
+ # Freeze this object and the objects it contains.
+ #--
+ #: () -> void
+ def deep_freeze
+ source.freeze
+ offsets.freeze
+ freeze
+ end
- # Binary search through the offsets to find the line number for the given
+ # Binary search through the offsets to find the index for the given
# byte offset.
- def find_line(byte_offset)
- left = 0
- right = offsets.length - 1
-
- while left <= right
- mid = left + (right - left) / 2
- return mid if (offset = offsets[mid]) == byte_offset
-
- if offset < byte_offset
- left = mid + 1
- else
- right = mid - 1
- end
- end
-
- left - 1
+ #--
+ #: (Integer byte_offset) -> Integer
+ def find_line(byte_offset) # :nodoc:
+ index = offsets.bsearch_index { |offset| offset > byte_offset } || offsets.length
+ index - 1
end
end
@@ -170,38 +250,69 @@ module Prism
# has not yet been implemented.
#
class CodeUnitsCache
+ # Counter used for UTF-8, where one code unit equals one byte.
+ class UTF8Counter # :nodoc:
+ #: (Integer byte_offset, Integer byte_length) -> Integer
+ def count(byte_offset, byte_length)
+ byte_length
+ end
+ end
+
class UTF16Counter # :nodoc:
+ # @rbs @source: String
+ # @rbs @encoding: Encoding
+
+ #: (String source, Encoding encoding) -> void
def initialize(source, encoding)
@source = source
@encoding = encoding
end
+ #: (Integer byte_offset, Integer byte_length) -> Integer
def count(byte_offset, byte_length)
- @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
+ (@source.byteslice(byte_offset, byte_length) or raise).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
end
end
- class LengthCounter # :nodoc:
+ # Counter used for UTF-32, where one code unit equals one code point and
+ # matches String#length. Also used as a best-effort fallback for any other
+ # encoding that does not have a dedicated counter.
+ class UTF32Counter # :nodoc:
+ # @rbs @source: String
+ # @rbs @encoding: Encoding
+
+ #: (String source, Encoding encoding) -> void
def initialize(source, encoding)
@source = source
@encoding = encoding
end
+ #: (Integer byte_offset, Integer byte_length) -> Integer
def count(byte_offset, byte_length)
- @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
+ (@source.byteslice(byte_offset, byte_length) or raise).encode(@encoding, invalid: :replace, undef: :replace).length
end
end
- private_constant :UTF16Counter, :LengthCounter
+ private_constant :UTF8Counter, :UTF16Counter, :UTF32Counter
+
+ # @rbs @source: String
+ # @rbs @counter: UTF8Counter | UTF16Counter | UTF32Counter
+ # @rbs @cache: Hash[Integer, Integer]
+ # @rbs @offsets: Array[Integer]
# Initialize a new cache with the given source and encoding.
+ #--
+ #: (String source, Encoding encoding) -> void
def initialize(source, encoding)
@source = source
@counter =
- if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
+ case encoding
+ when Encoding::UTF_8
+ UTF8Counter.new
+ when Encoding::UTF_16LE, Encoding::UTF_16BE
UTF16Counter.new(source, encoding)
else
- LengthCounter.new(source, encoding)
+ UTF32Counter.new(source, encoding)
end
@cache = {} #: Hash[Integer, Integer]
@@ -209,6 +320,8 @@ module Prism
end
# Retrieve the code units offset from the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def [](byte_offset)
@cache[byte_offset] ||=
if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
@@ -235,11 +348,15 @@ module Prism
# at that point we will treat everything as single-byte characters.
class ASCIISource < Source
# Return the character offset for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def character_offset(byte_offset)
byte_offset
end
- # Return the column number in characters for the given byte offset.
+ # Return the column in characters for the given byte offset.
+ #--
+ #: (Integer byte_offset) -> Integer
def character_column(byte_offset)
byte_offset - line_start(byte_offset)
end
@@ -250,6 +367,8 @@ module Prism
# This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
# concept of code units that differs from the number of characters in other
# encodings, it is not captured here.
+ #--
+ #: (Integer byte_offset, Encoding encoding) -> Integer
def code_units_offset(byte_offset, encoding)
byte_offset
end
@@ -257,6 +376,8 @@ module Prism
# Returns a cache that is the identity function in order to maintain the
# same interface. We can do this because code units are always equivalent to
# byte offsets for ASCII-only sources.
+ #--
+ #: (Encoding encoding) -> _CodeUnitsCache
def code_units_cache(encoding)
->(byte_offset) { byte_offset }
end
@@ -264,6 +385,8 @@ module Prism
# Specialized version of `code_units_column` that does not depend on
# `code_units_offset`, which is a more expensive operation. This is
# essentially the same as `Prism::Source#column`.
+ #--
+ #: (Integer byte_offset, Encoding encoding) -> Integer
def code_units_column(byte_offset, encoding)
byte_offset - line_start(byte_offset)
end
@@ -273,18 +396,23 @@ module Prism
class Location
# A Source object that is used to determine more information from the given
# offset and length.
- attr_reader :source
+ attr_reader :source #: Source
protected :source
# The byte offset from the beginning of the source where this location
# starts.
- attr_reader :start_offset
+ attr_reader :start_offset #: Integer
# The length of this location in bytes.
- attr_reader :length
+ attr_reader :length #: Integer
+
+ # @rbs @leading_comments: Array[Comment]?
+ # @rbs @trailing_comments: Array[Comment]?
# Create a new location object with the given source, start byte offset, and
# byte length.
+ #--
+ #: (Source source, Integer start_offset, Integer length) -> void
def initialize(source, start_offset, length)
@source = source
@start_offset = start_offset
@@ -299,53 +427,73 @@ module Prism
# These are the comments that are associated with this location that exist
# before the start of this location.
+ #--
+ #: () -> Array[Comment]
def leading_comments
@leading_comments ||= []
end
# Attach a comment to the leading comments of this location.
+ #--
+ #: (Comment comment) -> void
def leading_comment(comment)
leading_comments << comment
end
# These are the comments that are associated with this location that exist
# after the end of this location.
+ #--
+ #: () -> Array[Comment]
def trailing_comments
@trailing_comments ||= []
end
# Attach a comment to the trailing comments of this location.
+ #--
+ #: (Comment comment) -> void
def trailing_comment(comment)
trailing_comments << comment
end
# Returns all comments that are associated with this location (both leading
# and trailing comments).
+ #--
+ #: () -> Array[Comment]
def comments
- [*@leading_comments, *@trailing_comments]
+ [*@leading_comments, *@trailing_comments] #: Array[Comment]
end
# Create a new location object with the given options.
+ #--
+ #: (?source: Source, ?start_offset: Integer, ?length: Integer) -> Location
def copy(source: self.source, start_offset: self.start_offset, length: self.length)
Location.new(source, start_offset, length)
end
# Returns a new location that is the result of chopping off the last byte.
+ #--
+ #: () -> Location
def chop
copy(length: length == 0 ? length : length - 1)
end
# Returns a string representation of this location.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::Location @start_offset=#{@start_offset} @length=#{@length} start_line=#{start_line}>"
end
# Returns all of the lines of the source code associated with this location.
+ #--
+ #: () -> Array[String]
def source_lines
source.lines
end
# The source code that this location represents.
+ #--
+ #: () -> String
def slice
source.slice(start_offset, length)
end
@@ -353,6 +501,8 @@ module Prism
# The source code that this location represents starting from the beginning
# of the line that this location starts on to the end of the line that this
# location ends on.
+ #--
+ #: () -> String
def slice_lines
line_start = source.line_start(start_offset)
line_end = source.line_end(end_offset)
@@ -361,118 +511,160 @@ module Prism
# The character offset from the beginning of the source where this location
# starts.
+ #--
+ #: () -> Integer
def start_character_offset
source.character_offset(start_offset)
end
# The offset from the start of the file in code units of the given encoding.
+ #--
+ #: (Encoding encoding) -> Integer
def start_code_units_offset(encoding = Encoding::UTF_16LE)
source.code_units_offset(start_offset, encoding)
end
# The start offset from the start of the file in code units using the given
# cache to fetch or calculate the value.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
def cached_start_code_units_offset(cache)
cache[start_offset]
end
# The byte offset from the beginning of the source where this location ends.
+ #--
+ #: () -> Integer
def end_offset
start_offset + length
end
# The character offset from the beginning of the source where this location
# ends.
+ #--
+ #: () -> Integer
def end_character_offset
source.character_offset(end_offset)
end
# The offset from the start of the file in code units of the given encoding.
+ #--
+ #: (Encoding encoding) -> Integer
def end_code_units_offset(encoding = Encoding::UTF_16LE)
source.code_units_offset(end_offset, encoding)
end
# The end offset from the start of the file in code units using the given
# cache to fetch or calculate the value.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
def cached_end_code_units_offset(cache)
cache[end_offset]
end
# The line number where this location starts.
+ #--
+ #: () -> Integer
def start_line
source.line(start_offset)
end
# The content of the line where this location starts before this location.
+ #--
+ #: () -> String
def start_line_slice
offset = source.line_start(start_offset)
source.slice(offset, start_offset - offset)
end
# The line number where this location ends.
+ #--
+ #: () -> Integer
def end_line
source.line(end_offset)
end
- # The column number in bytes where this location starts from the start of
+ # The column in bytes where this location starts from the start of
# the line.
+ #--
+ #: () -> Integer
def start_column
source.column(start_offset)
end
- # The column number in characters where this location ends from the start of
+ # The column in characters where this location ends from the start of
# the line.
+ #--
+ #: () -> Integer
def start_character_column
source.character_column(start_offset)
end
- # The column number in code units of the given encoding where this location
+ # The column in code units of the given encoding where this location
# starts from the start of the line.
+ #--
+ #: (?Encoding encoding) -> Integer
def start_code_units_column(encoding = Encoding::UTF_16LE)
source.code_units_column(start_offset, encoding)
end
# The start column in code units using the given cache to fetch or calculate
# the value.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
def cached_start_code_units_column(cache)
cache[start_offset] - cache[source.line_start(start_offset)]
end
- # The column number in bytes where this location ends from the start of the
+ # The column in bytes where this location ends from the start of the
# line.
+ #--
+ #: () -> Integer
def end_column
source.column(end_offset)
end
- # The column number in characters where this location ends from the start of
+ # The column in characters where this location ends from the start of
# the line.
+ #--
+ #: () -> Integer
def end_character_column
source.character_column(end_offset)
end
- # The column number in code units of the given encoding where this location
+ # The column in code units of the given encoding where this location
# ends from the start of the line.
+ #--
+ #: (?Encoding encoding) -> Integer
def end_code_units_column(encoding = Encoding::UTF_16LE)
source.code_units_column(end_offset, encoding)
end
# The end column in code units using the given cache to fetch or calculate
# the value.
+ #--
+ #: (_CodeUnitsCache cache) -> Integer
def cached_end_code_units_column(cache)
cache[end_offset] - cache[source.line_start(end_offset)]
end
# Implement the hash pattern matching interface for Location.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ start_offset: start_offset, end_offset: end_offset }
end
# Implement the pretty print interface for Location.
- def pretty_print(q)
+ #--
+ #: (PP q) -> void
+ def pretty_print(q) # :nodoc:
q.text("(#{start_line},#{start_column})-(#{end_line},#{end_column})")
end
# Returns true if the given other location is equal to this location.
+ #--
+ #: (untyped other) -> bool
def ==(other)
Location === other &&
other.start_offset == start_offset &&
@@ -482,6 +674,8 @@ module Prism
# Returns a new location that stretches from this location to the given
# other location. Raises an error if this location is not before the other
# location or if they don't share the same source.
+ #--
+ #: (Location other) -> Location
def join(other)
raise "Incompatible sources" if source != other.source
raise "Incompatible locations" if start_offset > other.start_offset
@@ -492,6 +686,8 @@ module Prism
# Join this location with the first occurrence of the string in the source
# that occurs after this location on the same line, and return the new
# location. This will raise an error if the string does not exist.
+ #--
+ #: (String string) -> Location
def adjoin(string)
line_suffix = source.slice(end_offset, source.line_end(end_offset) - end_offset)
@@ -505,23 +701,38 @@ module Prism
# This represents a comment that was encountered during parsing. It is the
# base class for all comment types.
class Comment
- # The location of this comment in the source.
- attr_reader :location
+ # The Location of this comment in the source.
+ attr_reader :location #: Location
# Create a new comment object with the given location.
+ #--
+ #: (Location location) -> void
def initialize(location)
@location = location
end
# Implement the hash pattern matching interface for Comment.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ location: location }
end
# Returns the content of the comment by slicing it from the source code.
+ #--
+ #: () -> String
def slice
location.slice
end
+
+ # Returns true if this comment happens on the same line as other code and
+ # false if the comment is by itself. This can only be true for inline
+ # comments and should be false for block comments.
+ #--
+ #: () -> bool
+ def trailing?
+ raise NotImplementedError, "trailing? is not implemented for #{self.class}"
+ end
end
# InlineComment objects are the most common. They correspond to comments in
@@ -529,12 +740,16 @@ module Prism
class InlineComment < Comment
# Returns true if this comment happens on the same line as other code and
# false if the comment is by itself.
+ #--
+ #: () -> bool
def trailing?
!location.start_line_slice.strip.empty?
end
# Returns a string representation of this comment.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::InlineComment @location=#{location.inspect}>"
end
end
@@ -542,13 +757,17 @@ module Prism
# EmbDocComment objects correspond to comments that are surrounded by =begin
# and =end.
class EmbDocComment < Comment
- # This can only be true for inline comments.
+ # Returns false. This can only be true for inline comments.
+ #--
+ #: () -> bool
def trailing?
false
end
# Returns a string representation of this comment.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::EmbDocComment @location=#{location.inspect}>"
end
end
@@ -556,34 +775,44 @@ module Prism
# This represents a magic comment that was encountered during parsing.
class MagicComment
# A Location object representing the location of the key in the source.
- attr_reader :key_loc
+ attr_reader :key_loc #: Location
# A Location object representing the location of the value in the source.
- attr_reader :value_loc
+ attr_reader :value_loc #: Location
# Create a new magic comment object with the given key and value locations.
+ #--
+ #: (Location key_loc, Location value_loc) -> void
def initialize(key_loc, value_loc)
@key_loc = key_loc
@value_loc = value_loc
end
# Returns the key of the magic comment by slicing it from the source code.
+ #--
+ #: () -> String
def key
key_loc.slice
end
# Returns the value of the magic comment by slicing it from the source code.
+ #--
+ #: () -> String
def value
value_loc.slice
end
# Implement the hash pattern matching interface for MagicComment.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ key_loc: key_loc, value_loc: value_loc }
end
# Returns a string representation of this magic comment.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>"
end
end
@@ -592,18 +821,20 @@ module Prism
class ParseError
# The type of error. This is an _internal_ symbol that is used for
# communicating with translation layers. It is not meant to be public API.
- attr_reader :type
+ attr_reader :type #: Symbol
# The message associated with this error.
- attr_reader :message
+ attr_reader :message #: String
# A Location object representing the location of this error in the source.
- attr_reader :location
+ attr_reader :location #: Location
# The level of this error.
- attr_reader :level
+ attr_reader :level #: Symbol
# Create a new error object with the given message and location.
+ #--
+ #: (Symbol type, String message, Location location, Symbol level) -> void
def initialize(type, message, location, level)
@type = type
@message = message
@@ -612,12 +843,16 @@ module Prism
end
# Implement the hash pattern matching interface for ParseError.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ type: type, message: message, location: location, level: level }
end
# Returns a string representation of this error.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::ParseError @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
end
end
@@ -626,18 +861,20 @@ module Prism
class ParseWarning
# The type of warning. This is an _internal_ symbol that is used for
# communicating with translation layers. It is not meant to be public API.
- attr_reader :type
+ attr_reader :type #: Symbol
# The message associated with this warning.
- attr_reader :message
+ attr_reader :message #: String
# A Location object representing the location of this warning in the source.
- attr_reader :location
+ attr_reader :location #: Location
# The level of this warning.
- attr_reader :level
+ attr_reader :level #: Symbol
# Create a new warning object with the given message and location.
+ #--
+ #: (Symbol type, String message, Location location, Symbol level) -> void
def initialize(type, message, location, level)
@type = type
@message = message
@@ -646,73 +883,116 @@ module Prism
end
# Implement the hash pattern matching interface for ParseWarning.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ type: type, message: message, location: location, level: level }
end
# Returns a string representation of this warning.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
"#<Prism::ParseWarning @type=#{@type.inspect} @message=#{@message.inspect} @location=#{@location.inspect} @level=#{@level.inspect}>"
end
end
- # This represents the result of a call to ::parse or ::parse_file. It contains
- # the requested structure, any comments that were encounters, and any errors
- # that were encountered.
+ # This represents the result of a call to Prism.parse or Prism.parse_file.
+ # It contains the requested structure, any comments that were encounters,
+ # and any errors that were encountered.
class Result
# The list of comments that were encountered during parsing.
- attr_reader :comments
+ attr_reader :comments #: Array[Comment]
# The list of magic comments that were encountered during parsing.
- attr_reader :magic_comments
+ attr_reader :magic_comments #: Array[MagicComment]
# An optional location that represents the location of the __END__ marker
# and the rest of the content of the file. This content is loaded into the
# DATA constant when the file being parsed is the main file being executed.
- attr_reader :data_loc
+ attr_reader :data_loc #: Location?
# The list of errors that were generated during parsing.
- attr_reader :errors
+ attr_reader :errors #: Array[ParseError]
# The list of warnings that were generated during parsing.
- attr_reader :warnings
+ attr_reader :warnings #: Array[ParseWarning]
# A Source instance that represents the source code that was parsed.
- attr_reader :source
+ attr_reader :source #: Source
# Create a new result object with the given values.
- def initialize(comments, magic_comments, data_loc, errors, warnings, source)
+ #--
+ #: (Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(comments, magic_comments, data_loc, errors, warnings, continuable, source)
@comments = comments
@magic_comments = magic_comments
@data_loc = data_loc
@errors = errors
@warnings = warnings
+ @continuable = continuable
@source = source
end
# Implement the hash pattern matching interface for Result.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
end
# Returns the encoding of the source code that was parsed.
+ #--
+ #: () -> Encoding
def encoding
source.encoding
end
# Returns true if there were no errors during parsing and false if there
# were.
+ #--
+ #: () -> bool
def success?
errors.empty?
end
# Returns true if there were errors during parsing and false if there were
# not.
+ #--
+ #: () -> bool
def failure?
!success?
end
+ # Returns true if the parsed source is an incomplete expression that could
+ # become valid with additional input. This is useful for REPL contexts (such
+ # as IRB) where the user may be entering a multi-line expression one line at
+ # a time and the implementation needs to determine whether to wait for more
+ # input or to evaluate what has been entered so far.
+ #
+ # Concretely, this returns true when every error present is caused by the
+ # parser reaching the end of the input before a construct was closed (e.g.
+ # an unclosed string, array, block, or keyword), and returns false when any
+ # error is caused by a token that makes the input structurally invalid
+ # regardless of what might follow (e.g. a stray `end`, `]`, or `)` with no
+ # matching opener).
+ #
+ # Examples:
+ #
+ # Prism.parse("1 + [").continuable? #=> true (unclosed array)
+ # Prism.parse("1 + ]").continuable? #=> false (stray ])
+ # Prism.parse("tap do").continuable? #=> true (unclosed block)
+ # Prism.parse("end.tap do").continuable? #=> false (stray end)
+ #
+ #--
+ #: () -> bool
+ def continuable?
+ @continuable
+ end
+
# Create a code units cache for the given encoding.
+ #--
+ #: (Encoding encoding) -> _CodeUnitsCache
def code_units_cache(encoding)
source.code_units_cache(encoding)
end
@@ -729,32 +1009,42 @@ module Prism
private_constant :Newlines
# The syntax tree that was parsed from the source code.
- attr_reader :value
+ attr_reader :value #: ProgramNode
# Create a new parse result object with the given values.
- def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+ #--
+ #: (ProgramNode value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
@value = value
- super(comments, magic_comments, data_loc, errors, warnings, source)
+ super(comments, magic_comments, data_loc, errors, warnings, continuable, source)
end
# Implement the hash pattern matching interface for ParseResult.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
super.merge!(value: value)
end
# Attach the list of comments to their respective locations in the tree.
+ #--
+ #: () -> void
def attach_comments!
Comments.new(self).attach! # steep:ignore
end
# Walk the tree and mark nodes that are on a new line, loosely emulating
# the behavior of CRuby's `:line` tracepoint event.
+ #--
+ #: () -> void
def mark_newlines!
value.accept(Newlines.new(source.offsets.size)) # steep:ignore
end
# Returns a string representation of the syntax tree with the errors
# displayed inline.
+ #--
+ #: () -> String
def errors_format
Errors.new(self).format
end
@@ -763,16 +1053,20 @@ module Prism
# This is a result specific to the `lex` and `lex_file` methods.
class LexResult < Result
# The list of tokens that were parsed from the source code.
- attr_reader :value
+ attr_reader :value #: Array[[Token, Integer]]
# Create a new lex result object with the given values.
- def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+ #--
+ #: (Array[[Token, Integer]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
@value = value
- super(comments, magic_comments, data_loc, errors, warnings, source)
+ super(comments, magic_comments, data_loc, errors, warnings, continuable, source)
end
# Implement the hash pattern matching interface for LexResult.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
super.merge!(value: value)
end
end
@@ -781,16 +1075,20 @@ module Prism
class ParseLexResult < Result
# A tuple of the syntax tree and the list of tokens that were parsed from
# the source code.
- attr_reader :value
+ attr_reader :value #: [ProgramNode, Array[[Token, Integer]]]
# Create a new parse lex result object with the given values.
- def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+ #--
+ #: ([ProgramNode, Array[[Token, Integer]]] value, Array[Comment] comments, Array[MagicComment] magic_comments, Location? data_loc, Array[ParseError] errors, Array[ParseWarning] warnings, bool continuable, Source source) -> void
+ def initialize(value, comments, magic_comments, data_loc, errors, warnings, continuable, source)
@value = value
- super(comments, magic_comments, data_loc, errors, warnings, source)
+ super(comments, magic_comments, data_loc, errors, warnings, continuable, source)
end
# Implement the hash pattern matching interface for ParseLexResult.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
super.merge!(value: value)
end
end
@@ -798,16 +1096,20 @@ module Prism
# This represents a token from the Ruby source.
class Token
# The Source object that represents the source this token came from.
- attr_reader :source
+ attr_reader :source #: Source
private :source
# The type of token that this token is.
- attr_reader :type
+ attr_reader :type #: Symbol
# A byteslice of the source that this token represents.
- attr_reader :value
+ attr_reader :value #: String
+
+ # @rbs @location: Location | Integer
# Create a new token object with the given type, value, and location.
+ #--
+ #: (Source source, Symbol type, String value, Location | Integer location) -> void
def initialize(source, type, value, location)
@source = source
@type = type
@@ -816,11 +1118,15 @@ module Prism
end
# Implement the hash pattern matching interface for Token.
- def deconstruct_keys(keys)
+ #--
+ #: (Array[Symbol]? keys) -> Hash[Symbol, untyped]
+ def deconstruct_keys(keys) # :nodoc:
{ type: type, value: value, location: location }
end
# A Location object representing the location of this token in the source.
+ #--
+ #: () -> Location
def location
location = @location
return location if location.is_a?(Location)
@@ -828,7 +1134,9 @@ module Prism
end
# Implement the pretty print interface for Token.
- def pretty_print(q)
+ #--
+ #: (PP q) -> void
+ def pretty_print(q) # :nodoc:
q.group do
q.text(type.to_s)
self.location.pretty_print(q)
@@ -843,6 +1151,8 @@ module Prism
end
# Returns true if the given other token is equal to this token.
+ #--
+ #: (untyped other) -> bool
def ==(other)
Token === other &&
other.type == type &&
@@ -850,9 +1160,52 @@ module Prism
end
# Returns a string representation of this token.
- def inspect
+ #--
+ #: () -> String
+ def inspect # :nodoc:
location
super
end
+
+ # Freeze this object and the objects it contains.
+ #--
+ #: () -> void
+ def deep_freeze
+ value.freeze
+ location.freeze
+ freeze
+ end
+ end
+
+ # This object is passed to the various Prism.* methods that accept the
+ # `scopes` option as an element of the list. It defines both the local
+ # variables visible at that scope as well as the forwarding parameters
+ # available at that scope.
+ class Scope
+ # The list of local variables that are defined in this scope. This should be
+ # defined as an array of symbols.
+ attr_reader :locals #: Array[Symbol]
+
+ # The list of local variables that are forwarded to the next scope. This
+ # should by defined as an array of symbols containing the specific values of
+ # :*, :**, :&, or :"...".
+ attr_reader :forwarding #: Array[Symbol]
+
+ # Create a new scope object with the given locals and forwarding.
+ #--
+ #: (Array[Symbol] locals, Array[Symbol] forwarding) -> void
+ def initialize(locals, forwarding)
+ @locals = locals
+ @forwarding = forwarding
+ end
+ end
+
+ # Create a new scope with the given locals and forwarding options that is
+ # suitable for passing into one of the Prism.* methods that accepts the
+ # `scopes` option.
+ #--
+ #: (?locals: Array[Symbol], ?forwarding: Array[Symbol]) -> Scope
+ def self.scope(locals: [], forwarding: [])
+ Scope.new(locals, forwarding)
end
end
diff --git a/lib/prism/parse_result/comments.rb b/lib/prism/parse_result/comments.rb
index 22c4148b2c..df80792d39 100644
--- a/lib/prism/parse_result/comments.rb
+++ b/lib/prism/parse_result/comments.rb
@@ -1,4 +1,7 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
class ParseResult < Result
@@ -17,32 +20,49 @@ module Prism
# the comment. Otherwise it will favor attaching to the nearest location
# that is after the comment.
class Comments
+ # @rbs!
+ # # An internal interface for a target that comments can be attached
+ # # to. This is either going to be a NodeTarget or a CommentTarget.
+ # interface _CommentTarget
+ # def start_offset: () -> Integer
+ # def end_offset: () -> Integer
+ # def encloses?: (Comment) -> bool
+ # def leading_comment: (Comment) -> void
+ # def trailing_comment: (Comment) -> void
+ # end
+
# A target for attaching comments that is based on a specific node's
# location.
class NodeTarget # :nodoc:
- attr_reader :node
+ attr_reader :node #: node
+ #: (node node) -> void
def initialize(node)
@node = node
end
+ #: () -> Integer
def start_offset
node.start_offset
end
+ #: () -> Integer
def end_offset
node.end_offset
end
+ #: (Comment comment) -> bool
def encloses?(comment)
start_offset <= comment.location.start_offset &&
comment.location.end_offset <= end_offset
end
+ #: (Comment comment) -> void
def leading_comment(comment)
node.location.leading_comment(comment)
end
+ #: (Comment comment) -> void
def trailing_comment(comment)
node.location.trailing_comment(comment)
end
@@ -51,44 +71,54 @@ module Prism
# A target for attaching comments that is based on a location field on a
# node. For example, the `end` token of a ClassNode.
class LocationTarget # :nodoc:
- attr_reader :location
+ attr_reader :location #: Location
+ #: (Location location) -> void
def initialize(location)
@location = location
end
+ #: () -> Integer
def start_offset
location.start_offset
end
+ #: () -> Integer
def end_offset
location.end_offset
end
+ #: (Comment comment) -> bool
def encloses?(comment)
false
end
+ #: (Comment comment) -> void
def leading_comment(comment)
location.leading_comment(comment)
end
+ #: (Comment comment) -> void
def trailing_comment(comment)
location.trailing_comment(comment)
end
end
# The parse result that we are attaching comments to.
- attr_reader :parse_result
+ attr_reader :parse_result #: ParseResult
# Create a new Comments object that will attach comments to the given
# parse result.
+ #--
+ #: (ParseResult parse_result) -> void
def initialize(parse_result)
@parse_result = parse_result
end
# Attach the comments to their respective locations in the tree by
# mutating the parse result.
+ #--
+ #: () -> void
def attach!
parse_result.comments.each do |comment|
preceding, enclosing, following = nearest_targets(parse_result.value, comment)
@@ -116,11 +146,13 @@ module Prism
# Responsible for finding the nearest targets to the given comment within
# the context of the given encapsulating node.
+ #--
+ #: (node node, Comment comment) -> [_CommentTarget?, _CommentTarget?, _CommentTarget?]
def nearest_targets(node, comment)
comment_start = comment.location.start_offset
comment_end = comment.location.end_offset
- targets = [] #: Array[_Target]
+ targets = [] #: Array[_CommentTarget]
node.comment_targets.map do |value|
case value
when StatementsNode
@@ -133,8 +165,8 @@ module Prism
end
targets.sort_by!(&:start_offset)
- preceding = nil #: _Target?
- following = nil #: _Target?
+ preceding = nil #: _CommentTarget?
+ following = nil #: _CommentTarget?
left = 0
right = targets.length
diff --git a/lib/prism/parse_result/errors.rb b/lib/prism/parse_result/errors.rb
index eb4f317248..388309d23d 100644
--- a/lib/prism/parse_result/errors.rb
+++ b/lib/prism/parse_result/errors.rb
@@ -1,4 +1,7 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
require "stringio"
@@ -8,14 +11,18 @@ module Prism
# can be used to format the errors in a human-readable way.
class Errors
# The parse result that contains the errors.
- attr_reader :parse_result
+ attr_reader :parse_result #: ParseResult
# Initialize a new set of errors from the given parse result.
+ #--
+ #: (ParseResult parse_result) -> void
def initialize(parse_result)
@parse_result = parse_result
end
# Formats the errors in a human-readable way and return them as a string.
+ #--
+ #: () -> String
def format
error_lines = {} #: Hash[Integer, Array[ParseError]]
parse_result.errors.each do |error|
diff --git a/lib/prism/parse_result/newlines.rb b/lib/prism/parse_result/newlines.rb
index a04fa78a75..450c790226 100644
--- a/lib/prism/parse_result/newlines.rb
+++ b/lib/prism/parse_result/newlines.rb
@@ -1,4 +1,7 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
class ParseResult < Result
@@ -23,13 +26,20 @@ module Prism
# that case. We do that to avoid storing the extra `@newline` instance
# variable on every node if we don't need it.
class Newlines < Visitor
+ # The map of lines indices to whether or not they have been marked as
+ # emitting a newline event.
+ # @rbs @lines: Array[bool]
+
# Create a new Newlines visitor with the given newline offsets.
+ #--
+ #: (Integer lines) -> void
def initialize(lines)
- # @type var lines: Integer
@lines = Array.new(1 + lines, false)
end
- # Permit block/lambda nodes to mark newlines within themselves.
+ # Permit block nodes to mark newlines within themselves.
+ #--
+ #: (BlockNode node) -> void
def visit_block_node(node)
old_lines = @lines
@lines = Array.new(old_lines.size, false)
@@ -41,17 +51,39 @@ module Prism
end
end
- alias_method :visit_lambda_node, :visit_block_node
+ # Permit lambda nodes to mark newlines within themselves.
+ #--
+ #: (LambdaNode node) -> void
+ def visit_lambda_node(node)
+ old_lines = @lines
+ @lines = Array.new(old_lines.size, false)
+
+ begin
+ super(node)
+ ensure
+ @lines = old_lines
+ end
+ end
- # Mark if/unless nodes as newlines.
+ # Mark if nodes as newlines.
+ #--
+ #: (IfNode node) -> void
def visit_if_node(node)
node.newline_flag!(@lines)
super(node)
end
- alias_method :visit_unless_node, :visit_if_node
+ # Mark unless nodes as newlines.
+ #--
+ #: (UnlessNode node) -> void
+ def visit_unless_node(node)
+ node.newline_flag!(@lines)
+ super(node)
+ end
# Permit statements lists to mark newlines within themselves.
+ #--
+ #: (StatementsNode node) -> void
def visit_statements_node(node)
node.body.each do |child|
child.newline_flag!(@lines)
@@ -62,10 +94,16 @@ module Prism
end
class Node
+ # Tracks whether or not this node should emit a newline event when the
+ # instructions that it represents are executed.
+ # @rbs @newline_flag: bool
+
+ #: () -> bool
def newline_flag? # :nodoc:
- @newline_flag ? true : false
+ !!defined?(@newline_flag)
end
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
line = location.start_line
unless lines[line]
@@ -76,48 +114,56 @@ module Prism
end
class BeginNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
# Never mark BeginNode with a newline flag, mark children instead.
end
end
class ParenthesesNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
# Never mark ParenthesesNode with a newline flag, mark children instead.
end
end
class IfNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
predicate.newline_flag!(lines)
end
end
class UnlessNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
predicate.newline_flag!(lines)
end
end
class UntilNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
predicate.newline_flag!(lines)
end
end
class WhileNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
predicate.newline_flag!(lines)
end
end
class RescueModifierNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
expression.newline_flag!(lines)
end
end
class InterpolatedMatchLastLineNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
first = parts.first
first.newline_flag!(lines) if first
@@ -125,6 +171,7 @@ module Prism
end
class InterpolatedRegularExpressionNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
first = parts.first
first.newline_flag!(lines) if first
@@ -132,6 +179,7 @@ module Prism
end
class InterpolatedStringNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
first = parts.first
first.newline_flag!(lines) if first
@@ -139,6 +187,7 @@ module Prism
end
class InterpolatedSymbolNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
first = parts.first
first.newline_flag!(lines) if first
@@ -146,6 +195,7 @@ module Prism
end
class InterpolatedXStringNode < Node
+ #: (Array[bool] lines) -> void
def newline_flag!(lines) # :nodoc:
first = parts.first
first.newline_flag!(lines) if first
diff --git a/lib/prism/pattern.rb b/lib/prism/pattern.rb
index 03fec26789..be0493df05 100644
--- a/lib/prism/pattern.rb
+++ b/lib/prism/pattern.rb
@@ -1,4 +1,7 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
# A pattern is an object that wraps a Ruby pattern matching expression. The
@@ -40,7 +43,9 @@ module Prism
class CompilationError < StandardError
# Create a new CompilationError with the given representation of the node
# that caused the error.
- def initialize(repr)
+ #--
+ #: (String repr) -> void
+ def initialize(repr) # :nodoc:
super(<<~ERROR)
prism was unable to compile the pattern you provided into a usable
expression. It failed on to understand the node represented by:
@@ -56,10 +61,13 @@ module Prism
end
# The query that this pattern was initialized with.
- attr_reader :query
+ attr_reader :query #: String
+ # @rbs @compiled: Proc?
# Create a new pattern with the given query. The query should be a string
# containing a Ruby pattern matching expression.
+ #--
+ #: (String query) -> void
def initialize(query)
@query = query
@compiled = nil
@@ -67,6 +75,8 @@ module Prism
# Compile the query into a callable object that can be used to match against
# nodes.
+ #--
+ #: () -> Proc
def compile
result = Prism.parse("case nil\nin #{query}\nend")
@@ -83,7 +93,10 @@ module Prism
# pattern. If a block is given, it will be called with each node that
# matches the pattern. If no block is given, an enumerator will be returned
# that will yield each node that matches the pattern.
- def scan(root)
+ #--
+ #: (node root) -> Enumerator[node, void]
+ #: (node root) { (node) -> void } -> void
+ def scan(root, &blk)
return to_enum(:scan, root) unless block_given?
@compiled ||= compile
@@ -99,23 +112,33 @@ module Prism
# Shortcut for combining two procs into one that returns true if both return
# true.
- def combine_and(left, right)
+ #--
+ #: (Proc left, Proc right) -> Proc
+ def combine_and(left, right) # :nodoc:
->(other) { left.call(other) && right.call(other) }
end
# Shortcut for combining two procs into one that returns true if either
# returns true.
- def combine_or(left, right)
+ #--
+ #: (Proc left, Proc right) -> Proc
+ def combine_or(left, right) # :nodoc:
->(other) { left.call(other) || right.call(other) }
end
- # Raise an error because the given node is not supported.
- def compile_error(node)
+ # Raise an error because the given node is not supported. Note purposefully
+ # not typing this method since it is a no return method that Steep does not
+ # understand.
+ #--
+ #: (node node) -> bot
+ def compile_error(node) # :nodoc:
raise CompilationError, node.inspect
end
# in [foo, bar, baz]
- def compile_array_pattern_node(node)
+ #--
+ #: (ArrayPatternNode node) -> Proc
+ def compile_array_pattern_node(node) # :nodoc:
compile_error(node) if !node.rest.nil? || node.posts.any?
constant = node.constant
@@ -140,12 +163,16 @@ module Prism
end
# in foo | bar
- def compile_alternation_pattern_node(node)
+ #--
+ #: (AlternationPatternNode node) -> Proc
+ def compile_alternation_pattern_node(node) # :nodoc:
combine_or(compile_node(node.left), compile_node(node.right))
end
# in Prism::ConstantReadNode
- def compile_constant_path_node(node)
+ #--
+ #: (ConstantPathNode node) -> Proc
+ def compile_constant_path_node(node) # :nodoc:
parent = node.parent
if parent.is_a?(ConstantReadNode) && parent.slice == "Prism"
@@ -160,12 +187,16 @@ module Prism
# in ConstantReadNode
# in String
- def compile_constant_read_node(node)
+ #--
+ #: (ConstantReadNode node) -> Proc
+ def compile_constant_read_node(node) # :nodoc:
compile_constant_name(node, node.name)
end
# Compile a name associated with a constant.
- def compile_constant_name(node, name)
+ #--
+ #: ((ConstantPathNode | ConstantReadNode) node, Symbol name) -> Proc
+ def compile_constant_name(node, name) # :nodoc:
if Prism.const_defined?(name, false)
clazz = Prism.const_get(name)
@@ -181,9 +212,14 @@ module Prism
# in InstanceVariableReadNode[name: Symbol]
# in { name: Symbol }
- def compile_hash_pattern_node(node)
+ #--
+ #: (HashPatternNode node) -> Proc
+ def compile_hash_pattern_node(node) # :nodoc:
compile_error(node) if node.rest
- compiled_constant = compile_node(node.constant) if node.constant
+
+ if (constant = node.constant)
+ compiled_constant = compile_node(constant)
+ end
preprocessed =
node.elements.to_h do |element|
@@ -211,12 +247,16 @@ module Prism
end
# in nil
- def compile_nil_node(node)
+ #--
+ #: (NilNode node) -> Proc
+ def compile_nil_node(node) # :nodoc:
->(attribute) { attribute.nil? }
end
# in /foo/
- def compile_regular_expression_node(node)
+ #--
+ #: (RegularExpressionNode node) -> Proc
+ def compile_regular_expression_node(node) # :nodoc:
regexp = Regexp.new(node.unescaped, node.closing[1..])
->(attribute) { regexp === attribute }
@@ -224,7 +264,9 @@ module Prism
# in ""
# in "foo"
- def compile_string_node(node)
+ #--
+ #: (StringNode node) -> Proc
+ def compile_string_node(node) # :nodoc:
string = node.unescaped
->(attribute) { string === attribute }
@@ -232,7 +274,9 @@ module Prism
# in :+
# in :foo
- def compile_symbol_node(node)
+ #--
+ #: (SymbolNode node) -> Proc
+ def compile_symbol_node(node) # :nodoc:
symbol = node.unescaped.to_sym
->(attribute) { symbol === attribute }
@@ -240,7 +284,9 @@ module Prism
# Compile any kind of node. Dispatch out to the individual compilation
# methods based on the type of node.
- def compile_node(node)
+ #--
+ #: (node node) -> Proc
+ def compile_node(node) # :nodoc:
case node
when AlternationPatternNode
compile_alternation_pattern_node(node)
diff --git a/lib/prism/polyfill/append_as_bytes.rb b/lib/prism/polyfill/append_as_bytes.rb
new file mode 100644
index 0000000000..24218bd171
--- /dev/null
+++ b/lib/prism/polyfill/append_as_bytes.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+# Polyfill for String#append_as_bytes, which didn't exist until Ruby 3.4.
+if !("".respond_to?(:append_as_bytes))
+ String.include(
+ Module.new {
+ def append_as_bytes(*args)
+ args.each do |arg|
+ arg = Integer === arg ? [arg].pack("C") : arg.b
+ self.<<(arg) # steep:ignore
+ end
+ end
+ }
+ )
+end
diff --git a/lib/prism/polyfill/scan_byte.rb b/lib/prism/polyfill/scan_byte.rb
new file mode 100644
index 0000000000..9276e509fc
--- /dev/null
+++ b/lib/prism/polyfill/scan_byte.rb
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+
+require "strscan"
+
+# Polyfill for StringScanner#scan_byte, which didn't exist until Ruby 3.4.
+if !(StringScanner.method_defined?(:scan_byte))
+ StringScanner.include(
+ Module.new {
+ def scan_byte # :nodoc:
+ get_byte&.b&.ord
+ end
+ }
+ )
+end
diff --git a/lib/prism/polyfill/warn.rb b/lib/prism/polyfill/warn.rb
new file mode 100644
index 0000000000..76a4264623
--- /dev/null
+++ b/lib/prism/polyfill/warn.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+# Polyfill for Kernel#warn with the category parameter. Not all Ruby engines
+# have Method#parameters implemented, so we check the arity instead if
+# necessary.
+if (method = Kernel.instance_method(:warn)).respond_to?(:parameters) ? method.parameters.none? { |_, name| name == :category } : (method.arity == -1)
+ Kernel.prepend(
+ Module.new {
+ def warn(*msgs, uplevel: nil, category: nil) # :nodoc:
+ case uplevel
+ when nil
+ super(*msgs)
+ when Integer
+ super(*msgs, uplevel: uplevel + 1)
+ else
+ super(*msgs, uplevel: uplevel.to_int + 1)
+ end
+ end
+ }
+ )
+
+ Object.prepend(
+ Module.new {
+ def warn(*msgs, uplevel: nil, category: nil) # :nodoc:
+ case uplevel
+ when nil
+ super(*msgs)
+ when Integer
+ super(*msgs, uplevel: uplevel + 1)
+ else
+ super(*msgs, uplevel: uplevel.to_int + 1)
+ end
+ end
+ }
+ )
+end
diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec
index 0682ca9b1c..aac056b3f8 100644
--- a/lib/prism/prism.gemspec
+++ b/lib/prism/prism.gemspec
@@ -2,7 +2,7 @@
Gem::Specification.new do |spec|
spec.name = "prism"
- spec.version = "1.2.0"
+ spec.version = "1.9.0"
spec.authors = ["Shopify"]
spec.email = ["ruby@shopify.com"]
@@ -42,31 +42,69 @@ Gem::Specification.new do |spec|
"docs/serialization.md",
"docs/testing.md",
"ext/prism/api_node.c",
- "ext/prism/api_pack.c",
+ "ext/prism/extconf.rb",
"ext/prism/extension.c",
"ext/prism/extension.h",
"include/prism.h",
+ "include/prism/compiler/accel.h",
+ "include/prism/compiler/align.h",
+ "include/prism/compiler/exported.h",
+ "include/prism/compiler/fallthrough.h",
+ "include/prism/compiler/filesystem.h",
+ "include/prism/compiler/flex_array.h",
+ "include/prism/compiler/force_inline.h",
+ "include/prism/compiler/format.h",
+ "include/prism/compiler/inline.h",
+ "include/prism/compiler/nodiscard.h",
+ "include/prism/compiler/nonnull.h",
+ "include/prism/compiler/unused.h",
+ "include/prism/internal/allocator.h",
+ "include/prism/internal/allocator_debug.h",
+ "include/prism/internal/arena.h",
+ "include/prism/internal/bit.h",
+ "include/prism/internal/buffer.h",
+ "include/prism/internal/char.h",
+ "include/prism/internal/comments.h",
+ "include/prism/internal/constant_pool.h",
+ "include/prism/internal/diagnostic.h",
+ "include/prism/internal/encoding.h",
+ "include/prism/internal/integer.h",
+ "include/prism/internal/isinf.h",
+ "include/prism/internal/line_offset_list.h",
+ "include/prism/internal/list.h",
+ "include/prism/internal/magic_comments.h",
+ "include/prism/internal/memchr.h",
+ "include/prism/internal/node.h",
+ "include/prism/internal/options.h",
+ "include/prism/internal/parser.h",
+ "include/prism/internal/regexp.h",
+ "include/prism/internal/serialize.h",
+ "include/prism/internal/source.h",
+ "include/prism/internal/static_literals.h",
+ "include/prism/internal/strncasecmp.h",
+ "include/prism/internal/stringy.h",
+ "include/prism/internal/strpbrk.h",
+ "include/prism/internal/tokens.h",
+ "include/prism/arena.h",
"include/prism/ast.h",
- "include/prism/defines.h",
+ "include/prism/buffer.h",
+ "include/prism/comments.h",
+ "include/prism/constant_pool.h",
"include/prism/diagnostic.h",
- "include/prism/encoding.h",
+ "include/prism/excludes.h",
+ "include/prism/integer.h",
+ "include/prism/json.h",
+ "include/prism/line_offset_list.h",
+ "include/prism/magic_comments.h",
"include/prism/node.h",
"include/prism/options.h",
- "include/prism/pack.h",
"include/prism/parser.h",
"include/prism/prettyprint.h",
- "include/prism/regexp.h",
- "include/prism/static_literals.h",
- "include/prism/util/pm_buffer.h",
- "include/prism/util/pm_char.h",
- "include/prism/util/pm_constant_pool.h",
- "include/prism/util/pm_integer.h",
- "include/prism/util/pm_list.h",
- "include/prism/util/pm_memchr.h",
- "include/prism/util/pm_newline_list.h",
- "include/prism/util/pm_strncasecmp.h",
- "include/prism/util/pm_string.h",
- "include/prism/util/pm_strpbrk.h",
+ "include/prism/serialize.h",
+ "include/prism/source.h",
+ "include/prism/stream.h",
+ "include/prism/string_query.h",
+ "include/prism/stringy.h",
"include/prism/version.h",
"lib/prism.rb",
"lib/prism/compiler.rb",
@@ -79,84 +117,112 @@ Gem::Specification.new do |spec|
"lib/prism/lex_compat.rb",
"lib/prism/mutation_compiler.rb",
"lib/prism/node_ext.rb",
+ "lib/prism/node_find.rb",
"lib/prism/node.rb",
- "lib/prism/pack.rb",
"lib/prism/parse_result.rb",
"lib/prism/parse_result/comments.rb",
"lib/prism/parse_result/errors.rb",
"lib/prism/parse_result/newlines.rb",
"lib/prism/pattern.rb",
+ "lib/prism/polyfill/append_as_bytes.rb",
"lib/prism/polyfill/byteindex.rb",
+ "lib/prism/polyfill/scan_byte.rb",
"lib/prism/polyfill/unpack1.rb",
+ "lib/prism/polyfill/warn.rb",
"lib/prism/reflection.rb",
"lib/prism/relocation.rb",
"lib/prism/serialize.rb",
"lib/prism/string_query.rb",
"lib/prism/translation.rb",
"lib/prism/translation/parser.rb",
- "lib/prism/translation/parser33.rb",
- "lib/prism/translation/parser34.rb",
+ "lib/prism/translation/parser_current.rb",
+ "lib/prism/translation/parser_versions.rb",
+ "lib/prism/translation/parser/builder.rb",
"lib/prism/translation/parser/compiler.rb",
"lib/prism/translation/parser/lexer.rb",
"lib/prism/translation/ripper.rb",
+ "lib/prism/translation/ripper/filter.rb",
+ "lib/prism/translation/ripper/lexer.rb",
"lib/prism/translation/ripper/sexp.rb",
"lib/prism/translation/ripper/shim.rb",
"lib/prism/translation/ruby_parser.rb",
"lib/prism/visitor.rb",
"prism.gemspec",
- "rbi/prism.rbi",
- "rbi/prism/compiler.rbi",
- "rbi/prism/dsl.rbi",
- "rbi/prism/inspect_visitor.rbi",
- "rbi/prism/node_ext.rbi",
- "rbi/prism/node.rbi",
- "rbi/prism/parse_result.rbi",
- "rbi/prism/reflection.rbi",
- "rbi/prism/string_query.rbi",
+ "rbi/generated/prism.rbi",
+ "rbi/generated/prism/compiler.rbi",
+ "rbi/generated/prism/desugar_compiler.rbi",
+ "rbi/generated/prism/dispatcher.rbi",
+ "rbi/generated/prism/dot_visitor.rbi",
+ "rbi/generated/prism/dsl.rbi",
+ "rbi/generated/prism/inspect_visitor.rbi",
+ "rbi/generated/prism/lex_compat.rbi",
+ "rbi/generated/prism/mutation_compiler.rbi",
+ "rbi/generated/prism/node.rbi",
+ "rbi/generated/prism/node_ext.rbi",
+ "rbi/generated/prism/node_find.rbi",
+ "rbi/generated/prism/parse_result.rbi",
+ "rbi/generated/prism/pattern.rbi",
+ "rbi/generated/prism/reflection.rbi",
+ "rbi/generated/prism/relocation.rbi",
+ "rbi/generated/prism/serialize.rbi",
+ "rbi/generated/prism/string_query.rbi",
+ "rbi/generated/prism/translation.rbi",
+ "rbi/generated/prism/visitor.rbi",
+ "rbi/generated/prism/parse_result/comments.rbi",
+ "rbi/generated/prism/parse_result/errors.rbi",
+ "rbi/generated/prism/parse_result/newlines.rbi",
"rbi/prism/translation/parser.rbi",
- "rbi/prism/translation/parser33.rbi",
- "rbi/prism/translation/parser34.rbi",
+ "rbi/prism/translation/parser_versions.rbi",
"rbi/prism/translation/ripper.rbi",
- "rbi/prism/visitor.rbi",
- "sig/prism.rbs",
- "sig/prism/compiler.rbs",
- "sig/prism/dispatcher.rbs",
- "sig/prism/dot_visitor.rbs",
- "sig/prism/dsl.rbs",
- "sig/prism/inspect_visitor.rbs",
- "sig/prism/lex_compat.rbs",
- "sig/prism/mutation_compiler.rbs",
- "sig/prism/node_ext.rbs",
- "sig/prism/node.rbs",
- "sig/prism/pack.rbs",
- "sig/prism/parse_result.rbs",
- "sig/prism/pattern.rbs",
- "sig/prism/reflection.rbs",
- "sig/prism/relocation.rbs",
- "sig/prism/serialize.rbs",
- "sig/prism/string_query.rbs",
- "sig/prism/visitor.rbs",
+ "rbi/rubyvm/node_find.rbi",
+ "sig/generated/prism.rbs",
+ "sig/generated/prism/compiler.rbs",
+ "sig/generated/prism/desugar_compiler.rbs",
+ "sig/generated/prism/dispatcher.rbs",
+ "sig/generated/prism/dot_visitor.rbs",
+ "sig/generated/prism/dsl.rbs",
+ "sig/generated/prism/inspect_visitor.rbs",
+ "sig/generated/prism/lex_compat.rbs",
+ "sig/generated/prism/mutation_compiler.rbs",
+ "sig/generated/prism/node.rbs",
+ "sig/generated/prism/node_ext.rbs",
+ "sig/generated/prism/node_find.rbs",
+ "sig/generated/prism/parse_result.rbs",
+ "sig/generated/prism/pattern.rbs",
+ "sig/generated/prism/reflection.rbs",
+ "sig/generated/prism/relocation.rbs",
+ "sig/generated/prism/serialize.rbs",
+ "sig/generated/prism/string_query.rbs",
+ "sig/generated/prism/translation.rbs",
+ "sig/generated/prism/visitor.rbs",
+ "sig/generated/prism/parse_result/comments.rbs",
+ "sig/generated/prism/parse_result/errors.rbs",
+ "sig/generated/prism/parse_result/newlines.rbs",
+ "src/arena.c",
+ "src/buffer.c",
+ "src/char.c",
+ "src/constant_pool.c",
"src/diagnostic.c",
"src/encoding.c",
+ "src/integer.c",
+ "src/json.c",
+ "src/line_offset_list.c",
+ "src/list.c",
+ "src/memchr.c",
"src/node.c",
"src/options.c",
- "src/pack.c",
+ "src/parser.c",
"src/prettyprint.c",
"src/prism.c",
"src/regexp.c",
"src/serialize.c",
+ "src/source.c",
"src/static_literals.c",
- "src/token_type.c",
- "src/util/pm_buffer.c",
- "src/util/pm_char.c",
- "src/util/pm_constant_pool.c",
- "src/util/pm_integer.c",
- "src/util/pm_list.c",
- "src/util/pm_memchr.c",
- "src/util/pm_newline_list.c",
- "src/util/pm_string.c",
- "src/util/pm_strncasecmp.c",
- "src/util/pm_strpbrk.c"
+ "src/string_query.c",
+ "src/stringy.c",
+ "src/strncasecmp.c",
+ "src/strpbrk.c",
+ "src/tokens.c"
]
spec.extensions = ["ext/prism/extconf.rb"]
diff --git a/lib/prism/relocation.rb b/lib/prism/relocation.rb
index 163d2012c5..af0f792827 100644
--- a/lib/prism/relocation.rb
+++ b/lib/prism/relocation.rb
@@ -1,4 +1,7 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
# Prism parses deterministically for the same input. This provides a nice
@@ -11,6 +14,33 @@ module Prism
# "save" nodes and locations using a minimal amount of memory (just the
# node_id and a field identifier) and then reify them later.
module Relocation
+ # @rbs!
+ # type entry_value = untyped
+ # type entry_values = Hash[Symbol, entry_value]
+ #
+ # interface _Value
+ # def start_line: () -> Integer
+ # def end_line: () -> Integer
+ # def start_offset: () -> Integer
+ # def end_offset: () -> Integer
+ # def start_character_offset: () -> Integer
+ # def end_character_offset: () -> Integer
+ # def cached_start_code_units_offset: (_CodeUnitsCache cache) -> Integer
+ # def cached_end_code_units_offset: (_CodeUnitsCache cache) -> Integer
+ # def start_column: () -> Integer
+ # def end_column: () -> Integer
+ # def start_character_column: () -> Integer
+ # def end_character_column: () -> Integer
+ # def cached_start_code_units_column: (_CodeUnitsCache cache) -> Integer
+ # def cached_end_code_units_column: (_CodeUnitsCache cache) -> Integer
+ # def leading_comments: () -> Array[Comment]
+ # def trailing_comments: () -> Array[Comment]
+ # end
+ #
+ # interface _Field
+ # def fields: (_Value value) -> entry_values
+ # end
+
# An entry in a repository that will lazily reify its values when they are
# first accessed.
class Entry
@@ -20,109 +50,152 @@ module Prism
class MissingValueError < StandardError
end
+ # @rbs @repository: Repository?
+ # @rbs @values: Hash[Symbol, untyped]?
+
# Initialize a new entry with the given repository.
+ #--
+ #: (Repository repository) -> void
def initialize(repository)
@repository = repository
@values = nil
end
# Fetch the filepath of the value.
+ #--
+ #: () -> String
def filepath
fetch_value(:filepath)
end
# Fetch the start line of the value.
+ #--
+ #: () -> Integer
def start_line
fetch_value(:start_line)
end
# Fetch the end line of the value.
+ #--
+ #: () -> Integer
def end_line
fetch_value(:end_line)
end
# Fetch the start byte offset of the value.
+ #--
+ #: () -> Integer
def start_offset
fetch_value(:start_offset)
end
# Fetch the end byte offset of the value.
+ #--
+ #: () -> Integer
def end_offset
fetch_value(:end_offset)
end
# Fetch the start character offset of the value.
+ #--
+ #: () -> Integer
def start_character_offset
fetch_value(:start_character_offset)
end
# Fetch the end character offset of the value.
+ #--
+ #: () -> Integer
def end_character_offset
fetch_value(:end_character_offset)
end
# Fetch the start code units offset of the value, for the encoding that
# was configured on the repository.
+ #--
+ #: () -> Integer
def start_code_units_offset
fetch_value(:start_code_units_offset)
end
# Fetch the end code units offset of the value, for the encoding that was
# configured on the repository.
+ #--
+ #: () -> Integer
def end_code_units_offset
fetch_value(:end_code_units_offset)
end
# Fetch the start byte column of the value.
+ #--
+ #: () -> Integer
def start_column
fetch_value(:start_column)
end
# Fetch the end byte column of the value.
+ #--
+ #: () -> Integer
def end_column
fetch_value(:end_column)
end
# Fetch the start character column of the value.
+ #--
+ #: () -> Integer
def start_character_column
fetch_value(:start_character_column)
end
# Fetch the end character column of the value.
+ #--
+ #: () -> Integer
def end_character_column
fetch_value(:end_character_column)
end
# Fetch the start code units column of the value, for the encoding that
# was configured on the repository.
+ #--
+ #: () -> Integer
def start_code_units_column
fetch_value(:start_code_units_column)
end
# Fetch the end code units column of the value, for the encoding that was
# configured on the repository.
+ #--
+ #: () -> Integer
def end_code_units_column
fetch_value(:end_code_units_column)
end
# Fetch the leading comments of the value.
+ #--
+ #: () -> Array[CommentsField::Comment]
def leading_comments
fetch_value(:leading_comments)
end
# Fetch the trailing comments of the value.
+ #--
+ #: () -> Array[CommentsField::Comment]
def trailing_comments
fetch_value(:trailing_comments)
end
# Fetch the leading and trailing comments of the value.
+ #--
+ #: () -> Array[CommentsField::Comment]
def comments
- leading_comments.concat(trailing_comments)
+ [*leading_comments, *trailing_comments]
end
# Reify the values on this entry with the given values. This is an
# internal-only API that is called from the repository when it is time to
# reify the values.
+ #--
+ #: (entry_values values) -> void
def reify!(values) # :nodoc:
@repository = nil
@values = values
@@ -131,6 +204,8 @@ module Prism
private
# Fetch a value from the entry, raising an error if it is missing.
+ #--
+ #: (Symbol name) -> entry_value
def fetch_value(name)
values.fetch(name) do
raise MissingValueError, "No value for #{name}, make sure the " \
@@ -139,27 +214,35 @@ module Prism
end
# Return the values from the repository, reifying them if necessary.
+ #--
+ #: () -> entry_values
def values
- @values || (@repository.reify!; @values)
+ @values || (@repository&.reify!; @values) #: entry_values
end
end
# Represents the source of a repository that will be reparsed.
class Source
# The value that will need to be reparsed.
- attr_reader :value
+ attr_reader :value #: untyped
# Initialize the source with the given value.
+ #--
+ #: (untyped value) -> void
def initialize(value)
@value = value
end
# Reparse the value and return the parse result.
+ #--
+ #: () -> ParseResult
def result
raise NotImplementedError, "Subclasses must implement #result"
end
# Create a code units cache for the given encoding.
+ #--
+ #: (Encoding encoding) -> _CodeUnitsCache
def code_units_cache(encoding)
result.code_units_cache(encoding)
end
@@ -168,6 +251,8 @@ module Prism
# A source that is represented by a file path.
class SourceFilepath < Source
# Reparse the file and return the parse result.
+ #--
+ #: () -> ParseResult
def result
Prism.parse_file(value)
end
@@ -176,6 +261,8 @@ module Prism
# A source that is represented by a string.
class SourceString < Source
# Reparse the string and return the parse result.
+ #--
+ #: () -> ParseResult
def result
Prism.parse(value)
end
@@ -184,14 +271,18 @@ module Prism
# A field that represents the file path.
class FilepathField
# The file path that this field represents.
- attr_reader :value
+ attr_reader :value #: String
# Initialize a new field with the given file path.
+ #--
+ #: (String value) -> void
def initialize(value)
@value = value
end
# Fetch the file path.
+ #--
+ #: (_Value _value) -> entry_values
def fields(_value)
{ filepath: value }
end
@@ -200,6 +291,8 @@ module Prism
# A field representing the start and end lines.
class LinesField
# Fetches the start and end line of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{ start_line: value.start_line, end_line: value.end_line }
end
@@ -208,6 +301,8 @@ module Prism
# A field representing the start and end byte offsets.
class OffsetsField
# Fetches the start and end byte offset of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{ start_offset: value.start_offset, end_offset: value.end_offset }
end
@@ -216,6 +311,8 @@ module Prism
# A field representing the start and end character offsets.
class CharacterOffsetsField
# Fetches the start and end character offset of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{
start_character_offset: value.start_character_offset,
@@ -228,12 +325,16 @@ module Prism
class CodeUnitOffsetsField
# A pointer to the repository object that is used for lazily creating a
# code units cache.
- attr_reader :repository
+ attr_reader :repository #: Repository
# The associated encoding for the code units.
- attr_reader :encoding
+ attr_reader :encoding #: Encoding
+
+ # @rbs @cache: _CodeUnitsCache?
# Initialize a new field with the associated repository and encoding.
+ #--
+ #: (Repository repository, Encoding encoding) -> void
def initialize(repository, encoding)
@repository = repository
@encoding = encoding
@@ -242,6 +343,8 @@ module Prism
# Fetches the start and end code units offset of a value for a particular
# encoding.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{
start_code_units_offset: value.cached_start_code_units_offset(cache),
@@ -252,6 +355,8 @@ module Prism
private
# Lazily create a code units cache for the associated encoding.
+ #--
+ #: () -> _CodeUnitsCache
def cache
@cache ||= repository.code_units_cache(encoding)
end
@@ -260,6 +365,8 @@ module Prism
# A field representing the start and end byte columns.
class ColumnsField
# Fetches the start and end byte column of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{ start_column: value.start_column, end_column: value.end_column }
end
@@ -268,6 +375,8 @@ module Prism
# A field representing the start and end character columns.
class CharacterColumnsField
# Fetches the start and end character column of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{
start_character_column: value.start_character_column,
@@ -281,12 +390,16 @@ module Prism
class CodeUnitColumnsField
# The repository object that is used for lazily creating a code units
# cache.
- attr_reader :repository
+ attr_reader :repository #: Repository
# The associated encoding for the code units.
- attr_reader :encoding
+ attr_reader :encoding #: Encoding
+
+ # @rbs @cache: _CodeUnitsCache?
# Initialize a new field with the associated repository and encoding.
+ #--
+ #: (Repository repository, Encoding encoding) -> void
def initialize(repository, encoding)
@repository = repository
@encoding = encoding
@@ -295,6 +408,8 @@ module Prism
# Fetches the start and end code units column of a value for a particular
# encoding.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{
start_code_units_column: value.cached_start_code_units_column(cache),
@@ -305,6 +420,8 @@ module Prism
private
# Lazily create a code units cache for the associated encoding.
+ #--
+ #: () -> _CodeUnitsCache
def cache
@cache ||= repository.code_units_cache(encoding)
end
@@ -315,9 +432,11 @@ module Prism
# An object that represents a slice of a comment.
class Comment
# The slice of the comment.
- attr_reader :slice
+ attr_reader :slice #: String
# Initialize a new comment with the given slice.
+ #
+ #: (String slice) -> void
def initialize(slice)
@slice = slice
end
@@ -326,6 +445,8 @@ module Prism
private
# Create comment objects from the given values.
+ #--
+ #: (entry_value values) -> Array[Comment]
def comments(values)
values.map { |value| Comment.new(value.slice) }
end
@@ -334,6 +455,8 @@ module Prism
# A field representing the leading comments.
class LeadingCommentsField < CommentsField
# Fetches the leading comments of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{ leading_comments: comments(value.leading_comments) }
end
@@ -342,6 +465,8 @@ module Prism
# A field representing the trailing comments.
class TrailingCommentsField < CommentsField
# Fetches the trailing comments of a value.
+ #--
+ #: (_Value value) -> entry_values
def fields(value)
{ trailing_comments: comments(value.trailing_comments) }
end
@@ -357,15 +482,17 @@ module Prism
# The source associated with this repository. This will be either a
# SourceFilepath (the most common use case) or a SourceString.
- attr_reader :source
+ attr_reader :source #: Source
# The fields that have been configured on this repository.
- attr_reader :fields
+ attr_reader :fields #: Hash[Symbol, _Field]
# The entries that have been saved on this repository.
- attr_reader :entries
+ attr_reader :entries #: Hash[Integer, Hash[Symbol, Entry]]
# Initialize a new repository with the given source.
+ #--
+ #: (Source source) -> void
def initialize(source)
@source = source
@fields = {}
@@ -373,69 +500,93 @@ module Prism
end
# Create a code units cache for the given encoding from the source.
+ #--
+ #: (Encoding encoding) -> _CodeUnitsCache
def code_units_cache(encoding)
source.code_units_cache(encoding)
end
# Configure the filepath field for this repository and return self.
+ #--
+ #: () -> self
def filepath
raise ConfigurationError, "Can only specify filepath for a filepath source" unless source.is_a?(SourceFilepath)
field(:filepath, FilepathField.new(source.value))
end
# Configure the lines field for this repository and return self.
+ #--
+ #: () -> self
def lines
field(:lines, LinesField.new)
end
# Configure the offsets field for this repository and return self.
+ #--
+ #: () -> self
def offsets
field(:offsets, OffsetsField.new)
end
# Configure the character offsets field for this repository and return
# self.
+ #--
+ #: () -> self
def character_offsets
field(:character_offsets, CharacterOffsetsField.new)
end
# Configure the code unit offsets field for this repository for a specific
# encoding and return self.
+ #--
+ #: (Encoding encoding) -> self
def code_unit_offsets(encoding)
field(:code_unit_offsets, CodeUnitOffsetsField.new(self, encoding))
end
# Configure the columns field for this repository and return self.
+ #--
+ #: () -> self
def columns
field(:columns, ColumnsField.new)
end
# Configure the character columns field for this repository and return
# self.
+ #--
+ #: () -> self
def character_columns
field(:character_columns, CharacterColumnsField.new)
end
# Configure the code unit columns field for this repository for a specific
# encoding and return self.
+ #--
+ #: (Encoding encoding) -> self
def code_unit_columns(encoding)
field(:code_unit_columns, CodeUnitColumnsField.new(self, encoding))
end
# Configure the leading comments field for this repository and return
# self.
+ #--
+ #: () -> self
def leading_comments
field(:leading_comments, LeadingCommentsField.new)
end
# Configure the trailing comments field for this repository and return
# self.
+ #--
+ #: () -> self
def trailing_comments
field(:trailing_comments, TrailingCommentsField.new)
end
# Configure both the leading and trailing comment fields for this
# repository and return self.
+ #--
+ #: () -> self
def comments
leading_comments.trailing_comments
end
@@ -443,6 +594,8 @@ module Prism
# This method is called from nodes and locations when they want to enter
# themselves into the repository. It it internal-only and meant to be
# called from the #save* APIs.
+ #--
+ #: (Integer node_id, Symbol field_name) -> Entry
def enter(node_id, field_name) # :nodoc:
entry = Entry.new(self)
@entries[node_id][field_name] = entry
@@ -452,6 +605,8 @@ module Prism
# This method is called from the entries in the repository when they need
# to reify their values. It is internal-only and meant to be called from
# the various value APIs.
+ #--
+ #: () -> void
def reify! # :nodoc:
result = source.result
@@ -465,7 +620,7 @@ module Prism
while (node = queue.shift)
@entries[node.node_id].each do |field_name, entry|
value = node.public_send(field_name)
- values = {} #: Hash[Symbol, untyped]
+ values = {} #: entry_values
fields.each_value do |field|
values.merge!(field.fields(value))
@@ -484,6 +639,8 @@ module Prism
# Append the given field to the repository and return the repository so
# that these calls can be chained.
+ #--
+ #: (Symbol name, _Field) -> self
def field(name, value)
raise ConfigurationError, "Cannot specify multiple #{name} fields" if @fields.key?(name)
@fields[name] = value
@@ -492,11 +649,15 @@ module Prism
end
# Create a new repository for the given filepath.
+ #--
+ #: (String value) -> Repository
def self.filepath(value)
Repository.new(SourceFilepath.new(value))
end
# Create a new repository for the given string.
+ #--
+ #: (String value) -> Repository
def self.string(value)
Repository.new(SourceString.new(value))
end
diff --git a/lib/prism/string_query.rb b/lib/prism/string_query.rb
index 9011051d2b..99ce57e5fe 100644
--- a/lib/prism/string_query.rb
+++ b/lib/prism/string_query.rb
@@ -1,28 +1,44 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
# Query methods that allow categorizing strings based on their context for
# where they could be valid in a Ruby syntax tree.
class StringQuery
+ # @rbs!
+ # def self.local?: (String string) -> bool
+ # def self.constant?: (String string) -> bool
+ # def self.method_name?: (String string) -> bool
+
# The string that this query is wrapping.
- attr_reader :string
+ attr_reader :string #: String
# Initialize a new query with the given string.
+ #--
+ #: (String string) -> void
def initialize(string)
@string = string
end
# Whether or not this string is a valid local variable name.
+ #--
+ #: () -> bool
def local?
StringQuery.local?(string)
end
# Whether or not this string is a valid constant name.
+ #--
+ #: () -> bool
def constant?
StringQuery.constant?(string)
end
# Whether or not this string is a valid method name.
+ #--
+ #: () -> bool
def method_name?
StringQuery.method_name?(string)
end
diff --git a/lib/prism/translation.rb b/lib/prism/translation.rb
index 8b75e8a3ab..5a086a7542 100644
--- a/lib/prism/translation.rb
+++ b/lib/prism/translation.rb
@@ -1,12 +1,19 @@
# frozen_string_literal: true
+# :markup: markdown
+#--
+# rbs_inline: enabled
module Prism
# This module is responsible for converting the prism syntax tree into other
# syntax trees.
module Translation # steep:ignore
autoload :Parser, "prism/translation/parser"
- autoload :Parser33, "prism/translation/parser33"
- autoload :Parser34, "prism/translation/parser34"
+ autoload :ParserCurrent, "prism/translation/parser_current"
+ autoload :Parser33, "prism/translation/parser_versions"
+ autoload :Parser34, "prism/translation/parser_versions"
+ autoload :Parser35, "prism/translation/parser_versions"
+ autoload :Parser40, "prism/translation/parser_versions"
+ autoload :Parser41, "prism/translation/parser_versions"
autoload :Ripper, "prism/translation/ripper"
autoload :RubyParser, "prism/translation/ruby_parser"
end
diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb
index 969f2b95b0..70031f133a 100644
--- a/lib/prism/translation/parser.rb
+++ b/lib/prism/translation/parser.rb
@@ -1,9 +1,15 @@
# frozen_string_literal: true
+# :markup: markdown
begin
+ required_version = ">= 3.3.7.2"
+ gem "parser", required_version
require "parser"
rescue LoadError
- warn(%q{Error: Unable to load parser. Add `gem "parser"` to your Gemfile.})
+ warn(<<~MSG)
+ Error: Unable to load parser #{required_version}. \
+ Add `gem "parser"` to your Gemfile or run `bundle update parser`.
+ MSG
exit(1)
end
@@ -13,6 +19,13 @@ module Prism
# whitequark/parser gem's syntax tree. It inherits from the base parser for
# the parser gem, and overrides the parse* methods to parse with prism and
# then translate.
+ #
+ # Note that this version of the parser always parses using the latest
+ # version of Ruby syntax supported by Prism. If you want specific version
+ # support, use one of the version-specific subclasses, such as
+ # `Prism::Translation::Parser34`. If you want to parse using the same
+ # version of Ruby syntax as the currently running version of Ruby, use
+ # `Prism::Translation::ParserCurrent`.
class Parser < ::Parser::Base
Diagnostic = ::Parser::Diagnostic # :nodoc:
private_constant :Diagnostic
@@ -20,7 +33,7 @@ module Prism
# The parser gem has a list of diagnostics with a hard-coded set of error
# messages. We create our own diagnostic class in order to set our own
# error messages.
- class PrismDiagnostic < Diagnostic
+ class PrismDiagnostic < Diagnostic # :nodoc:
# This is the cached message coming from prism.
attr_reader :message
@@ -33,8 +46,45 @@ module Prism
Racc_debug_parser = false # :nodoc:
+ # The `builder` argument is used to create the parser using our custom builder class by default.
+ #
+ # By using the `:parser` keyword argument, you can translate in a way that is compatible with
+ # the Parser gem using any parser.
+ #
+ # For example, in RuboCop for Ruby LSP, the following approach can be used to improve performance
+ # by reusing a pre-parsed `Prism::ParseLexResult`:
+ #
+ # class PrismPreparsed
+ # def initialize(prism_result)
+ # @prism_result = prism_result
+ # end
+ #
+ # def parse_lex(source, **options)
+ # @prism_result
+ # end
+ # end
+ #
+ # prism_preparsed = PrismPreparsed.new(prism_result)
+ #
+ # Prism::Translation::Ruby34.new(builder, parser: prism_preparsed)
+ #
+ # In an object passed to the `:parser` keyword argument, the `parse` and `parse_lex` methods
+ # should be implemented as needed.
+ #
+ def initialize(builder = Prism::Translation::Parser::Builder.new, parser: Prism)
+ if !builder.is_a?(Prism::Translation::Parser::Builder)
+ warn(<<~MSG, uplevel: 1, category: :deprecated)
+ [deprecation]: The builder passed to `Prism::Translation::Parser.new` is not a \
+ `Prism::Translation::Parser::Builder` subclass. This will raise in the next major version.
+ MSG
+ end
+ @parser = parser
+
+ super(builder)
+ end
+
def version # :nodoc:
- 34
+ 41
end
# The default encoding for Ruby files is UTF-8.
@@ -51,7 +101,7 @@ module Prism
source = source_buffer.source
offset_cache = build_offset_cache(source)
- result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
+ result = unwrap(@parser.parse(source, **prism_options), offset_cache)
build_ast(result.value, offset_cache)
ensure
@@ -64,7 +114,7 @@ module Prism
source = source_buffer.source
offset_cache = build_offset_cache(source)
- result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
+ result = unwrap(@parser.parse(source, **prism_options), offset_cache)
[
build_ast(result.value, offset_cache),
@@ -83,7 +133,7 @@ module Prism
offset_cache = build_offset_cache(source)
result =
begin
- unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
+ unwrap(@parser.parse_lex(source, **prism_options), offset_cache)
rescue ::Parser::SyntaxError
raise if !recover
end
@@ -285,6 +335,20 @@ module Prism
)
end
+ # Options for how prism should parse/lex the source.
+ def prism_options
+ options = {
+ filepath: @source_buffer.name,
+ version: convert_for_prism(version),
+ partial_script: true,
+ }
+ # The parser gem always encodes to UTF-8, unless it is binary.
+ # https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/source/buffer.rb#L80-L107
+ options[:encoding] = false if @source_buffer.source.encoding != Encoding::BINARY
+
+ options
+ end
+
# Converts the version format handled by Parser to the format handled by Prism.
def convert_for_prism(version)
case version
@@ -292,11 +356,16 @@ module Prism
"3.3.1"
when 34
"3.4.0"
+ when 35, 40
+ "4.0.0"
+ when 41
+ "4.1.0"
else
"latest"
end
end
+ require_relative "parser/builder"
require_relative "parser/compiler"
require_relative "parser/lexer"
diff --git a/lib/prism/translation/parser/builder.rb b/lib/prism/translation/parser/builder.rb
new file mode 100644
index 0000000000..7fc3bba6b7
--- /dev/null
+++ b/lib/prism/translation/parser/builder.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+module Prism
+ module Translation
+ class Parser
+ # A builder that knows how to convert more modern Ruby syntax
+ # into whitequark/parser gem's syntax tree.
+ class Builder < ::Parser::Builders::Default
+ # It represents the `it` block argument, which is not yet implemented in
+ # the Parser gem.
+ def itarg
+ n(:itarg, [:it], nil)
+ end
+
+ # The following three lines have been added to support the `it` block
+ # parameter syntax in the source code below.
+ #
+ # if args.type == :itarg
+ # block_type = :itblock
+ # args = :it
+ #
+ # https://github.com/whitequark/parser/blob/v3.3.7.1/lib/parser/builders/default.rb#L1122-L1155
+ def block(method_call, begin_t, args, body, end_t)
+ _receiver, _selector, *call_args = *method_call
+
+ if method_call.type == :yield
+ diagnostic :error, :block_given_to_yield, nil, method_call.loc.keyword, [loc(begin_t)]
+ end
+
+ last_arg = call_args.last
+ if last_arg && (last_arg.type == :block_pass || last_arg.type == :forwarded_args)
+ diagnostic :error, :block_and_blockarg, nil, last_arg.loc.expression, [loc(begin_t)]
+ end
+
+ if args.type == :itarg
+ block_type = :itblock
+ args = :it
+ elsif args.type == :numargs
+ block_type = :numblock
+ args = args.children[0]
+ else
+ block_type = :block
+ end
+
+ if [:send, :csend, :index, :super, :zsuper, :lambda].include?(method_call.type)
+ n(block_type, [ method_call, args, body ],
+ block_map(method_call.loc.expression, begin_t, end_t))
+ else
+ # Code like "return foo 1 do end" is reduced in a weird sequence.
+ # Here, method_call is actually (return).
+ actual_send, = *method_call
+ block =
+ n(block_type, [ actual_send, args, body ],
+ block_map(actual_send.loc.expression, begin_t, end_t))
+
+ n(method_call.type, [ block ],
+ method_call.loc.with_expression(join_exprs(method_call, block)))
+ end
+ end
+
+ # def foo(&nil); end
+ # ^^^^
+ def blocknilarg(amper_t, nil_t)
+ n0(:blocknilarg, arg_prefix_map(amper_t, nil_t))
+ end
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
index d66e553fa3..d11db12ae6 100644
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
@@ -1,13 +1,14 @@
# frozen_string_literal: true
+# :markup: markdown
module Prism
module Translation
class Parser
# A visitor that knows how to convert a prism syntax tree into the
# whitequark/parser gem's syntax tree.
- class Compiler < ::Prism::Compiler
+ class Compiler < ::Prism::Compiler # :nodoc:
# Raised when the tree is malformed or there is a bug in the compiler.
- class CompilationError < StandardError
+ class CompilationError < StandardError # :nodoc:
end
# The Parser::Base instance that is being used to build the AST.
@@ -74,7 +75,29 @@ module Prism
# []
# ^^
def visit_array_node(node)
- builder.array(token(node.opening_loc), visit_all(node.elements), token(node.closing_loc))
+ if node.opening&.start_with?("%w", "%W", "%i", "%I")
+ elements = node.elements.flat_map do |element|
+ if element.is_a?(StringNode)
+ if element.content.include?("\n")
+ string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([element.unescaped, srange(element.content_loc)])]
+ end
+ elsif element.is_a?(InterpolatedStringNode)
+ builder.string_compose(
+ token(element.opening_loc),
+ string_nodes_from_interpolation(element, node.opening),
+ token(element.closing_loc)
+ )
+ else
+ [visit(element)]
+ end
+ end
+ else
+ elements = visit_all(node.elements)
+ end
+
+ builder.array(token(node.opening_loc), elements, token(node.closing_loc))
end
# foo => [bar]
@@ -111,8 +134,8 @@ module Prism
def visit_assoc_node(node)
key = node.key
- if in_pattern
- if node.value.is_a?(ImplicitNode)
+ if node.value.is_a?(ImplicitNode)
+ if in_pattern
if key.is_a?(SymbolNode)
if key.opening.nil?
builder.match_hash_var([key.unescaped, srange(key.location)])
@@ -122,19 +145,18 @@ module Prism
else
builder.match_hash_var_from_str(token(key.opening_loc), visit_all(key.parts), token(key.closing_loc))
end
- elsif key.opening.nil?
- builder.pair_keyword([key.unescaped, srange(key.location)], visit(node.value))
else
- builder.pair_quoted(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc), visit(node.value))
- end
- elsif node.value.is_a?(ImplicitNode)
- if (value = node.value.value).is_a?(LocalVariableReadNode)
- builder.pair_keyword(
- [key.unescaped, srange(key)],
+ value = node.value.value
+
+ implicit_value = if value.is_a?(CallNode)
+ builder.call_method(nil, nil, [value.name, srange(value.message_loc)])
+ elsif value.is_a?(ConstantReadNode)
+ builder.const([value.name, srange(key.value_loc)])
+ else
builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
- )
- else
- builder.pair_label([key.unescaped, srange(key.location)])
+ end
+
+ builder.pair_keyword([key.unescaped, srange(key)], implicit_value)
end
elsif node.operator_loc
builder.pair(visit(key), token(node.operator_loc), visit(node.value))
@@ -181,14 +203,21 @@ module Prism
if (rescue_clause = node.rescue_clause)
begin
find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset
- find_end_offset = (rescue_clause.statements&.location&.start_offset || rescue_clause.subsequent&.location&.start_offset || (find_start_offset + 1))
+ find_end_offset = (
+ rescue_clause.statements&.location&.start_offset ||
+ rescue_clause.subsequent&.location&.start_offset ||
+ node.else_clause&.location&.start_offset ||
+ node.ensure_clause&.location&.start_offset ||
+ node.end_keyword_loc&.start_offset ||
+ find_start_offset + 1
+ )
rescue_bodies << builder.rescue_body(
token(rescue_clause.keyword_loc),
rescue_clause.exceptions.any? ? builder.array(nil, visit_all(rescue_clause.exceptions), nil) : nil,
token(rescue_clause.operator_loc),
visit(rescue_clause.reference),
- srange_find(find_start_offset, find_end_offset, ";"),
+ srange_semicolon(find_start_offset, find_end_offset),
visit(rescue_clause.statements)
)
end until (rescue_clause = rescue_clause.subsequent).nil?
@@ -268,11 +297,6 @@ module Prism
if node.call_operator_loc.nil?
case name
- when :-@
- case (receiver = node.receiver).type
- when :integer_node, :float_node, :rational_node, :imaginary_node
- return visit(numeric_negate(node.message_loc, receiver))
- end
when :!
return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
when :=~
@@ -294,7 +318,7 @@ module Prism
visit_all(arguments),
token(node.closing_loc),
),
- srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, "="),
+ token(node.equal_loc),
visit(node.arguments.arguments.last)
),
block
@@ -311,7 +335,7 @@ module Prism
if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
builder.assign(
builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
- srange_find(message_loc.end_offset, node.arguments.location.start_offset, "="),
+ token(node.equal_loc),
visit(node.arguments.arguments.last)
)
else
@@ -664,13 +688,37 @@ module Prism
# defined?(a)
# ^^^^^^^^^^^
def visit_defined_node(node)
- builder.keyword_cmd(
- :defined?,
- token(node.keyword_loc),
- token(node.lparen_loc),
- [visit(node.value)],
- token(node.rparen_loc)
- )
+ # Very weird circumstances here where something like:
+ #
+ # defined?
+ # (1)
+ #
+ # gets parsed in Ruby as having only the `1` expression but in parser
+ # it gets parsed as having a begin. In this case we need to synthesize
+ # that begin to match parser's behavior.
+ if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n")
+ builder.keyword_cmd(
+ :defined?,
+ token(node.keyword_loc),
+ nil,
+ [
+ builder.begin(
+ token(node.lparen_loc),
+ visit(node.value),
+ token(node.rparen_loc)
+ )
+ ],
+ nil
+ )
+ else
+ builder.keyword_cmd(
+ :defined?,
+ token(node.keyword_loc),
+ token(node.lparen_loc),
+ [visit(node.value)],
+ token(node.rparen_loc)
+ )
+ end
end
# if foo then bar else baz end
@@ -736,7 +784,7 @@ module Prism
if (do_keyword_loc = node.do_keyword_loc)
token(do_keyword_loc)
else
- srange_find(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset, ";")
+ srange_semicolon(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset)
end,
visit(node.statements),
token(node.end_keyword_loc)
@@ -868,7 +916,7 @@ module Prism
if (then_keyword_loc = node.then_keyword_loc)
token(then_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset, ";")
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset)
end,
visit(node.statements),
case node.subsequent
@@ -934,7 +982,7 @@ module Prism
if (then_loc = node.then_loc)
token(then_loc)
else
- srange_find(node.pattern.location.end_offset, node.statements&.location&.start_offset, ";")
+ srange_semicolon(node.pattern.location.end_offset, node.statements&.location&.start_offset)
end,
visit(node.statements)
)
@@ -1000,7 +1048,7 @@ module Prism
builder.index_asgn(
visit(node.receiver),
token(node.opening_loc),
- visit_all(node.arguments.arguments),
+ visit_all(node.arguments&.arguments || []),
token(node.closing_loc),
)
end
@@ -1068,7 +1116,7 @@ module Prism
def visit_interpolated_regular_expression_node(node)
builder.regexp_compose(
token(node.opening_loc),
- visit_all(node.parts),
+ string_nodes_from_interpolation(node, node.opening),
[node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
)
@@ -1085,29 +1133,9 @@ module Prism
return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
end
- parts = if node.parts.one? { |part| part.type == :string_node }
- node.parts.flat_map do |node|
- if node.type == :string_node && node.unescaped.lines.count >= 2
- start_offset = node.content_loc.start_offset
-
- node.unescaped.lines.map do |line|
- end_offset = start_offset + line.length
- offsets = srange_offsets(start_offset, end_offset)
- start_offset = end_offset
-
- builder.string_internal([line, offsets])
- end
- else
- visit(node)
- end
- end
- else
- visit_all(node.parts)
- end
-
builder.string_compose(
token(node.opening_loc),
- parts,
+ string_nodes_from_interpolation(node, node.opening),
token(node.closing_loc)
)
end
@@ -1117,7 +1145,7 @@ module Prism
def visit_interpolated_symbol_node(node)
builder.symbol_compose(
token(node.opening_loc),
- visit_all(node.parts),
+ string_nodes_from_interpolation(node, node.opening),
token(node.closing_loc)
)
end
@@ -1126,14 +1154,14 @@ module Prism
# ^^^^^^^^^^^^
def visit_interpolated_x_string_node(node)
if node.heredoc?
- visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
- else
- builder.xstring_compose(
- token(node.opening_loc),
- visit_all(node.parts),
- token(node.closing_loc)
- )
+ return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
end
+
+ builder.xstring_compose(
+ token(node.opening_loc),
+ string_nodes_from_interpolation(node, node.opening),
+ token(node.closing_loc)
+ )
end
# -> { it }
@@ -1145,7 +1173,17 @@ module Prism
# -> { it }
# ^^^^^^^^^
def visit_it_parameters_node(node)
- builder.args(nil, [], nil, false)
+ # FIXME: The builder _should_ always be a subclass of the prism builder.
+ # Currently RuboCop passes in its own builder that always inherits from the
+ # parser builder (which is lacking the `itarg` method). Once rubocop-ast
+ # opts in to use the custom prism builder a warning can be emitted when
+ # it is not the expected class, and eventually raise.
+ # https://github.com/rubocop/rubocop-ast/pull/354
+ if builder.is_a?(Translation::Parser::Builder)
+ builder.itarg
+ else
+ builder.args(nil, [], nil, false)
+ end
end
# foo(bar: baz)
@@ -1187,7 +1225,7 @@ module Prism
false
)
end,
- node.body&.accept(copy_compiler(forwarding: implicit_parameters ? [] : find_forwarding(parameters&.parameters))),
+ visit(node.body),
[node.closing, srange(node.closing_loc)]
)
end
@@ -1281,7 +1319,7 @@ module Prism
# A node that is missing from the syntax tree. This is only used in the
# case of a syntax error. The parser gem doesn't have such a concept, so
# we invent our own here.
- def visit_missing_node(node)
+ def visit_error_recovery_node(node)
::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location)))
end
@@ -1311,7 +1349,7 @@ module Prism
def visit_multi_write_node(node)
elements = multi_target_elements(node)
- if elements.length == 1 && elements.first.is_a?(MultiTargetNode)
+ if elements.length == 1 && elements.first.is_a?(MultiTargetNode) && !node.rest
elements = multi_target_elements(elements.first)
end
@@ -1347,6 +1385,12 @@ module Prism
builder.nil(token(node.location))
end
+ # def foo(&nil); end
+ # ^^^^
+ def visit_no_block_parameter_node(node)
+ builder.blocknilarg(token(node.operator_loc), token(node.keyword_loc))
+ end
+
# def foo(**nil); end
# ^^^^^
def visit_no_keywords_parameter_node(node)
@@ -1439,7 +1483,8 @@ module Prism
# foo => ^(bar)
# ^^^^^^
def visit_pinned_expression_node(node)
- expression = builder.begin(token(node.lparen_loc), visit(node.expression), token(node.rparen_loc))
+ parts = node.expression.accept(copy_compiler(in_pattern: false)) # Don't treat * and similar as match_rest
+ expression = builder.begin(token(node.lparen_loc), parts, token(node.rparen_loc))
builder.pin(token(node.operator_loc), expression)
end
@@ -1511,15 +1556,13 @@ module Prism
# /foo/
# ^^^^^
def visit_regular_expression_node(node)
- content = node.content
parts =
- if content.include?("\n")
- offset = node.content_loc.start_offset
- content.lines.map do |line|
- builder.string_internal([line, srange_offsets(offset, offset += line.bytesize)])
- end
+ if node.content == ""
+ []
+ elsif node.content.include?("\n")
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
else
- [builder.string_internal(token(node.content_loc))]
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
end
builder.regexp_compose(
@@ -1676,28 +1719,11 @@ module Prism
elsif node.opening&.start_with?("%") && node.unescaped.empty?
builder.string_compose(token(node.opening_loc), [], token(node.closing_loc))
else
- content_lines = node.content.lines
- unescaped_lines = node.unescaped.lines
-
parts =
- if content_lines.length <= 1 || unescaped_lines.length <= 1
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
- elsif content_lines.length != unescaped_lines.length
- # This occurs when we have line continuations in the string. We
- # need to come back and fix this, but for now this stops the
- # code from breaking when we encounter it because of trying to
- # transpose arrays of different lengths.
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
+ if node.content.include?("\n")
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
else
- start_offset = node.content_loc.start_offset
-
- [content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line|
- end_offset = start_offset + content_line.length
- offsets = srange_offsets(start_offset, end_offset)
- start_offset = end_offset
-
- builder.string_internal([unescaped_line, offsets])
- end
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
end
builder.string_compose(
@@ -1741,19 +1767,14 @@ module Prism
builder.symbol([node.unescaped, srange(node.location)])
end
else
- parts = if node.value.lines.one?
- [builder.string_internal([node.unescaped, srange(node.value_loc)])]
- else
- start_offset = node.value_loc.start_offset
-
- node.value.lines.map do |line|
- end_offset = start_offset + line.length
- offsets = srange_offsets(start_offset, end_offset)
- start_offset = end_offset
-
- builder.string_internal([line, offsets])
+ parts =
+ if node.value_loc.nil?
+ []
+ elsif node.value.include?("\n")
+ string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([node.unescaped, srange(node.value_loc)])]
end
- end
builder.symbol_compose(
token(node.opening_loc),
@@ -1788,7 +1809,7 @@ module Prism
if (then_keyword_loc = node.then_keyword_loc)
token(then_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset, ";")
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset)
end,
visit(node.else_clause),
token(node.else_clause&.else_keyword_loc),
@@ -1819,7 +1840,7 @@ module Prism
if (do_keyword_loc = node.do_keyword_loc)
token(do_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";")
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset)
end,
visit(node.statements),
token(node.closing_loc)
@@ -1843,7 +1864,7 @@ module Prism
if (then_keyword_loc = node.then_keyword_loc)
token(then_keyword_loc)
else
- srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset, ";")
+ srange_semicolon(node.conditions.last.location.end_offset, node.statements&.location&.start_offset)
end,
visit(node.statements)
)
@@ -1863,7 +1884,7 @@ module Prism
if (do_keyword_loc = node.do_keyword_loc)
token(do_keyword_loc)
else
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";")
+ srange_semicolon(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset)
end,
visit(node.statements),
token(node.closing_loc)
@@ -1882,28 +1903,23 @@ module Prism
# ^^^^^
def visit_x_string_node(node)
if node.heredoc?
- visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
- else
- parts = if node.unescaped.lines.one?
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
- else
- start_offset = node.content_loc.start_offset
-
- node.unescaped.lines.map do |line|
- end_offset = start_offset + line.length
- offsets = srange_offsets(start_offset, end_offset)
- start_offset = end_offset
+ return visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
+ end
- builder.string_internal([line, offsets])
- end
+ parts =
+ if node.content == ""
+ []
+ elsif node.content.include?("\n")
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
+ else
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
end
- builder.xstring_compose(
- token(node.opening_loc),
- parts,
- token(node.closing_loc)
- )
- end
+ builder.xstring_compose(
+ token(node.opening_loc),
+ parts,
+ token(node.closing_loc)
+ )
end
# yield
@@ -1952,22 +1968,6 @@ module Prism
elements
end
- # Negate the value of a numeric node. This is a special case where you
- # have a negative sign on one line and then a number on the next line.
- # In normal Ruby, this will always be a method call. The parser gem,
- # however, marks this as a numeric literal. We have to massage the tree
- # here to get it into the correct form.
- def numeric_negate(message_loc, receiver)
- case receiver.type
- when :integer_node, :float_node
- receiver.copy(value: -receiver.value, location: message_loc.join(receiver.location))
- when :rational_node
- receiver.copy(numerator: -receiver.numerator, location: message_loc.join(receiver.location))
- when :imaginary_node
- receiver.copy(numeric: numeric_negate(message_loc, receiver.numeric), location: message_loc.join(receiver.location))
- end
- end
-
# Blocks can have a special set of parameters that automatically expand
# when given arrays if they have a single required parameter and no
# other parameters.
@@ -1997,16 +1997,16 @@ module Prism
Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
end
- # Constructs a new source range by finding the given character between
- # the given start offset and end offset. If the needle is not found, it
- # returns nil. Importantly it does not search past newlines or comments.
+ # Constructs a new source range by finding a semicolon between the given
+ # start offset and end offset. If the semicolon is not found, it returns
+ # nil. Importantly it does not search past newlines or comments.
#
# Note that end_offset is allowed to be nil, in which case this will
# search until the end of the string.
- def srange_find(start_offset, end_offset, character)
- if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*#{character}/])
+ def srange_semicolon(start_offset, end_offset)
+ if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*;/])
final_offset = start_offset + match.bytesize
- [character, Range.new(source_buffer, offset_cache[final_offset - character.bytesize], offset_cache[final_offset])]
+ [";", Range.new(source_buffer, offset_cache[final_offset - 1], offset_cache[final_offset])]
end
end
@@ -2042,7 +2042,7 @@ module Prism
false
)
end,
- block.body&.accept(copy_compiler(forwarding: implicit_parameters ? [] : find_forwarding(parameters&.parameters))),
+ visit(block.body),
token(block.closing_loc)
)
else
@@ -2050,13 +2050,6 @@ module Prism
end
end
- # The parser gem automatically converts \r\n to \n, meaning our offsets
- # need to be adjusted to always subtract 1 from the length.
- def chomped_bytesize(line)
- chomped = line.chomp
- chomped.bytesize + (chomped == line ? 0 : 1)
- end
-
# Visit a heredoc that can be either a string or an xstring.
def visit_heredoc(node)
children = Array.new
@@ -2073,34 +2066,8 @@ module Prism
node.parts.each do |part|
pushing =
- if part.is_a?(StringNode) && part.unescaped.include?("\n")
- unescaped = part.unescaped.lines
- escaped = part.content.lines
-
- escaped_lengths = []
- normalized_lengths = []
-
- if node.opening.end_with?("'")
- escaped.each do |line|
- escaped_lengths << line.bytesize
- normalized_lengths << chomped_bytesize(line)
- end
- else
- escaped
- .chunk_while { |before, after| before.match?(/(?<!\\)\\\r?\n$/) }
- .each do |lines|
- escaped_lengths << lines.sum(&:bytesize)
- normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
- end
- end
-
- start_offset = part.location.start_offset
-
- unescaped.map.with_index do |unescaped_line, index|
- inner_part = builder.string_internal([unescaped_line, srange_offsets(start_offset, start_offset + normalized_lengths.fetch(index, 0))])
- start_offset += escaped_lengths.fetch(index, 0)
- inner_part
- end
+ if part.is_a?(StringNode) && part.content.include?("\n")
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.location.start_offset, node.opening)
else
[visit(part)]
end
@@ -2114,7 +2081,7 @@ module Prism
location = appendee.loc
location = location.with_expression(location.expression.join(child.loc.expression))
- children[-1] = appendee.updated(:str, [appendee.children.first << child.children.first], location: location)
+ children[-1] = appendee.updated(:str, ["#{appendee.children.first}#{child.children.first}"], location: location)
else
children << child
end
@@ -2150,6 +2117,102 @@ module Prism
parser.pattern_variables.pop
end
end
+
+ # When the content of a string node is split across multiple lines, the
+ # parser gem creates individual string nodes for each line the content is part of.
+ def string_nodes_from_interpolation(node, opening)
+ node.parts.flat_map do |part|
+ if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening)
+ else
+ visit(part)
+ end
+ end
+ end
+
+ # Create parser string nodes from a single prism node. The parser gem
+ # "glues" strings together when a line continuation is encountered.
+ def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
+ unescaped = unescaped.lines
+ escaped = escaped.lines
+ percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
+ regex = opening == "/" || opening&.start_with?("%r")
+
+ # Non-interpolating strings
+ if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
+ current_length = 0
+ current_line = +""
+
+ escaped.filter_map.with_index do |escaped_line, index|
+ unescaped_line = unescaped.fetch(index, "")
+ current_length += escaped_line.bytesize
+ current_line << unescaped_line
+
+ # Glue line continuations together. Only %w and %i arrays can contain these.
+ if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd?
+ next unless index == escaped.count - 1
+ end
+ s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)])
+ start_offset += escaped_line.bytesize
+ current_line = +""
+ current_length = 0
+ s
+ end
+ else
+ escaped_lengths = []
+ normalized_lengths = []
+ # Keeps track of where an unescaped line should start a new token. An unescaped
+ # \n would otherwise be indistinguishable from the actual newline at the end of
+ # of the line. The parser gem only emits a new string node at "real" newlines,
+ # line continuations don't start a new node as well.
+ do_next_tokens = []
+
+ escaped
+ .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
+ .each do |lines|
+ escaped_lengths << lines.sum(&:bytesize)
+
+ unescaped_lines_count =
+ if regex
+ 0 # Will always be preserved as is
+ else
+ lines.sum do |line|
+ count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
+ count -= 1 if line.match?(/(?:\A|[^\\])(?:\\\\)*\\n\z/) && count > 0
+ count
+ end
+ end
+
+ extra = 1
+ extra = lines.count if percent_array # Account for line continuations in percent arrays
+
+ normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0))
+ normalized_lengths[-1] = lines.sum { |line| line.bytesize }
+ do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false))
+ do_next_tokens[-1] = true
+ end
+
+ current_line = +""
+ current_normalized_length = 0
+
+ emitted_count = 0
+ unescaped.filter_map.with_index do |unescaped_line, index|
+ current_line << unescaped_line
+ current_normalized_length += normalized_lengths.fetch(index, 0)
+
+ if do_next_tokens[index]
+ inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
+ start_offset += escaped_lengths.fetch(emitted_count, 0)
+ current_line = +""
+ current_normalized_length = 0
+ emitted_count += 1
+ inner_part
+ else
+ nil
+ end
+ end
+ end
+ end
end
end
end
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index db7dbb1c87..e82042867f 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -1,21 +1,25 @@
# frozen_string_literal: true
+# :markup: markdown
+
+require "strscan"
+require_relative "../../polyfill/append_as_bytes"
+require_relative "../../polyfill/scan_byte"
module Prism
module Translation
class Parser
# Accepts a list of prism tokens and converts them into the expected
# format for the parser gem.
- class Lexer
+ class Lexer # :nodoc:
+ # These tokens are always skipped
+ TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF])
+ private_constant :TYPES_ALWAYS_SKIP
+
# The direct translating of types between the two lexers.
TYPES = {
# These tokens should never appear in the output of the lexer.
- EOF: nil,
- MISSING: nil,
- NOT_PROVIDED: nil,
- IGNORED_NEWLINE: nil,
EMBDOC_END: nil,
EMBDOC_LINE: nil,
- __END__: nil,
# These tokens have more or less direct mappings.
AMPERSAND: :tAMPER2,
@@ -83,6 +87,7 @@ module Prism
KEYWORD_DEF: :kDEF,
KEYWORD_DEFINED: :kDEFINED,
KEYWORD_DO: :kDO,
+ KEYWORD_DO_BLOCK: :kDO_BLOCK,
KEYWORD_DO_LOOP: :kDO_COND,
KEYWORD_END: :kEND,
KEYWORD_END_UPCASE: :klEND,
@@ -184,23 +189,31 @@ module Prism
# without them. We should find another way to do this, but in the
# meantime we'll hide them from the documentation and mark them as
# private constants.
- EXPR_BEG = 0x1 # :nodoc:
- EXPR_LABEL = 0x400 # :nodoc:
+ EXPR_BEG = 0x1
+ EXPR_LABEL = 0x400
# It is used to determine whether `do` is of the token type `kDO` or `kDO_LAMBDA`.
#
# NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned
# instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
- LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG]
+ LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG])
# The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
# The following token types are listed as those classified as `tLPAREN`.
- LPAREN_CONVERSION_TOKEN_TYPES = [
- :kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
- :tEQL, :tLPAREN, :tLPAREN2, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
- ]
+ LPAREN_CONVERSION_TOKEN_TYPES = Set.new([
+ :kBREAK, :tCARET, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
+ :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS, :tLCURLY
+ ])
+
+ # Types of tokens that are allowed to continue a method call with comments in-between.
+ # For these, the parser gem doesn't emit a newline token after the last comment.
+ COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT])
+ private_constant :COMMENT_CONTINUATION_TYPES
+
+ # Heredocs are complex and require us to keep track of a bit of info to refer to later
+ HeredocData = Struct.new(:identifier, :common_whitespace, keyword_init: true)
- private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL, :LAMBDA_TOKEN_TYPES, :LPAREN_CONVERSION_TOKEN_TYPES
+ private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL, :LAMBDA_TOKEN_TYPES, :LPAREN_CONVERSION_TOKEN_TYPES, :HeredocData
# The Parser::Source::Buffer that the tokens were lexed from.
attr_reader :source_buffer
@@ -220,7 +233,7 @@ module Prism
@offset_cache = offset_cache
end
- Range = ::Parser::Source::Range # :nodoc:
+ Range = ::Parser::Source::Range
private_constant :Range
# Convert the prism tokens into the expected format for the parser gem.
@@ -230,46 +243,78 @@ module Prism
index = 0
length = lexed.length
- heredoc_identifier_stack = []
+ heredoc_stack = []
+ quote_stack = []
+
+ # The parser gem emits the newline tokens for comments out of order. This saves
+ # that token location to emit at a later time to properly line everything up.
+ # https://github.com/whitequark/parser/issues/1025
+ comment_newline_location = nil
while index < length
token, state = lexed[index]
index += 1
- next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
+ next if TYPES_ALWAYS_SKIP.include?(token.type)
type = TYPES.fetch(token.type)
value = token.value
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset])
+ location = range(token.location.start_offset, token.location.end_offset)
case type
when :kDO
- types = tokens.map(&:first)
- nearest_lambda_token_type = types.reverse.find { |type| LAMBDA_TOKEN_TYPES.include?(type) }
+ nearest_lambda_token = tokens.reverse_each.find do |token|
+ LAMBDA_TOKEN_TYPES.include?(token.first)
+ end
- if nearest_lambda_token_type == :tLAMBDA
+ if nearest_lambda_token&.first == :tLAMBDA
type = :kDO_LAMBDA
end
when :tCHARACTER
value.delete_prefix!("?")
+ # Character literals behave similar to double-quoted strings. We can use the same escaping mechanism.
+ value = unescape_string(value, "?")
when :tCOMMENT
if token.type == :EMBDOC_BEGIN
- start_index = index
- while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
+ while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1)
value += next_token.value
index += 1
end
- if start_index != index
- value += next_token.value
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
- index += 1
- end
+ value += next_token.value
+ location = range(token.location.start_offset, next_token.location.end_offset)
+ index += 1
else
- value.chomp!
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
+ is_at_eol = value.chomp!.nil?
+ location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1))
+
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ next_token, _ = lexed[index]
+
+ is_inline_comment = prev_token&.location&.start_line == token.location.start_line
+ if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
+ tokens << [:tCOMMENT, [value, location]]
+
+ nl_location = range(token.location.end_offset - 1, token.location.end_offset)
+ tokens << [:tNL, [nil, nl_location]]
+ next
+ elsif is_inline_comment && next_token&.type == :COMMENT
+ comment_newline_location = range(token.location.end_offset - 1, token.location.end_offset)
+ elsif comment_newline_location && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
+ tokens << [:tCOMMENT, [value, location]]
+ tokens << [:tNL, [nil, comment_newline_location]]
+ comment_newline_location = nil
+ next
+ end
end
when :tNL
+ next_token, _ = lexed[index]
+ # Newlines after comments are emitted out of order.
+ if next_token&.type == :COMMENT
+ comment_newline_location = location
+ next
+ end
+
value = nil
when :tFLOAT
value = parse_float(value)
@@ -277,8 +322,8 @@ module Prism
value = parse_complex(value)
when :tINTEGER
if value.start_with?("+")
- tokens << [:tUNARY_NUM, ["+", Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])]]
- location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
+ tokens << [:tUNARY_NUM, ["+", range(token.location.start_offset, token.location.start_offset + 1)]]
+ location = range(token.location.start_offset + 1, token.location.end_offset)
end
value = parse_integer(value)
@@ -297,92 +342,196 @@ module Prism
when :tRATIONAL
value = parse_rational(value)
when :tSPACE
+ location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
value = nil
when :tSTRING_BEG
- if token.type == :HEREDOC_START
- heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
- end
- if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
+ next_token, _ = lexed[index]
+ next_next_token, _ = lexed[index + 1]
+ basic_quotes = value == '"' || value == "'"
+
+ if basic_quotes && next_token&.type == :STRING_END
next_location = token.location.join(next_token.location)
type = :tSTRING
value = ""
- location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
+ location = range(next_location.start_offset, next_location.end_offset)
index += 1
- elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
- next_location = token.location.join(next_next_token.location)
- type = :tSTRING
- value = next_token.value.gsub("\\\\", "\\")
- location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
- index += 2
- elsif value.start_with?("<<")
+ elsif value.start_with?("'", '"', "%")
+ if next_token&.type == :STRING_CONTENT && next_next_token&.type == :STRING_END
+ string_value = next_token.value
+ if simplify_string?(string_value, value)
+ next_location = token.location.join(next_next_token.location)
+ if percent_array?(value)
+ value = percent_array_unescape(string_value)
+ else
+ value = unescape_string(string_value, value)
+ end
+ type = :tSTRING
+ location = range(next_location.start_offset, next_location.end_offset)
+ index += 2
+ tokens << [type, [value, location]]
+
+ next
+ end
+ end
+
+ quote_stack.push(value)
+ elsif token.type == :HEREDOC_START
quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
+ heredoc_type = value[2] == "-" || value[2] == "~" ? value[2] : ""
+ heredoc = HeredocData.new(
+ identifier: value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier],
+ common_whitespace: 0,
+ )
+
if quote == "`"
type = :tXSTRING_BEG
- value = "<<`"
+ end
+
+ # The parser gem trims whitespace from squiggly heredocs. We must record
+ # the most common whitespace to later remove.
+ if heredoc_type == "~" || heredoc_type == "`"
+ heredoc.common_whitespace = calculate_heredoc_whitespace(index)
+ end
+
+ if quote == "'" || quote == '"' || quote == "`"
+ value = "<<#{quote}"
else
- value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
+ value = '<<"'
end
+
+ heredoc_stack.push(heredoc)
+ quote_stack.push(value)
end
when :tSTRING_CONTENT
- unless (lines = token.value.lines).one?
- start_offset = offset_cache[token.location.start_offset]
- lines.map do |line|
- newline = line.end_with?("\r\n") ? "\r\n" : "\n"
+ is_percent_array = percent_array?(quote_stack.last)
+
+ if (lines = token.value.lines).one?
+ # Prism usually emits a single token for strings with line continuations.
+ # For squiggly heredocs they are not joined so we do that manually here.
+ current_string = +""
+ current_length = 0
+ start_offset = token.location.start_offset
+ while token.type == :STRING_CONTENT
+ current_length += token.value.bytesize
+ # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line
+ # The parser gem only removes indentation when the heredoc is not nested
+ not_nested = heredoc_stack.size == 1
+ if is_percent_array
+ value = percent_array_unescape(token.value)
+ elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
+ value = trim_heredoc_whitespace(token.value, current_heredoc)
+ end
+
+ current_string << unescape_string(value, quote_stack.last)
+ relevant_backslash_count = if quote_stack.last.start_with?("%W", "%I")
+ 0 # the last backslash escapes the newline
+ else
+ token.value[/(\\{1,})\n/, 1]&.length || 0
+ end
+ if relevant_backslash_count.even? || !interpolation?(quote_stack.last)
+ tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
+ break
+ end
+ token, _ = lexed[index]
+ index += 1
+ end
+ else
+ # When the parser gem encounters a line continuation inside of a multiline string,
+ # it emits a single string node. The backslash (and remaining newline) is removed.
+ current_line = +""
+ adjustment = 0
+ start_offset = token.location.start_offset
+ emit = false
+
+ lines.each.with_index do |line, index|
chomped_line = line.chomp
- if match = chomped_line.match(/(?<backslashes>\\+)\z/)
- adjustment = match[:backslashes].size / 2
- adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
- if match[:backslashes].size.odd?
- adjusted_line.delete_suffix!("\\")
- adjustment += 2
+ backslash_count = chomped_line[/\\{1,}\z/]&.length || 0
+ is_interpolation = interpolation?(quote_stack.last)
+
+ if backslash_count.odd? && (is_interpolation || is_percent_array)
+ if is_percent_array
+ current_line << percent_array_unescape(line)
+ adjustment += 1
else
- adjusted_line << newline
+ chomped_line.delete_suffix!("\\")
+ current_line << chomped_line
+ adjustment += 2
end
+ # If the string ends with a line continuation emit the remainder
+ emit = index == lines.count - 1
else
- adjusted_line = line
- adjustment = 0
+ current_line << line
+ emit = true
end
- end_offset = start_offset + adjusted_line.length + adjustment
- tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
- start_offset = end_offset
+ if emit
+ end_offset = start_offset + current_line.bytesize + adjustment
+ tokens << [:tSTRING_CONTENT, [unescape_string(current_line, quote_stack.last), range(start_offset, end_offset)]]
+ start_offset = end_offset
+ current_line = +""
+ adjustment = 0
+ end
end
- next
end
+ next
when :tSTRING_DVAR
value = nil
when :tSTRING_END
if token.type == :HEREDOC_END && value.end_with?("\n")
newline_length = value.end_with?("\r\n") ? 2 : 1
- value = heredoc_identifier_stack.pop
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
+ value = heredoc_stack.pop.identifier
+ location = range(token.location.start_offset, token.location.end_offset - newline_length)
elsif token.type == :REGEXP_END
value = value[0]
- location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
+ location = range(token.location.start_offset, token.location.start_offset + 1)
+ end
+
+ if percent_array?(quote_stack.pop)
+ prev_token, _ = lexed[index - 2] if index - 2 >= 0
+ empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type)
+ ends_with_whitespace = prev_token&.type == :WORDS_SEP
+ # parser always emits a space token after content in a percent array, even if no actual whitespace is present.
+ if !empty && !ends_with_whitespace
+ tokens << [:tSPACE, [nil, range(token.location.start_offset, token.location.start_offset)]]
+ end
end
when :tSYMBEG
- if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
+ if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
next_location = token.location.join(next_token.location)
type = :tSYMBOL
value = next_token.value
value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
- location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
+ location = range(next_location.start_offset, next_location.end_offset)
index += 1
+ else
+ quote_stack.push(value)
end
when :tFID
if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
type = :tIDENTIFIER
end
when :tXSTRING_BEG
- if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
+ if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
+ # self.`()
type = :tBACK_REF2
end
+ quote_stack.push(value)
+ when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG
+ if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP
+ index += 1
+ end
+
+ quote_stack.push(value)
+ when :tREGEXP_BEG
+ quote_stack.push(value)
end
tokens << [type, [value, location]]
if token.type == :REGEXP_END
- tokens << [:tREGEXP_OPT, [token.value[1..], Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])]]
+ tokens << [:tREGEXP_OPT, [token.value[1..], range(token.location.start_offset + 1, token.location.end_offset)]]
end
end
@@ -391,6 +540,11 @@ module Prism
private
+ # Creates a new parser range, taking prisms byte offsets into account
+ def range(start_offset, end_offset)
+ Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
+ end
+
# Parse an integer from the string representation.
def parse_integer(value)
Integer(value)
@@ -432,6 +586,233 @@ module Prism
rescue ArgumentError
0r
end
+
+ # Wonky heredoc tab/spaces rules.
+ # https://github.com/ruby/prism/blob/v1.3.0/src/prism.c#L10548-L10558
+ def calculate_heredoc_whitespace(heredoc_token_index)
+ next_token_index = heredoc_token_index
+ nesting_level = 0
+ previous_line = -1
+ result = Float::MAX
+
+ while (next_token = lexed[next_token_index]&.first)
+ next_token_index += 1
+ next_next_token, _ = lexed[next_token_index]
+ first_token_on_line = next_token.location.start_column == 0
+
+ # String content inside nested heredocs and interpolation is ignored
+ if next_token.type == :HEREDOC_START || next_token.type == :EMBEXPR_BEGIN
+ # When interpolation is the first token of a line there is no string
+ # content to check against. There will be no common whitespace.
+ if nesting_level == 0 && first_token_on_line
+ result = 0
+ end
+ nesting_level += 1
+ elsif next_token.type == :HEREDOC_END || next_token.type == :EMBEXPR_END
+ nesting_level -= 1
+ # When we encountered the matching heredoc end, we can exit
+ break if nesting_level == -1
+ elsif next_token.type == :STRING_CONTENT && nesting_level == 0 && first_token_on_line
+ common_whitespace = 0
+ next_token.value[/^\s*/].each_char do |char|
+ if char == "\t"
+ common_whitespace = (common_whitespace / 8 + 1) * 8;
+ else
+ common_whitespace += 1
+ end
+ end
+
+ is_first_token_on_line = next_token.location.start_line != previous_line
+ # Whitespace is significant if followed by interpolation
+ whitespace_only = common_whitespace == next_token.value.length && next_next_token&.location&.start_line != next_token.location.start_line
+ if is_first_token_on_line && !whitespace_only && common_whitespace < result
+ result = common_whitespace
+ previous_line = next_token.location.start_line
+ end
+ end
+ end
+ result
+ end
+
+ # Wonky heredoc tab/spaces rules.
+ # https://github.com/ruby/prism/blob/v1.3.0/src/prism.c#L16528-L16545
+ def trim_heredoc_whitespace(string, heredoc)
+ trimmed_whitespace = 0
+ trimmed_characters = 0
+ while (string[trimmed_characters] == "\t" || string[trimmed_characters] == " ") && trimmed_whitespace < heredoc.common_whitespace
+ if string[trimmed_characters] == "\t"
+ trimmed_whitespace = (trimmed_whitespace / 8 + 1) * 8;
+ break if trimmed_whitespace > heredoc.common_whitespace
+ else
+ trimmed_whitespace += 1
+ end
+ trimmed_characters += 1
+ end
+
+ string[trimmed_characters..]
+ end
+
+ # Escape sequences that have special and should appear unescaped in the resulting string.
+ ESCAPES = {
+ "a" => "\a", "b" => "\b", "e" => "\e", "f" => "\f",
+ "n" => "\n", "r" => "\r", "s" => "\s", "t" => "\t",
+ "v" => "\v", "\\" => "\\"
+ }.freeze
+ private_constant :ESCAPES
+
+ # When one of these delimiters is encountered, then the other
+ # one is allowed to be escaped as well.
+ DELIMITER_SYMETRY = { "[" => "]", "(" => ")", "{" => "}", "<" => ">" }.freeze
+ private_constant :DELIMITER_SYMETRY
+
+
+ # https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/lexer-strings.rl#L14
+ REGEXP_META_CHARACTERS = ["\\", "$", "(", ")", "*", "+", ".", "<", ">", "?", "[", "]", "^", "{", "|", "}"]
+ private_constant :REGEXP_META_CHARACTERS
+
+ # Apply Ruby string escaping rules
+ def unescape_string(string, quote)
+ # In single-quoted heredocs, everything is taken literally.
+ return string if quote == "<<'"
+
+ # OPTIMIZATION: Assume that few strings need escaping to speed up the common case.
+ return string unless string.include?("\\")
+
+ # Enclosing character for the string. `"` for `"foo"`, `{` for `%w{foo}`, etc.
+ delimiter = quote[-1]
+
+ if regexp?(quote)
+ # Should be escaped handled to single-quoted heredocs. The only character that is
+ # allowed to be escaped is the delimiter, except when that also has special meaning
+ # in the regexp. Since all the symetry delimiters have special meaning, they don't need
+ # to be considered separately.
+ if REGEXP_META_CHARACTERS.include?(delimiter)
+ string
+ else
+ # There can never be an even amount of backslashes. It would be a syntax error.
+ string.gsub(/\\(#{Regexp.escape(delimiter)})/, '\1')
+ end
+ elsif interpolation?(quote)
+ # Appending individual escape sequences may force the string out of its intended
+ # encoding. Start out with binary and force it back later.
+ result = "".b
+
+ scanner = StringScanner.new(string)
+ while (skipped = scanner.skip_until(/\\/))
+ # Append what was just skipped over, excluding the found backslash.
+ result.append_as_bytes(string.byteslice(scanner.pos - skipped, skipped - 1))
+ escape_read(result, scanner, false, false)
+ end
+
+ # Add remaining chars
+ result.append_as_bytes(string.byteslice(scanner.pos..))
+ result.force_encoding(source_buffer.source.encoding)
+ else
+ delimiters = Regexp.escape("#{delimiter}#{DELIMITER_SYMETRY[delimiter]}")
+ string.gsub(/\\([\\#{delimiters}])/, '\1')
+ end
+ end
+
+ # Certain strings are merged into a single string token.
+ def simplify_string?(value, quote)
+ case quote
+ when "'"
+ # Only simplify 'foo'
+ !value.include?("\n")
+ when '"'
+ # Simplify when every line ends with a line continuation, or it is the last line
+ value.lines.all? do |line|
+ !line.end_with?("\n") || line[/(\\*)$/, 1]&.length&.odd?
+ end
+ else
+ # %q and similar are never simplified
+ false
+ end
+ end
+
+ # Escape a byte value, given the control and meta flags.
+ def escape_build(value, control, meta)
+ value &= 0x9f if control
+ value |= 0x80 if meta
+ value
+ end
+
+ # Read an escape out of the string scanner, given the control and meta
+ # flags, and push the unescaped value into the result.
+ def escape_read(result, scanner, control, meta)
+ if scanner.skip("\n")
+ # Line continuation
+ elsif (value = ESCAPES[scanner.peek(1)])
+ # Simple single-character escape sequences like \n
+ result.append_as_bytes(value)
+ scanner.pos += 1
+ elsif (value = scanner.scan(/[0-7]{1,3}/))
+ # \nnn
+ result.append_as_bytes(escape_build(value.to_i(8), control, meta))
+ elsif (value = scanner.scan(/x[0-9a-fA-F]{1,2}/))
+ # \xnn
+ result.append_as_bytes(escape_build(value[1..].to_i(16), control, meta))
+ elsif (value = scanner.scan(/u[0-9a-fA-F]{4}/))
+ # \unnnn
+ result.append_as_bytes(value[1..].hex.chr(Encoding::UTF_8))
+ elsif scanner.skip("u{}")
+ # https://github.com/whitequark/parser/issues/856
+ elsif (value = scanner.scan(/u{.*?}/))
+ # \u{nnnn ...}
+ value[2..-2].split.each do |unicode|
+ result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8))
+ end
+ elsif (value = scanner.scan(/c\\?(?=[[:print:]])|C-\\?(?=[[:print:]])/))
+ # \cx or \C-x where x is an ASCII printable character
+ escape_read(result, scanner, true, meta)
+ elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/))
+ # \M-x where x is an ASCII printable character
+ escape_read(result, scanner, control, true)
+ elsif (byte = scanner.scan_byte)
+ # Something else after an escape.
+ if control && byte == 0x3f # ASCII '?'
+ result.append_as_bytes(escape_build(0x7f, false, meta))
+ else
+ result.append_as_bytes(escape_build(byte, control, meta))
+ end
+ end
+ end
+
+ # In a percent array, certain whitespace can be preceeded with a backslash,
+ # causing the following characters to be part of the previous element.
+ def percent_array_unescape(string)
+ string.gsub(/(\\)+[ \f\n\r\t\v]/) do |full_match|
+ full_match.delete_prefix!("\\") if Regexp.last_match[1].length.odd?
+ full_match
+ end
+ end
+
+ # For %-arrays whitespace, the parser gem only considers whitespace before the newline.
+ def percent_array_leading_whitespace(string)
+ return 1 if string.start_with?("\n")
+
+ leading_whitespace = 0
+ string.each_char do |c|
+ break if c == "\n"
+ leading_whitespace += 1
+ end
+ leading_whitespace
+ end
+
+ # Determine if characters preceeded by a backslash should be escaped or not
+ def interpolation?(quote)
+ !quote.end_with?("'") && !quote.start_with?("%q", "%w", "%i", "%s")
+ end
+
+ # Regexp allow interpolation but are handled differently during unescaping
+ def regexp?(quote)
+ quote == "/" || quote.start_with?("%r")
+ end
+
+ # Determine if the string is part of a %-style array.
+ def percent_array?(quote)
+ quote.start_with?("%w", "%W", "%i", "%I")
+ end
end
end
end
diff --git a/lib/prism/translation/parser33.rb b/lib/prism/translation/parser33.rb
deleted file mode 100644
index b09266e06a..0000000000
--- a/lib/prism/translation/parser33.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-
-module Prism
- module Translation
- # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`.
- class Parser33 < Parser
- def version # :nodoc:
- 33
- end
- end
- end
-end
diff --git a/lib/prism/translation/parser34.rb b/lib/prism/translation/parser34.rb
deleted file mode 100644
index 0ead70ad3c..0000000000
--- a/lib/prism/translation/parser34.rb
+++ /dev/null
@@ -1,12 +0,0 @@
-# frozen_string_literal: true
-
-module Prism
- module Translation
- # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`.
- class Parser34 < Parser
- def version # :nodoc:
- 34
- end
- end
- end
-end
diff --git a/lib/prism/translation/parser_current.rb b/lib/prism/translation/parser_current.rb
new file mode 100644
index 0000000000..f7c1070e30
--- /dev/null
+++ b/lib/prism/translation/parser_current.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+# :markup: markdown
+#--
+# typed: ignore
+
+module Prism
+ module Translation
+ case RUBY_VERSION
+ when /^3\.3\./
+ ParserCurrent = Parser33
+ when /^3\.4\./
+ ParserCurrent = Parser34
+ when /^3\.5\./, /^4\.0\./
+ ParserCurrent = Parser40
+ when /^4\.1\./
+ ParserCurrent = Parser41
+ else
+ # Keep this in sync with released Ruby.
+ parser = Parser40
+ major, minor, _patch = Gem::Version.new(RUBY_VERSION).segments
+ warn "warning: `Prism::Translation::Current` is loading #{parser.name}, " \
+ "but you are running #{major}.#{minor}."
+ ParserCurrent = parser
+ end
+ end
+end
diff --git a/lib/prism/translation/parser_versions.rb b/lib/prism/translation/parser_versions.rb
new file mode 100644
index 0000000000..720c7d548c
--- /dev/null
+++ b/lib/prism/translation/parser_versions.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+module Prism
+ module Translation
+ # This class is the entry-point for Ruby 3.3 of `Prism::Translation::Parser`.
+ class Parser33 < Parser
+ def version # :nodoc:
+ 33
+ end
+ end
+
+ # This class is the entry-point for Ruby 3.4 of `Prism::Translation::Parser`.
+ class Parser34 < Parser
+ def version # :nodoc:
+ 34
+ end
+ end
+
+ # This class is the entry-point for Ruby 4.0 of `Prism::Translation::Parser`.
+ class Parser40 < Parser
+ def version # :nodoc:
+ 40
+ end
+ end
+
+ Parser35 = Parser40 # :nodoc:
+
+ # This class is the entry-point for Ruby 4.1 of `Prism::Translation::Parser`.
+ class Parser41 < Parser
+ def version # :nodoc:
+ 41
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index 018842715b..f179a149a1 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -1,6 +1,5 @@
# frozen_string_literal: true
-
-require "ripper"
+# :markup: markdown
module Prism
module Translation
@@ -23,22 +22,10 @@ module Prism
# - on_comma
# - on_ignored_nl
# - on_ignored_sp
- # - on_kw
- # - on_label_end
- # - on_lbrace
- # - on_lbracket
- # - on_lparen
# - on_nl
- # - on_op
# - on_operator_ambiguous
- # - on_rbrace
- # - on_rbracket
- # - on_rparen
# - on_semicolon
# - on_sp
- # - on_symbeg
- # - on_tstring_beg
- # - on_tstring_end
#
class Ripper < Compiler
# Parses the given Ruby program read from +src+.
@@ -70,7 +57,8 @@ module Prism
# [[1, 13], :on_kw, "end", END ]]
#
def self.lex(src, filename = "-", lineno = 1, raise_errors: false)
- result = Prism.lex_compat(src, filepath: filename, line: lineno)
+ coerced = coerce_source(src)
+ result = Prism.lex_compat(coerced, filepath: filename, line: lineno, version: "current", encoding: coerced.encoding)
if result.failure? && raise_errors
raise SyntaxError, result.errors.first.message
@@ -79,6 +67,34 @@ module Prism
end
end
+ # Tokenizes the Ruby program and returns an array of strings.
+ # The +filename+ and +lineno+ arguments are mostly ignored, since the
+ # return value is just the tokenized input.
+ # By default, this method does not handle syntax errors in +src+,
+ # use the +raise_errors+ keyword to raise a SyntaxError for an error in +src+.
+ #
+ # p Ripper.tokenize("def m(a) nil end")
+ # # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]
+ #
+ def self.tokenize(...)
+ lex(...).map { |token| token[2] }
+ end
+
+ # Mirros the various lex_types that ripper supports
+ def self.coerce_source(source) # :nodoc:
+ if source.is_a?(IO)
+ source.read
+ elsif source.respond_to?(:gets)
+ src = +""
+ while line = source.gets
+ src << line
+ end
+ src
+ else
+ source.to_str
+ end
+ end
+
# This contains a table of all of the parser events and their
# corresponding arity.
PARSER_EVENT_TABLE = {
@@ -331,7 +347,7 @@ module Prism
"__ENCODING__",
"__FILE__",
"__LINE__"
- ]
+ ].to_set
# A list of all of the Ruby binary operators.
BINARY_OPERATORS = [
@@ -356,7 +372,7 @@ module Prism
:/,
:*,
:**
- ]
+ ].to_set
private_constant :KEYWORDS, :BINARY_OPERATORS
@@ -425,9 +441,93 @@ module Prism
end
end
+ autoload :Filter, "prism/translation/ripper/filter"
+ autoload :Lexer, "prism/translation/ripper/lexer"
autoload :SexpBuilder, "prism/translation/ripper/sexp"
autoload :SexpBuilderPP, "prism/translation/ripper/sexp"
+ # Provides optimized access to line and column information.
+ # Ripper bounds are mostly accessed in a linear fashion, so
+ # we can try a linear scan first and fall back to binary search.
+ class LineAndColumnCache # :nodoc:
+ # How many should it look ahead/behind before falling back to binary searching.
+ WINDOW = 8
+ private_constant :WINDOW
+
+ #: (Source source) -> void
+ def initialize(source)
+ @source = source
+ @offsets = source.offsets
+ @hint = 0
+ end
+
+ #: (Integer byte_offset) -> [Integer, Integer]
+ def line_and_column(byte_offset)
+ @hint = new_hint(byte_offset) || @source.find_line(byte_offset)
+ return [@hint + @source.start_line, byte_offset - @offsets[@hint]]
+ end
+
+ private
+
+ def new_hint(byte_offset)
+ if @offsets[@hint] <= byte_offset
+ # Same line?
+ if (@hint + 1 >= @offsets.size || @offsets[@hint + 1] > byte_offset)
+ return @hint
+ end
+
+ # Scan forwards
+ limit = [@hint + WINDOW + 1, @offsets.size].min
+ idx = @hint + 1
+ while idx < limit
+ if @offsets[idx] > byte_offset
+ return idx - 1
+ end
+ if @offsets[idx] == byte_offset
+ return idx
+ end
+ idx += 1
+ end
+ else
+ # Scan backwards
+ limit = @hint > WINDOW ? @hint - WINDOW : 0
+ idx = @hint
+ while idx >= limit + 1
+ if @offsets[idx - 1] <= byte_offset
+ return idx - 1
+ end
+ idx -= 1
+ end
+ end
+
+ nil
+ end
+ end
+
+ # :stopdoc:
+ # This is not part of the public API but used by some gems.
+
+ # Ripper-internal bitflags.
+ LEX_STATE_NAMES = %i[
+ BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM
+ ].map.with_index.to_h { |name, i| [2 ** i, name] }.freeze
+ private_constant :LEX_STATE_NAMES
+
+ LEX_STATE_NAMES.each do |value, key|
+ const_set("EXPR_#{key}", value)
+ end
+ EXPR_NONE = 0
+ EXPR_VALUE = EXPR_BEG
+ EXPR_BEG_ANY = EXPR_BEG | EXPR_MID | EXPR_CLASS
+ EXPR_ARG_ANY = EXPR_ARG | EXPR_CMDARG
+ EXPR_END_ANY = EXPR_END | EXPR_ENDARG | EXPR_ENDFN
+
+ def self.lex_state_name(state)
+ LEX_STATE_NAMES.filter_map { |flag, name| name if state & flag != 0 }.join("|")
+ end
+
+ # :startdoc:
+
# The source that is being parsed.
attr_reader :source
@@ -437,16 +537,17 @@ module Prism
# The current line number of the parser.
attr_reader :lineno
- # The current column number of the parser.
+ # The current column in bytes of the parser.
attr_reader :column
# Create a new Translation::Ripper object with the given source.
def initialize(source, filename = "(ripper)", lineno = 1)
- @source = source
+ @source = Ripper.coerce_source(source)
@filename = filename
@lineno = lineno
@column = 0
@result = nil
+ @line_and_column_cache = nil
end
##########################################################################
@@ -465,7 +566,12 @@ module Prism
bounds(location)
if comment.is_a?(InlineComment)
- on_comment(comment.slice)
+ # Inline comments always contain a newline if the line itself contains it
+ if result.source.source.bytesize > comment.location.end_offset
+ on_comment("#{comment.slice}\n")
+ else
+ on_comment(comment.slice)
+ end
else
offset = location.start_offset
lines = comment.slice.lines
@@ -546,9 +652,14 @@ module Prism
# Visitor methods
##########################################################################
+ # :stopdoc:
+
# alias foo bar
# ^^^^^^^^^^^^^
def visit_alias_method_node(node)
+ bounds(node.keyword_loc)
+ on_kw("alias")
+
new_name = visit(node.new_name)
old_name = visit(node.old_name)
@@ -559,6 +670,9 @@ module Prism
# alias $foo $bar
# ^^^^^^^^^^^^^^^
def visit_alias_global_variable_node(node)
+ bounds(node.keyword_loc)
+ on_kw("alias")
+
new_name = visit_alias_global_variable_node_value(node.new_name)
old_name = visit_alias_global_variable_node_value(node.old_name)
@@ -584,6 +698,10 @@ module Prism
# ^^^^^^^^^
def visit_alternation_pattern_node(node)
left = visit_pattern_node(node.left)
+
+ bounds(node.operator_loc)
+ on_op("|")
+
right = visit_pattern_node(node.right)
bounds(node.location)
@@ -594,7 +712,13 @@ module Prism
# parenthesis node that can be used to wrap patterns.
private def visit_pattern_node(node)
if node.is_a?(ParenthesesNode)
- visit(node.body)
+ bounds(node.opening_loc)
+ on_lparen("(")
+ result = visit(node.body)
+ bounds(node.closing_loc)
+ on_rparen(")")
+
+ result
else
visit(node)
end
@@ -604,6 +728,14 @@ module Prism
# ^^^^^^^
def visit_and_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ if node.operator == "and"
+ on_kw("and")
+ else
+ on_op("&&")
+ end
+
right = visit(node.right)
bounds(node.location)
@@ -631,6 +763,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
when /^%i/
@@ -650,6 +784,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
when /^%W/
@@ -687,6 +823,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
when /^%I/
@@ -724,6 +862,8 @@ module Prism
previous = element
end
+ visit_words_sep(opening_loc, node.elements.last, node.closing_loc)
+
bounds(node.closing_loc)
on_tstring_end(node.closing)
else
@@ -740,15 +880,21 @@ module Prism
on_array(elements)
end
- # Dispatch a words_sep event that contains the space between the elements
+ # Dispatch words_sep events that contains the whitespace between the elements
# of list literals.
private def visit_words_sep(opening_loc, previous, current)
- end_offset = (previous.nil? ? opening_loc : previous.location).end_offset
- start_offset = current.location.start_offset
-
- if end_offset != start_offset
- bounds(current.location.copy(start_offset: end_offset))
- on_words_sep(source.byteslice(end_offset...start_offset))
+ start_offset = (previous.nil? ? opening_loc : previous.location).end_offset
+ end_offset = current.start_offset
+ length = end_offset - start_offset
+
+ if length > 0
+ whitespace = source.byteslice(start_offset, length)
+ current_offset = start_offset
+ whitespace.each_line do |part|
+ bounds(opening_loc.copy(start_offset: current_offset, length: part.bytesize))
+ on_words_sep(part)
+ current_offset += part.bytesize
+ end
end
end
@@ -774,9 +920,18 @@ module Prism
# ^^^^^
def visit_array_pattern_node(node)
constant = visit(node.constant)
+
+ if node.opening_loc
+ bounds(node.opening_loc)
+ node.opening == "[" ? on_lbracket("[") : on_lparen("(")
+ end
+
requireds = visit_all(node.requireds) if node.requireds.any?
rest =
if (rest_node = node.rest).is_a?(SplatNode)
+ bounds(rest_node.operator_loc)
+ on_op("*")
+
if rest_node.expression.nil?
bounds(rest_node.location)
on_var_field(nil)
@@ -787,6 +942,10 @@ module Prism
posts = visit_all(node.posts) if node.posts.any?
+ if node.closing_loc
+ bounds(node.closing_loc)
+ node.closing == "]" ? on_rbracket("]") : on_rparen(")")
+ end
bounds(node.location)
on_aryptn(constant, requireds, rest, posts)
end
@@ -802,6 +961,12 @@ module Prism
# ^^^^
def visit_assoc_node(node)
key = visit(node.key)
+
+ if node.operator_loc
+ bounds(node.operator_loc)
+ on_op("=>")
+ end
+
value = visit(node.value)
bounds(node.location)
@@ -814,6 +979,9 @@ module Prism
# { **foo }
# ^^^^^
def visit_assoc_splat_node(node)
+ bounds(node.operator_loc)
+ on_op("**")
+
value = visit(node.value)
bounds(node.location)
@@ -830,8 +998,18 @@ module Prism
# begin end
# ^^^^^^^^^
def visit_begin_node(node)
+ if node.begin_keyword_loc
+ bounds(node.begin_keyword_loc)
+ on_kw("begin")
+ end
+
clauses = visit_begin_node_clauses(node.begin_keyword_loc, node, false)
+ if node.end_keyword_loc
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
on_begin(clauses)
end
@@ -843,7 +1021,7 @@ module Prism
on_stmts_add(on_stmts_new, on_void_stmt)
else
body = node.statements.body
- body.unshift(nil) if void_stmt?(location, node.statements.body[0].location, allow_newline)
+ body = [nil, *body] if void_stmt?(location, node.statements.body[0].location, allow_newline)
bounds(node.statements.location)
visit_statements_node_body(body)
@@ -852,12 +1030,15 @@ module Prism
rescue_clause = visit(node.rescue_clause)
else_clause =
unless (else_clause_node = node.else_clause).nil?
+ bounds(else_clause_node.else_keyword_loc)
+ on_kw("else")
+
else_statements =
if else_clause_node.statements.nil?
[nil]
else
body = else_clause_node.statements.body
- body.unshift(nil) if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline)
+ body = [nil, *body] if void_stmt?(else_clause_node.else_keyword_loc, else_clause_node.statements.body[0].location, allow_newline)
body
end
@@ -879,7 +1060,7 @@ module Prism
on_bodystmt(visit_statements_node_body([nil]), nil, nil, nil)
when StatementsNode
body = [*node.body]
- body.unshift(nil) if void_stmt?(location, body[0].location, allow_newline)
+ body = [nil, *body] if void_stmt?(location, body[0].location, allow_newline)
stmts = visit_statements_node_body(body)
bounds(node.body.first.location)
@@ -894,6 +1075,8 @@ module Prism
# foo(&bar)
# ^^^^
def visit_block_argument_node(node)
+ bounds(node.operator_loc)
+ on_op("&")
visit(node.expression)
end
@@ -907,6 +1090,13 @@ module Prism
# Visit a BlockNode.
def visit_block_node(node)
braces = node.opening == "{"
+ bounds(node.opening_loc)
+ if braces
+ on_lbrace("{")
+ else
+ on_kw("do")
+ end
+
parameters = visit(node.parameters)
body =
@@ -919,7 +1109,7 @@ module Prism
braces ? stmts : on_bodystmt(stmts, nil, nil, nil)
when StatementsNode
stmts = node.body.body
- stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
+ stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
stmts = visit_statements_node_body(stmts)
bounds(node.body.location)
@@ -931,6 +1121,14 @@ module Prism
end
if braces
+ bounds(node.closing_loc)
+ on_rbrace("}")
+ else
+ bounds(node.closing_loc)
+ on_kw("end")
+ end
+
+ if braces
bounds(node.location)
on_brace_block(parameters, body)
else
@@ -942,12 +1140,15 @@ module Prism
# def foo(&bar); end
# ^^^^
def visit_block_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("&")
+
if node.name_loc.nil?
bounds(node.location)
on_blockarg(nil)
else
bounds(node.name_loc)
- name = visit_token(node.name.to_s)
+ name = on_ident(node.name.to_s)
bounds(node.location)
on_blockarg(name)
@@ -956,6 +1157,9 @@ module Prism
# A block's parameters.
def visit_block_parameters_node(node)
+ bounds(node.opening_loc)
+ on_op("|")
+
parameters =
if node.parameters.nil?
on_params(nil, nil, nil, nil, nil, nil, nil)
@@ -970,6 +1174,9 @@ module Prism
false
end
+ bounds(node.closing_loc)
+ on_op("|")
+
bounds(node.location)
on_block_var(parameters, locals)
end
@@ -980,6 +1187,9 @@ module Prism
# break foo
# ^^^^^^^^^
def visit_break_node(node)
+ bounds(node.keyword_loc)
+ on_kw("break")
+
if node.arguments.nil?
bounds(node.location)
on_break(on_args_new)
@@ -1004,20 +1214,32 @@ module Prism
case node.name
when :[]
receiver = visit(node.receiver)
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
+ block = visit(block_node)
bounds(node.location)
call = on_aref(receiver, arguments)
- if block.nil?
- call
- else
+ if block_node
bounds(node.location)
on_method_add_block(call, block)
+ else
+ call
end
when :[]=
receiver = visit(node.receiver)
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
*arguments, last_argument = node.arguments.arguments
arguments << node.block if !node.block.nil?
@@ -1033,6 +1255,11 @@ module Prism
end
end
+ bounds(node.closing_loc)
+ on_rbracket("]")
+ bounds(node.equal_loc)
+ on_op("=")
+
bounds(node.location)
call = on_aref_field(receiver, arguments)
value = visit_write_value(last_argument)
@@ -1040,17 +1267,54 @@ module Prism
bounds(last_argument.location)
on_assign(call, value)
when :-@, :+@, :~
- receiver = visit(node.receiver)
+ bounds(node.message_loc)
+ on_op(node.message)
+ receiver = visit(node.receiver)
bounds(node.location)
on_unary(node.name, receiver)
when :!
- receiver = visit(node.receiver)
+ bounds(node.message_loc)
+ if node.message == "not"
+ on_kw("not")
- bounds(node.location)
- on_unary(node.message == "not" ? :not : :!, receiver)
- when *BINARY_OPERATORS
+ if node.opening_loc
+ bounds(node.opening_loc)
+ on_lparen("(")
+ end
+
+ receiver =
+ if node.receiver.is_a?(ParenthesesNode) && node.receiver.body.nil?
+ # The parens in `not()` just emit parens and nothing else.
+ bounds(node.receiver.opening_loc)
+ on_lparen("(")
+ bounds(node.receiver.closing_loc)
+ on_rparen(")")
+ nil
+ else
+ visit(node.receiver)
+ end
+
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_rparen(")")
+ end
+ bounds(node.location)
+ on_unary(:not, receiver)
+ else
+ on_op("!")
+
+ receiver = visit(node.receiver)
+
+ bounds(node.location)
+ on_unary(:!, receiver)
+ end
+ when BINARY_OPERATORS
receiver = visit(node.receiver)
+
+ bounds(node.message_loc)
+ on_op(node.message)
+
value = visit(node.arguments.arguments.first)
bounds(node.location)
@@ -1062,9 +1326,21 @@ module Prism
if node.variable_call?
on_vcall(message)
else
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+ if node.opening_loc
+ bounds(node.opening_loc)
+ on_lparen("(")
+ end
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_rparen(")")
+ end
+
+ block = visit(block_node)
call =
- if node.opening_loc.nil? && arguments&.any?
+ if node.opening_loc.nil? && get_arguments_and_block(node.arguments, node.block).first.any?
bounds(node.location)
on_command(message, arguments)
elsif !node.opening_loc.nil?
@@ -1075,11 +1351,11 @@ module Prism
on_method_add_arg(on_fcall(message), on_args_new)
end
- if block.nil?
- call
- else
+ if block_node
bounds(node.block.location)
on_method_add_block(call, block)
+ else
+ call
end
end
end
@@ -1087,7 +1363,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
message =
if node.message_loc.nil?
@@ -1097,13 +1373,30 @@ module Prism
visit_token(node.message, false)
end
+ if node.equal_loc
+ bounds(node.equal_loc)
+ on_op("=")
+ end
+
if node.name.end_with?("=") && !node.message.end_with?("=") && !node.arguments.nil? && node.block.nil?
value = visit_write_value(node.arguments.arguments.first)
bounds(node.location)
on_assign(on_field(receiver, call_operator, message), value)
else
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+ if node.opening_loc
+ bounds(node.opening_loc)
+ on_lparen("(")
+ end
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc || node.location))
+
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_rparen(")")
+ end
+
+ block = visit(block_node)
call =
if node.opening_loc.nil?
bounds(node.location)
@@ -1121,27 +1414,35 @@ module Prism
on_method_add_arg(on_call(receiver, call_operator, message), arguments)
end
- if block.nil?
- call
- else
+ if block_node
bounds(node.block.location)
on_method_add_block(call, block)
+ else
+ call
end
end
end
end
- # Visit the arguments and block of a call node and return the arguments
- # and block as they should be used.
- private def visit_call_node_arguments(arguments_node, block_node, trailing_comma)
+ # Extract the arguments and block Ripper-style, which means if the block
+ # is like `&b` then it's moved to arguments.
+ private def get_arguments_and_block(arguments_node, block_node)
arguments = arguments_node&.arguments || []
block = block_node
if block.is_a?(BlockArgumentNode)
- arguments << block
+ arguments += [block]
block = nil
end
+ [arguments, block]
+ end
+
+ # Visit the arguments and block of a call node and return the arguments
+ # and block as they should be used.
+ private def visit_call_node_arguments(arguments_node, block_node, trailing_comma)
+ arguments, block = get_arguments_and_block(arguments_node, block_node)
+
[
if arguments.length == 1 && arguments.first.is_a?(ForwardingArgumentsNode)
visit(arguments.first)
@@ -1155,7 +1456,7 @@ module Prism
on_args_add_block(args, false)
end
end,
- visit(block)
+ block,
]
end
@@ -1173,7 +1474,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1195,7 +1496,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1217,7 +1518,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1239,6 +1540,9 @@ module Prism
if node.call_operator == "::"
receiver = visit(node.receiver)
+ bounds(node.call_operator_loc)
+ on_op("::")
+
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1248,7 +1552,7 @@ module Prism
receiver = visit(node.receiver)
bounds(node.call_operator_loc)
- call_operator = visit_token(node.call_operator)
+ call_operator = visit_call_operator(node.call_operator)
bounds(node.message_loc)
message = visit_token(node.message)
@@ -1262,6 +1566,10 @@ module Prism
# ^^^^^^^^^^
def visit_capture_pattern_node(node)
value = visit(node.value)
+
+ bounds(node.operator_loc)
+ on_op("=>")
+
target = visit(node.target)
bounds(node.location)
@@ -1271,10 +1579,21 @@ module Prism
# case foo; when bar; end
# ^^^^^^^^^^^^^^^^^^^^^^^
def visit_case_node(node)
+ bounds(node.case_keyword_loc)
+ on_kw("case")
+
predicate = visit(node.predicate)
+ visited_conditions = node.conditions.map { |condition| visit(condition) }
+ visited_else_clause = visit(node.else_clause)
+
+ if !node.else_clause
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
clauses =
- node.conditions.reverse_each.inject(visit(node.else_clause)) do |current, condition|
- on_when(*visit(condition), current)
+ visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition|
+ on_when(*condition, current)
end
bounds(node.location)
@@ -1284,10 +1603,23 @@ module Prism
# case foo; in bar; end
# ^^^^^^^^^^^^^^^^^^^^^
def visit_case_match_node(node)
+ bounds(node.case_keyword_loc)
+ on_kw("case")
+
predicate = visit(node.predicate)
+ visited_conditions = node.conditions.map do | condition|
+ visit(condition)
+ end
+ visited_else_clause = visit(node.else_clause)
+
+ if !node.else_clause
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
clauses =
- node.conditions.reverse_each.inject(visit(node.else_clause)) do |current, condition|
- on_in(*visit(condition), current)
+ visited_conditions.reverse_each.inject(visited_else_clause) do |current, condition|
+ on_in(*condition, current)
end
bounds(node.location)
@@ -1297,6 +1629,9 @@ module Prism
# class Foo; end
# ^^^^^^^^^^^^^^
def visit_class_node(node)
+ bounds(node.class_keyword_loc)
+ on_kw("class")
+
constant_path =
if node.constant_path.is_a?(ConstantReadNode)
bounds(node.constant_path.location)
@@ -1305,9 +1640,17 @@ module Prism
visit(node.constant_path)
end
+ if node.inheritance_operator_loc
+ bounds(node.inheritance_operator_loc)
+ on_op("<")
+ end
+
superclass = visit(node.superclass)
bodystmt = visit_body_node(node.superclass&.location || node.constant_path.location, node.body, node.superclass.nil?)
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_class(constant_path, superclass, bodystmt)
end
@@ -1321,12 +1664,13 @@ module Prism
# @@foo = 1
# ^^^^^^^^^
- #
- # @@foo, @@bar = 1
- # ^^^^^ ^^^^^
def visit_class_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_cvar(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1391,12 +1735,13 @@ module Prism
# Foo = 1
# ^^^^^^^
- #
- # Foo, Bar = 1
- # ^^^ ^^^
def visit_constant_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_const(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1456,6 +1801,11 @@ module Prism
# ^^^^^^^^
def visit_constant_path_node(node)
if node.parent.nil?
+ if node.delimiter_loc
+ bounds(node.delimiter_loc)
+ on_op("::")
+ end
+
bounds(node.name_loc)
child = on_const(node.name.to_s)
@@ -1464,6 +1814,9 @@ module Prism
else
parent = visit(node.parent)
+ bounds(node.delimiter_loc)
+ on_op("::")
+
bounds(node.name_loc)
child = on_const(node.name.to_s)
@@ -1474,11 +1827,12 @@ module Prism
# Foo::Bar = 1
# ^^^^^^^^^^^^
- #
- # Foo::Foo, Bar::Bar = 1
- # ^^^^^^^^ ^^^^^^^^
def visit_constant_path_write_node(node)
target = visit_constant_path_write_node_target(node.target)
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1488,6 +1842,11 @@ module Prism
# Visit a constant path that is part of a write node.
private def visit_constant_path_write_node_target(node)
if node.parent.nil?
+ if node.delimiter_loc
+ bounds(node.delimiter_loc)
+ on_op("::")
+ end
+
bounds(node.name_loc)
child = on_const(node.name.to_s)
@@ -1496,6 +1855,9 @@ module Prism
else
parent = visit(node.parent)
+ bounds(node.delimiter_loc)
+ on_op("::")
+
bounds(node.name_loc)
child = on_const(node.name.to_s)
@@ -1508,7 +1870,6 @@ module Prism
# ^^^^^^^^^^^^^^^
def visit_constant_path_operator_write_node(node)
target = visit_constant_path_write_node_target(node.target)
- value = visit(node.value)
bounds(node.binary_operator_loc)
operator = on_op("#{node.binary_operator}=")
@@ -1522,7 +1883,6 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_constant_path_and_write_node(node)
target = visit_constant_path_write_node_target(node.target)
- value = visit(node.value)
bounds(node.operator_loc)
operator = on_op("&&=")
@@ -1536,7 +1896,6 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_constant_path_or_write_node(node)
target = visit_constant_path_write_node_target(node.target)
- value = visit(node.value)
bounds(node.operator_loc)
operator = on_op("||=")
@@ -1558,16 +1917,24 @@ module Prism
# def self.foo; end
# ^^^^^^^^^^^^^^^^^
def visit_def_node(node)
+ bounds(node.def_keyword_loc)
+ on_kw("def")
+
receiver = visit(node.receiver)
operator =
if !node.operator_loc.nil?
bounds(node.operator_loc)
- visit_token(node.operator)
+ node.operator == "." ? on_period(".") : on_op("::")
end
bounds(node.name_loc)
name = visit_token(node.name_loc.slice)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
parameters =
if node.parameters.nil?
bounds(node.location)
@@ -1577,10 +1944,17 @@ module Prism
end
if !node.lparen_loc.nil?
+ bounds(node.rparen_loc)
+ on_rparen(")")
bounds(node.lparen_loc)
parameters = on_paren(parameters)
end
+ if node.equal_loc
+ bounds(node.equal_loc)
+ on_op("=")
+ end
+
bodystmt =
if node.equal_loc.nil?
visit_body_node(node.rparen_loc || node.end_keyword_loc, node.body)
@@ -1591,11 +1965,16 @@ module Prism
on_bodystmt(body, nil, nil, nil)
end
+ if node.end_keyword_loc
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
- if receiver.nil?
- on_def(name, parameters, bodystmt)
- else
+ if receiver
on_defs(receiver, operator, name, parameters, bodystmt)
+ else
+ on_def(name, parameters, bodystmt)
end
end
@@ -1605,24 +1984,59 @@ module Prism
# defined?(a)
# ^^^^^^^^^^^
def visit_defined_node(node)
+ bounds(node.keyword_loc)
+ on_kw("defined?")
+
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
+ expression = visit(node.value)
+
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
+ # Very weird circumstances here where something like:
+ #
+ # defined?
+ # (1)
+ #
+ # gets parsed in Ruby as having only the `1` expression but in Ripper it
+ # gets parsed as having a parentheses node. In this case we need to
+ # synthesize that node to match Ripper's behavior.
+ if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n")
+ bounds(node.lparen_loc.join(node.rparen_loc))
+ expression = on_paren(on_stmts_add(on_stmts_new, expression))
+ end
+
bounds(node.location)
- on_defined(visit(node.value))
+ on_defined(expression)
end
# if foo then bar else baz end
# ^^^^^^^^^^^^
def visit_else_node(node)
+ bounds(node.else_keyword_loc)
+ on_kw("else")
+
statements =
if node.statements.nil?
[nil]
else
body = node.statements.body
- body.unshift(nil) if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false)
+ body = [nil, *body] if void_stmt?(node.else_keyword_loc, node.statements.body[0].location, false)
body
end
+ else_statements = visit_statements_node_body(statements)
+
+ bounds(node.end_keyword_loc)
+ on_kw("end")
bounds(node.location)
- on_else(visit_statements_node_body(statements))
+ on_else(else_statements)
end
# "foo #{bar}"
@@ -1660,12 +2074,15 @@ module Prism
# Visit an EnsureNode node.
def visit_ensure_node(node)
+ bounds(node.ensure_keyword_loc)
+ on_kw("ensure")
+
statements =
if node.statements.nil?
[nil]
else
body = node.statements.body
- body.unshift(nil) if void_stmt?(node.ensure_keyword_loc, body[0].location, false)
+ body = [nil, *body] if void_stmt?(node.ensure_keyword_loc, body[0].location, false)
body
end
@@ -1686,6 +2103,14 @@ module Prism
# ^^^^^^^^^^^
def visit_find_pattern_node(node)
constant = visit(node.constant)
+
+ if node.opening_loc
+ bounds(node.opening_loc)
+ node.opening == "[" ? on_lbracket("[") : on_lparen("(")
+ end
+ bounds(node.left.operator_loc)
+ on_op("*")
+
left =
if node.left.expression.nil?
bounds(node.left.location)
@@ -1695,6 +2120,10 @@ module Prism
end
requireds = visit_all(node.requireds) if node.requireds.any?
+
+ bounds(node.right.operator_loc)
+ on_op("*")
+
right =
if node.right.expression.nil?
bounds(node.right.location)
@@ -1703,6 +2132,10 @@ module Prism
visit(node.right.expression)
end
+ if node.closing_loc
+ bounds(node.closing_loc)
+ node.closing == "]" ? on_rbracket("]") : on_rparen(")")
+ end
bounds(node.location)
on_fndptn(constant, left, requireds, right)
end
@@ -1711,6 +2144,10 @@ module Prism
# ^^^^^^^^^^
def visit_flip_flop_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ on_op(node.operator)
+
right = visit(node.right)
bounds(node.location)
@@ -1730,8 +2167,18 @@ module Prism
# for foo in bar do end
# ^^^^^^^^^^^^^^^^^^^^^
def visit_for_node(node)
+ bounds(node.for_keyword_loc)
+ on_kw("for")
+
index = visit(node.index)
+ bounds(node.in_keyword_loc)
+ on_kw("in")
+
collection = visit(node.collection)
+ if node.do_keyword_loc
+ bounds(node.do_keyword_loc)
+ on_kw("do")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -1740,6 +2187,9 @@ module Prism
visit(node.statements)
end
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_for(index, collection, statements)
end
@@ -1748,6 +2198,7 @@ module Prism
# ^^^
def visit_forwarding_arguments_node(node)
bounds(node.location)
+ on_op("...")
on_args_forward
end
@@ -1755,6 +2206,7 @@ module Prism
# ^^^
def visit_forwarding_parameter_node(node)
bounds(node.location)
+ on_op("...")
on_args_forward
end
@@ -1764,6 +2216,9 @@ module Prism
# super {}
# ^^^^^^^^
def visit_forwarding_super_node(node)
+ bounds(node.keyword_loc)
+ on_kw("super")
+
if node.block.nil?
bounds(node.location)
on_zsuper
@@ -1784,12 +2239,13 @@ module Prism
# $foo = 1
# ^^^^^^^^
- #
- # $foo, $bar = 1
- # ^^^^ ^^^^
def visit_global_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_gvar(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -1848,6 +2304,9 @@ module Prism
# {}
# ^^
def visit_hash_node(node)
+ bounds(node.opening_loc)
+ on_lbrace("{")
+
elements =
if node.elements.any?
args = visit_all(node.elements)
@@ -1856,6 +2315,8 @@ module Prism
on_assoclist_from_args(args)
end
+ bounds(node.closing_loc)
+ on_rbrace("}")
bounds(node.location)
on_hash(elements)
end
@@ -1864,6 +2325,15 @@ module Prism
# ^^
def visit_hash_pattern_node(node)
constant = visit(node.constant)
+
+ if node.constant
+ bounds(node.opening_loc)
+ node.opening == "[" ? on_lbracket("[") : on_lparen("(")
+ elsif node.opening_loc
+ bounds(node.opening_loc)
+ on_lbrace("{")
+ end
+
elements =
if node.elements.any? || !node.rest.nil?
node.elements.map do |element|
@@ -1886,12 +2356,21 @@ module Prism
rest =
case node.rest
when AssocSplatNode
+ bounds(node.rest.operator_loc)
+ on_op("**")
visit(node.rest.value)
when NoKeywordsParameterNode
bounds(node.rest.location)
on_var_field(visit(node.rest))
end
+ if node.constant
+ bounds(node.closing_loc)
+ node.closing == "]" ? on_rbracket("]") : on_rparen(")")
+ elsif node.closing_loc
+ bounds(node.closing_loc)
+ on_rbrace("}")
+ end
bounds(node.location)
on_hshptn(constant, elements, rest)
end
@@ -1907,13 +2386,27 @@ module Prism
def visit_if_node(node)
if node.then_keyword == "?"
predicate = visit(node.predicate)
+
+ bounds(node.then_keyword_loc)
+ on_op("?")
+
truthy = visit(node.statements.body.first)
+
+ bounds(node.subsequent.else_keyword_loc)
+ on_op(":")
+
falsy = visit(node.subsequent.statements.body.first)
bounds(node.location)
on_ifop(predicate, truthy, falsy)
elsif node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ bounds(node.if_keyword_loc)
+ on_kw(node.if_keyword)
predicate = visit(node.predicate)
+ if node.then_keyword_loc && node.then_keyword != "?"
+ bounds(node.then_keyword_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -1923,6 +2416,11 @@ module Prism
end
subsequent = visit(node.subsequent)
+ if node.end_keyword_loc && !node.subsequent
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
if node.if_keyword == "if"
on_if(predicate, statements, subsequent)
@@ -1931,6 +2429,8 @@ module Prism
end
else
statements = visit(node.statements.body.first)
+ bounds(node.if_keyword_loc)
+ on_kw(node.if_keyword)
predicate = visit(node.predicate)
bounds(node.location)
@@ -1962,7 +2462,14 @@ module Prism
# This is a special case where we're not going to call on_in directly
# because we don't have access to the subsequent. Instead, we'll return
# the component parts and let the parent node handle it.
+ bounds(node.in_loc)
+ on_kw("in")
+
pattern = visit_pattern_node(node.pattern)
+ if node.then_loc
+ bounds(node.then_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -1978,8 +2485,15 @@ module Prism
# ^^^^^^^^^^^^^^^
def visit_index_operator_write_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
target = on_aref_field(receiver, arguments)
@@ -1995,8 +2509,15 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_index_and_write_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
target = on_aref_field(receiver, arguments)
@@ -2012,8 +2533,15 @@ module Prism
# ^^^^^^^^^^^^^^^^
def visit_index_or_write_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
target = on_aref_field(receiver, arguments)
@@ -2029,8 +2557,15 @@ module Prism
# ^^^^^^^^
def visit_index_target_node(node)
receiver = visit(node.receiver)
+
+ bounds(node.opening_loc)
+ on_lbracket("[")
+
arguments, _ = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.closing_loc))
+ bounds(node.closing_loc)
+ on_rbracket("]")
+
bounds(node.location)
on_aref_field(receiver, arguments)
end
@@ -2047,6 +2582,10 @@ module Prism
def visit_instance_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_ivar(node.name.to_s))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -2149,20 +2688,37 @@ module Prism
# "foo #{bar}"
# ^^^^^^^^^^^^
def visit_interpolated_string_node(node)
- if node.opening&.start_with?("<<~")
- heredoc = visit_heredoc_string_node(node)
+ with_string_bounds(node) do
+ if node.opening&.start_with?("<<~")
+ heredoc = visit_heredoc_string_node(node)
- bounds(node.location)
- on_string_literal(heredoc)
- elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? }
- first, *rest = node.parts
- rest.inject(visit(first)) do |content, part|
- concat = visit(part)
+ bounds(node.location)
+ on_string_literal(heredoc)
+ elsif !node.heredoc? && node.parts.length > 1 && node.parts.any? { |part| (part.is_a?(StringNode) || part.is_a?(InterpolatedStringNode)) && !part.opening_loc.nil? }
+ first, *rest = node.parts
+ rest.inject(visit(first)) do |content, part|
+ concat = visit(part)
+
+ bounds(part.location)
+ on_string_concat(content, concat)
+ end
+ else
+ bounds(node.parts.first.location)
+ parts =
+ node.parts.inject(on_string_content) do |content, part|
+ on_string_add(content, visit_string_content(part))
+ end
- bounds(part.location)
- on_string_concat(content, concat)
+ bounds(node.location)
+ on_string_literal(parts)
end
- else
+ end
+ end
+
+ # :"foo #{bar}"
+ # ^^^^^^^^^^^^^
+ def visit_interpolated_symbol_node(node)
+ with_string_bounds(node) do
bounds(node.parts.first.location)
parts =
node.parts.inject(on_string_content) do |content, part|
@@ -2170,40 +2726,29 @@ module Prism
end
bounds(node.location)
- on_string_literal(parts)
+ on_dyna_symbol(parts)
end
end
- # :"foo #{bar}"
- # ^^^^^^^^^^^^^
- def visit_interpolated_symbol_node(node)
- bounds(node.parts.first.location)
- parts =
- node.parts.inject(on_string_content) do |content, part|
- on_string_add(content, visit_string_content(part))
- end
-
- bounds(node.location)
- on_dyna_symbol(parts)
- end
-
# `foo #{bar}`
# ^^^^^^^^^^^^
def visit_interpolated_x_string_node(node)
- if node.opening.start_with?("<<~")
- heredoc = visit_heredoc_x_string_node(node)
+ with_string_bounds(node) do
+ if node.opening.start_with?("<<~")
+ heredoc = visit_heredoc_x_string_node(node)
- bounds(node.location)
- on_xstring_literal(heredoc)
- else
- bounds(node.parts.first.location)
- parts =
- node.parts.inject(on_xstring_new) do |content, part|
- on_xstring_add(content, visit_string_content(part))
- end
+ bounds(node.location)
+ on_xstring_literal(heredoc)
+ else
+ bounds(node.parts.first.location)
+ parts =
+ node.parts.inject(on_xstring_new) do |content, part|
+ on_xstring_add(content, visit_string_content(part))
+ end
- bounds(node.location)
- on_xstring_literal(parts)
+ bounds(node.location)
+ on_xstring_literal(parts)
+ end
end
end
@@ -2244,6 +2789,9 @@ module Prism
# def foo(**); end
# ^^
def visit_keyword_rest_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("**")
+
if node.name_loc.nil?
bounds(node.location)
on_kwrest_param(nil)
@@ -2263,6 +2811,11 @@ module Prism
parameters =
if node.parameters.is_a?(BlockParametersNode)
+ if node.parameters.opening_loc
+ bounds(node.parameters.opening_loc)
+ on_lparen("(")
+ end
+
# Ripper does not track block-locals within lambdas, so we skip
# directly to the parameters here.
params =
@@ -2273,6 +2826,13 @@ module Prism
visit(node.parameters.parameters)
end
+ visit_all(node.parameters.locals)
+
+ if node.parameters.closing_loc
+ bounds(node.parameters.closing_loc)
+ on_rparen(")")
+ end
+
if node.parameters.opening_loc.nil?
params
else
@@ -2285,9 +2845,11 @@ module Prism
end
braces = node.opening == "{"
+ bounds(node.opening_loc)
if braces
- bounds(node.opening_loc)
on_tlambeg(node.opening)
+ else
+ on_kw("do")
end
body =
@@ -2300,7 +2862,7 @@ module Prism
braces ? stmts : on_bodystmt(stmts, nil, nil, nil)
when StatementsNode
stmts = node.body.body
- stmts.unshift(nil) if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
+ stmts = [nil, *stmts] if void_stmt?(node.parameters&.location || node.opening_loc, node.body.location, false)
stmts = visit_statements_node_body(stmts)
bounds(node.body.location)
@@ -2311,6 +2873,13 @@ module Prism
raise
end
+ bounds(node.closing_loc)
+ if braces
+ on_rbrace("}")
+ else
+ on_kw("end")
+ end
+
bounds(node.location)
on_lambda(parameters, body)
end
@@ -2327,6 +2896,10 @@ module Prism
def visit_local_variable_write_node(node)
bounds(node.name_loc)
target = on_var_field(on_ident(node.name_loc.slice))
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit_write_value(node.value)
bounds(node.location)
@@ -2401,6 +2974,8 @@ module Prism
# ^^^^^^^^^^
def visit_match_predicate_node(node)
value = visit(node.value)
+ bounds(node.operator_loc)
+ on_kw("in")
pattern = on_in(visit_pattern_node(node.pattern), nil, nil)
on_case(value, pattern)
@@ -2410,6 +2985,10 @@ module Prism
# ^^^^^^^^^^
def visit_match_required_node(node)
value = visit(node.value)
+
+ bounds(node.operator_loc)
+ on_op("=>")
+
pattern = on_in(visit_pattern_node(node.pattern), nil, nil)
on_case(value, pattern)
@@ -2423,13 +3002,16 @@ module Prism
# A node that is missing from the syntax tree. This is only used in the
# case of a syntax error.
- def visit_missing_node(node)
- raise "Cannot visit missing nodes directly."
+ def visit_error_recovery_node(node)
+ raise "Cannot visit error recovery nodes directly."
end
# module Foo; end
# ^^^^^^^^^^^^^^^
def visit_module_node(node)
+ bounds(node.module_keyword_loc)
+ on_kw("module")
+
constant_path =
if node.constant_path.is_a?(ConstantReadNode)
bounds(node.constant_path.location)
@@ -2440,6 +3022,9 @@ module Prism
bodystmt = visit_body_node(node.constant_path.location, node.body, true)
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_module(constant_path, bodystmt)
end
@@ -2447,9 +3032,19 @@ module Prism
# (foo, bar), bar = qux
# ^^^^^^^^^^
def visit_multi_target_node(node)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
bounds(node.location)
targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
if node.lparen_loc.nil?
targets
else
@@ -2501,9 +3096,22 @@ module Prism
# foo, bar = baz
# ^^^^^^^^^^^^^^
def visit_multi_write_node(node)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
bounds(node.location)
targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, true)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
+ bounds(node.operator_loc)
+ on_op("=")
+
unless node.lparen_loc.nil?
bounds(node.lparen_loc)
targets = on_mlhs_paren(targets)
@@ -2521,6 +3129,9 @@ module Prism
# next foo
# ^^^^^^^^
def visit_next_node(node)
+ bounds(node.keyword_loc)
+ on_kw("next")
+
if node.arguments.nil?
bounds(node.location)
on_next(on_args_new)
@@ -2539,9 +3150,24 @@ module Prism
on_var_ref(on_kw("nil"))
end
+ # def foo(&nil); end
+ # ^^^^
+ def visit_no_block_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("&")
+ bounds(node.keyword_loc)
+ on_kw("nil")
+ bounds(node.location)
+ on_blockarg(:nil)
+ end
+
# def foo(**nil); end
# ^^^^^
def visit_no_keywords_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("**")
+ bounds(node.keyword_loc)
+ on_kw("nil")
bounds(node.location)
on_nokw_param(nil)
@@ -2574,7 +3200,11 @@ module Prism
# ^^^^^^^
def visit_optional_parameter_node(node)
bounds(node.name_loc)
- name = visit_token(node.name.to_s)
+ name = on_ident(node.name.to_s)
+
+ bounds(node.operator_loc)
+ on_op("=")
+
value = visit(node.value)
[name, value]
@@ -2584,6 +3214,14 @@ module Prism
# ^^^^^^
def visit_or_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ if node.operator == "or"
+ on_kw("or")
+ else
+ on_op("||")
+ end
+
right = visit(node.right)
bounds(node.location)
@@ -2607,9 +3245,19 @@ module Prism
# Visit a destructured positional parameter node.
private def visit_destructured_parameter_node(node)
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
bounds(node.location)
targets = visit_multi_target_node_targets(node.lefts, node.rest, node.rights, false)
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
bounds(node.lparen_loc)
on_mlhs_paren(targets)
end
@@ -2620,6 +3268,9 @@ module Prism
# (1)
# ^^^
def visit_parentheses_node(node)
+ bounds(node.opening_loc)
+ on_lparen("(")
+
body =
if node.body.nil?
on_stmts_add(on_stmts_new, on_void_stmt)
@@ -2627,6 +3278,8 @@ module Prism
visit(node.body)
end
+ bounds(node.closing_loc)
+ on_rparen(")")
bounds(node.location)
on_paren(body)
end
@@ -2634,8 +3287,15 @@ module Prism
# foo => ^(bar)
# ^^^^^^
def visit_pinned_expression_node(node)
+ bounds(node.operator_loc)
+ on_op("^")
+ bounds(node.lparen_loc)
+ on_lparen("(")
+
expression = visit(node.expression)
+ bounds(node.rparen_loc)
+ on_rparen(")")
bounds(node.location)
on_begin(expression)
end
@@ -2643,12 +3303,20 @@ module Prism
# foo = 1 and bar => ^foo
# ^^^^
def visit_pinned_variable_node(node)
+ bounds(node.operator_loc)
+ on_op("^")
+
visit(node.variable)
end
# END {}
# ^^^^^^
def visit_post_execution_node(node)
+ bounds(node.keyword_loc)
+ on_kw("END")
+ bounds(node.opening_loc)
+ on_lbrace("{")
+
statements =
if node.statements.nil?
bounds(node.location)
@@ -2657,6 +3325,8 @@ module Prism
visit(node.statements)
end
+ bounds(node.closing_loc)
+ on_rbrace("}")
bounds(node.location)
on_END(statements)
end
@@ -2664,6 +3334,11 @@ module Prism
# BEGIN {}
# ^^^^^^^^
def visit_pre_execution_node(node)
+ bounds(node.keyword_loc)
+ on_kw("BEGIN")
+ bounds(node.opening_loc)
+ on_lbrace("{")
+
statements =
if node.statements.nil?
bounds(node.location)
@@ -2672,6 +3347,8 @@ module Prism
visit(node.statements)
end
+ bounds(node.closing_loc)
+ on_rbrace("}")
bounds(node.location)
on_BEGIN(statements)
end
@@ -2679,7 +3356,7 @@ module Prism
# The top-level program node.
def visit_program_node(node)
body = node.statements.body
- body << nil if body.empty?
+ body = [nil] if body.empty?
statements = visit_statements_node_body(body)
bounds(node.location)
@@ -2690,6 +3367,10 @@ module Prism
# ^^^^
def visit_range_node(node)
left = visit(node.left)
+
+ bounds(node.operator_loc)
+ on_op(node.operator)
+
right = visit(node.right)
bounds(node.location)
@@ -2710,6 +3391,7 @@ module Prism
# ^^^^
def visit_redo_node(node)
bounds(node.location)
+ on_kw("redo")
on_redo
end
@@ -2752,6 +3434,9 @@ module Prism
# foo rescue bar
# ^^^^^^^^^^^^^^
def visit_rescue_modifier_node(node)
+ bounds(node.keyword_loc)
+ on_kw("rescue")
+
expression = visit_write_value(node.expression)
rescue_expression = visit(node.rescue_expression)
@@ -2762,6 +3447,9 @@ module Prism
# begin; rescue; end
# ^^^^^^^
def visit_rescue_node(node)
+ bounds(node.keyword_loc)
+ on_kw("rescue")
+
exceptions =
case node.exceptions.length
when 0
@@ -2799,6 +3487,11 @@ module Prism
end
end
+ if node.operator_loc
+ bounds(node.operator_loc)
+ on_op("=>")
+ end
+
reference = visit(node.reference)
statements =
if node.statements.nil?
@@ -2820,12 +3513,15 @@ module Prism
# def foo(*); end
# ^
def visit_rest_parameter_node(node)
+ bounds(node.operator_loc)
+ on_op("*")
+
if node.name_loc.nil?
bounds(node.location)
on_rest_param(nil)
else
bounds(node.name_loc)
- on_rest_param(visit_token(node.name.to_s))
+ on_rest_param(on_ident(node.name.to_s))
end
end
@@ -2833,6 +3529,7 @@ module Prism
# ^^^^^
def visit_retry_node(node)
bounds(node.location)
+ on_kw("retry")
on_retry
end
@@ -2842,6 +3539,9 @@ module Prism
# return 1
# ^^^^^^^^
def visit_return_node(node)
+ bounds(node.keyword_loc)
+ on_kw("return")
+
if node.arguments.nil?
bounds(node.location)
on_return0
@@ -2868,9 +3568,17 @@ module Prism
# class << self; end
# ^^^^^^^^^^^^^^^^^^
def visit_singleton_class_node(node)
+ bounds(node.class_keyword_loc)
+ on_kw("class")
+ bounds(node.operator_loc)
+ on_op("<<")
+
expression = visit(node.expression)
bodystmt = visit_body_node(node.body&.location || node.end_keyword_loc, node.body)
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+
bounds(node.location)
on_sclass(expression, bodystmt)
end
@@ -2905,6 +3613,8 @@ module Prism
# def foo(*); bar(*); end
# ^
def visit_splat_node(node)
+ bounds(node.operator_loc)
+ on_op("*")
visit(node.expression)
end
@@ -2927,26 +3637,68 @@ module Prism
# "foo"
# ^^^^^
def visit_string_node(node)
- if (content = node.content).empty?
- bounds(node.location)
- on_string_literal(on_string_content)
- elsif (opening = node.opening) == "?"
- bounds(node.location)
- on_CHAR("?#{node.content}")
- elsif opening.start_with?("<<~")
- heredoc = visit_heredoc_string_node(node.to_interpolated)
+ with_string_bounds(node) do
+ if (content = node.content).empty?
+ bounds(node.location)
+ on_string_literal(on_string_content)
+ elsif (opening = node.opening) == "?"
+ bounds(node.location)
+ on_CHAR("?#{node.content}")
+ elsif opening.start_with?("<<~")
+ heredoc = visit_heredoc_string_node(node.to_interpolated)
- bounds(node.location)
- on_string_literal(heredoc)
- else
- bounds(node.content_loc)
- tstring_content = on_tstring_content(content)
+ bounds(node.location)
+ on_string_literal(heredoc)
+ else
+ bounds(node.content_loc)
+ tstring_content = on_tstring_content(content)
- bounds(node.location)
- on_string_literal(on_string_add(on_string_content, tstring_content))
+ bounds(node.location)
+ on_string_literal(on_string_add(on_string_content, tstring_content))
+ end
end
end
+ # Responsible for emitting the various string-like begin/end events
+ private def with_string_bounds(node)
+ # `foo "bar": baz` doesn't emit the closing location
+ assoc = !(opening = node.opening)&.include?(":") && node.closing&.end_with?(":")
+
+ is_heredoc = opening&.start_with?("<<")
+ if is_heredoc
+ bounds(node.opening_loc)
+ on_heredoc_beg(node.opening)
+ elsif opening&.start_with?(":", "%s")
+ bounds(node.opening_loc)
+ on_symbeg(node.opening)
+ elsif opening&.start_with?("`", "%x")
+ bounds(node.opening_loc)
+ on_backtick(node.opening)
+ elsif opening && !opening.start_with?("?")
+ bounds(node.opening_loc)
+ on_tstring_beg(opening)
+ end
+
+ result = yield
+ if assoc
+ if node.closing != ":"
+ bounds(node.closing_loc)
+ on_label_end(node.closing)
+ end
+ return result
+ end
+
+ if is_heredoc
+ bounds(node.closing_loc)
+ on_heredoc_end(node.closing)
+ elsif node.closing_loc
+ bounds(node.closing_loc)
+ on_tstring_end(node.closing)
+ end
+
+ result
+ end
+
# Ripper gives back the escaped string content but strips out the common
# leading whitespace. Prism gives back the unescaped string content and
# a location for the escaped string content. Unfortunately these don't
@@ -3024,42 +3776,39 @@ module Prism
# Visit a heredoc node that is representing a string.
private def visit_heredoc_string_node(node)
- bounds(node.opening_loc)
- on_heredoc_beg(node.opening)
-
bounds(node.location)
- result =
- visit_heredoc_node(node.parts, on_string_content) do |parts, part|
- on_string_add(parts, part)
- end
-
- bounds(node.closing_loc)
- on_heredoc_end(node.closing)
-
- result
+ visit_heredoc_node(node.parts, on_string_content) do |parts, part|
+ on_string_add(parts, part)
+ end
end
# Visit a heredoc node that is representing an xstring.
private def visit_heredoc_x_string_node(node)
- bounds(node.opening_loc)
- on_heredoc_beg(node.opening)
-
bounds(node.location)
- result =
- visit_heredoc_node(node.parts, on_xstring_new) do |parts, part|
- on_xstring_add(parts, part)
- end
-
- bounds(node.closing_loc)
- on_heredoc_end(node.closing)
-
- result
+ visit_heredoc_node(node.parts, on_xstring_new) do |parts, part|
+ on_xstring_add(parts, part)
+ end
end
# super(foo)
# ^^^^^^^^^^
def visit_super_node(node)
- arguments, block = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location))
+ bounds(node.keyword_loc)
+ on_kw("super")
+
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
+ arguments, block_node = visit_call_node_arguments(node.arguments, node.block, trailing_comma?(node.arguments&.location || node.location, node.rparen_loc || node.location))
+
+ if node.rparen_loc
+ bounds(node.rparen_loc)
+ on_rparen(")")
+ end
+
+ block = visit(block_node)
if !node.lparen_loc.nil?
bounds(node.lparen_loc)
@@ -3069,35 +3818,36 @@ module Prism
bounds(node.location)
call = on_super(arguments)
- if block.nil?
- call
- else
+ if block_node
bounds(node.block.location)
on_method_add_block(call, block)
+ else
+ call
end
end
# :foo
# ^^^^
def visit_symbol_node(node)
- if (opening = node.opening)&.match?(/^%s|['"]:?$/)
- bounds(node.value_loc)
- content = on_string_content
-
- if !(value = node.value).empty?
- content = on_string_add(content, on_tstring_content(value))
+ with_string_bounds(node) do
+ if node.value_loc.nil?
+ bounds(node.location)
+ on_dyna_symbol(on_string_content)
+ elsif (opening = node.opening)&.match?(/^%s|['"]:?$/)
+ bounds(node.value_loc)
+ content = on_string_add(on_string_content, on_tstring_content(node.value))
+ bounds(node.location)
+ on_dyna_symbol(content)
+ elsif (closing = node.closing) == ":"
+ bounds(node.location)
+ on_label("#{node.value}:")
+ elsif opening.nil? && node.closing_loc.nil?
+ bounds(node.value_loc)
+ on_symbol_literal(visit_token(node.value))
+ else
+ bounds(node.value_loc)
+ on_symbol_literal(on_symbol(visit_token(node.value)))
end
-
- on_dyna_symbol(content)
- elsif (closing = node.closing) == ":"
- bounds(node.location)
- on_label("#{node.value}:")
- elsif opening.nil? && node.closing_loc.nil?
- bounds(node.value_loc)
- on_symbol_literal(visit_token(node.value))
- else
- bounds(node.value_loc)
- on_symbol_literal(on_symbol(visit_token(node.value)))
end
end
@@ -3111,6 +3861,9 @@ module Prism
# undef foo
# ^^^^^^^^^
def visit_undef_node(node)
+ bounds(node.keyword_loc)
+ on_kw("undef")
+
names = visit_all(node.names)
bounds(node.location)
@@ -3124,7 +3877,13 @@ module Prism
# ^^^^^^^^^^^^^^
def visit_unless_node(node)
if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ bounds(node.keyword_loc)
+ on_kw("unless")
predicate = visit(node.predicate)
+ if node.then_keyword_loc
+ bounds(node.then_keyword_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -3134,10 +3893,17 @@ module Prism
end
else_clause = visit(node.else_clause)
+ if node.end_keyword_loc && !node.else_clause
+ bounds(node.end_keyword_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
on_unless(predicate, statements, else_clause)
else
statements = visit(node.statements.body.first)
+ bounds(node.keyword_loc)
+ on_kw("unless")
predicate = visit(node.predicate)
bounds(node.location)
@@ -3151,7 +3917,14 @@ module Prism
# bar until foo
# ^^^^^^^^^^^^^
def visit_until_node(node)
+ bounds(node.keyword_loc)
+ on_kw("until")
+
if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ if node.do_keyword_loc
+ bounds(node.do_keyword_loc)
+ on_kw("do")
+ end
predicate = visit(node.predicate)
statements =
if node.statements.nil?
@@ -3161,6 +3934,11 @@ module Prism
visit(node.statements)
end
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_kw("end")
+ end
+
bounds(node.location)
on_until(predicate, statements)
else
@@ -3178,7 +3956,14 @@ module Prism
# This is a special case where we're not going to call on_when directly
# because we don't have access to the subsequent. Instead, we'll return
# the component parts and let the parent node handle it.
+ bounds(node.keyword_loc)
+ on_kw("when")
+
conditions = visit_arguments(node.conditions)
+ if node.then_keyword_loc
+ bounds(node.then_keyword_loc)
+ on_kw("then")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -3197,7 +3982,17 @@ module Prism
# ^^^^^^^^^^^^^
def visit_while_node(node)
if node.statements.nil? || (node.predicate.location.start_offset < node.statements.location.start_offset)
+ bounds(node.keyword_loc)
+ on_kw("while")
+ if node.do_keyword_loc
+ bounds(node.do_keyword_loc)
+ on_kw("do")
+ end
predicate = visit(node.predicate)
+ if node.closing_loc
+ bounds(node.closing_loc)
+ on_kw("end")
+ end
statements =
if node.statements.nil?
bounds(node.location)
@@ -3210,6 +4005,8 @@ module Prism
on_while(predicate, statements)
else
statements = visit(node.statements.body.first)
+ bounds(node.keyword_loc)
+ on_kw("while")
predicate = visit(node.predicate)
bounds(node.location)
@@ -3220,20 +4017,22 @@ module Prism
# `foo`
# ^^^^^
def visit_x_string_node(node)
- if node.unescaped.empty?
- bounds(node.location)
- on_xstring_literal(on_xstring_new)
- elsif node.opening.start_with?("<<~")
- heredoc = visit_heredoc_x_string_node(node.to_interpolated)
+ with_string_bounds(node) do
+ if node.unescaped.empty?
+ bounds(node.location)
+ on_xstring_literal(on_xstring_new)
+ elsif node.opening.start_with?("<<~")
+ heredoc = visit_heredoc_x_string_node(node.to_interpolated)
- bounds(node.location)
- on_xstring_literal(heredoc)
- else
- bounds(node.content_loc)
- content = on_tstring_content(node.content)
+ bounds(node.location)
+ on_xstring_literal(heredoc)
+ else
+ bounds(node.content_loc)
+ content = on_tstring_content(node.content)
- bounds(node.location)
- on_xstring_literal(on_xstring_add(on_xstring_new, content))
+ bounds(node.location)
+ on_xstring_literal(on_xstring_add(on_xstring_new, content))
+ end
end
end
@@ -3243,10 +4042,18 @@ module Prism
# yield 1
# ^^^^^^^
def visit_yield_node(node)
+ bounds(node.keyword_loc)
+ on_kw("yield")
+
if node.arguments.nil? && node.lparen_loc.nil?
bounds(node.location)
on_yield0
else
+ if node.lparen_loc
+ bounds(node.lparen_loc)
+ on_lparen("(")
+ end
+
arguments =
if node.arguments.nil?
bounds(node.location)
@@ -3256,6 +4063,8 @@ module Prism
end
unless node.lparen_loc.nil?
+ bounds(node.rparen_loc)
+ on_rparen(")")
bounds(node.lparen_loc)
arguments = on_paren(arguments)
end
@@ -3269,7 +4078,11 @@ module Prism
# Lazily initialize the parse result.
def result
- @result ||= Prism.parse(source, partial_script: true)
+ @result ||= Prism.parse(source, partial_script: true, version: "current", freeze: true, encoding: source.encoding)
+ end
+
+ def line_and_column_cache
+ @line_and_column_cache ||= LineAndColumnCache.new(result.source)
end
##########################################################################
@@ -3290,30 +4103,34 @@ module Prism
# Visit the string content of a particular node. This method is used to
# split into the various token types.
def visit_token(token, allow_keywords = true)
- case token
- when "."
+ if token == "."
on_period(token)
- when "`"
+ elsif token == "`"
on_backtick(token)
- when *(allow_keywords ? KEYWORDS : [])
+ elsif allow_keywords && KEYWORDS.include?(token)
on_kw(token)
- when /^_/
+ elsif token.start_with?("_")
on_ident(token)
- when /^[[:upper:]]\w*$/
+ elsif token.match?(/^[[:upper:]]\w*$/)
on_const(token)
- when /^@@/
+ elsif token.start_with?("@@")
on_cvar(token)
- when /^@/
+ elsif token.start_with?("@")
on_ivar(token)
- when /^\$/
+ elsif token.start_with?("$")
on_gvar(token)
- when /^[[:punct:]]/
+ elsif token.match?(/^[[:punct:]]/)
on_op(token)
else
on_ident(token)
end
end
+ # Visit either `.`, `&.`, or `::`.
+ def visit_call_operator(token)
+ token == "." ? on_period(token) : on_op(token)
+ end
+
# Visit a node that represents a number. We need to explicitly handle the
# unary - operator.
def visit_number_node(node)
@@ -3321,6 +4138,9 @@ module Prism
location = node.location
if slice[0] == "-"
+ bounds(location.copy(length: 1))
+ on_op("-")
+
bounds(location.copy(start_offset: location.start_offset + 1))
value = yield slice[1..-1]
@@ -3369,26 +4189,24 @@ module Prism
# This method is responsible for updating lineno and column information
# to reflect the current node.
- #
- # This method could be drastically improved with some caching on the start
- # of every line, but for now it's good enough.
def bounds(location)
- @lineno = location.start_line
- @column = location.start_column
+ @lineno, @column = line_and_column_cache.line_and_column(location.start_offset)
end
+ # :startdoc:
+
##########################################################################
# Ripper interface
##########################################################################
# :stopdoc:
def _dispatch_0; end
- def _dispatch_1(_); end
- def _dispatch_2(_, _); end
- def _dispatch_3(_, _, _); end
- def _dispatch_4(_, _, _, _); end
- def _dispatch_5(_, _, _, _, _); end
- def _dispatch_7(_, _, _, _, _, _, _); end
+ def _dispatch_1(arg); arg end
+ def _dispatch_2(arg, _); arg end
+ def _dispatch_3(arg, _, _); arg end
+ def _dispatch_4(arg, _, _, _); arg end
+ def _dispatch_5(arg, _, _, _, _); arg end
+ def _dispatch_7(arg, _, _, _, _, _, _); arg end
# :startdoc:
#
diff --git a/lib/prism/translation/ripper/filter.rb b/lib/prism/translation/ripper/filter.rb
new file mode 100644
index 0000000000..19deef2d37
--- /dev/null
+++ b/lib/prism/translation/ripper/filter.rb
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module Prism
+ module Translation
+ class Ripper
+ class Filter # :nodoc:
+ # :stopdoc:
+ def initialize(src, filename = '-', lineno = 1)
+ @__lexer = Lexer.new(src, filename, lineno)
+ @__line = nil
+ @__col = nil
+ @__state = nil
+ end
+
+ def filename
+ @__lexer.filename
+ end
+
+ def lineno
+ @__line
+ end
+
+ def column
+ @__col
+ end
+
+ def state
+ @__state
+ end
+
+ def parse(init = nil)
+ data = init
+ @__lexer.lex.each do |pos, event, tok, state|
+ @__line, @__col = *pos
+ @__state = state
+ data = if respond_to?(event, true)
+ then __send__(event, tok, data)
+ else on_default(event, tok, data)
+ end
+ end
+ data
+ end
+
+ private
+
+ def on_default(event, token, data)
+ data
+ end
+ # :startdoc:
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb
new file mode 100644
index 0000000000..c6aeae4bd7
--- /dev/null
+++ b/lib/prism/translation/ripper/lexer.rb
@@ -0,0 +1,133 @@
+# frozen_string_literal: true
+# :markup: markdown
+
+require_relative "../ripper"
+
+module Prism
+ module Translation
+ class Ripper
+ class Lexer < Ripper # :nodoc:
+ class State # :nodoc:
+ attr_reader :to_int, :to_s
+
+ def initialize(i)
+ @to_int = i
+ @to_s = Ripper.lex_state_name(i)
+ freeze
+ end
+
+ def [](index)
+ case index
+ when 0, :to_int
+ @to_int
+ when 1, :to_s
+ @to_s
+ else
+ nil
+ end
+ end
+
+ alias to_i to_int
+ alias inspect to_s
+ def pretty_print(q) q.text(to_s) end
+ def ==(i) super or to_int == i end
+ def &(i) self.class.new(to_int & i) end
+ def |(i) self.class.new(to_int | i) end
+ def allbits?(i) to_int.allbits?(i) end
+ def anybits?(i) to_int.anybits?(i) end
+ def nobits?(i) to_int.nobits?(i) end
+
+ # Instances are frozen and there are only a handful of them so we
+ # cache them here.
+ STATES = Hash.new { |hash, key| hash[key] = State.new(key) }
+ private_constant :STATES
+
+ def self.[](i)
+ STATES[i]
+ end
+ end
+
+ class Elem # :nodoc:
+ attr_accessor :pos, :event, :tok, :state, :message
+
+ def initialize(pos, event, tok, state, message = nil)
+ @pos = pos
+ @event = event
+ @tok = tok
+ @state = State[state]
+ @message = message
+ end
+
+ def [](index)
+ case index
+ when 0, :pos
+ @pos
+ when 1, :event
+ @event
+ when 2, :tok
+ @tok
+ when 3, :state
+ @state
+ when 4, :message
+ @message
+ else
+ nil
+ end
+ end
+
+ def inspect
+ "#<#{self.class}: #{event}@#{pos[0]}:#{pos[1]}:#{state}: #{tok.inspect}#{": " if message}#{message}>"
+ end
+
+ alias to_s inspect
+
+ def pretty_print(q)
+ q.group(2, "#<#{self.class}:", ">") {
+ q.breakable
+ q.text("#{event}@#{pos[0]}:#{pos[1]}")
+ q.breakable
+ state.pretty_print(q)
+ q.breakable
+ q.text("token: ")
+ tok.pretty_print(q)
+ if message
+ q.breakable
+ q.text("message: ")
+ q.text(message)
+ end
+ }
+ end
+
+ def to_a
+ if @message
+ [@pos, @event, @tok, @state, @message]
+ else
+ [@pos, @event, @tok, @state]
+ end
+ end
+ end
+
+ # Pretty much just the same as Prism.lex_compat.
+ def lex(raise_errors: false)
+ Ripper.lex(@source, filename, lineno, raise_errors: raise_errors)
+ end
+
+ # Returns the lex_compat result wrapped in `Elem`. Errors are omitted.
+ # Since ripper is a streaming parser, tokens are expected to be emitted in the order
+ # that the parser encounters them. This is not implemented.
+ def parse(...)
+ lex(...).map do |position, event, token, state|
+ Elem.new(position, event, token, state.to_int)
+ end
+ end
+
+ # Similar to parse but ripper sorts the elements by position in the source. Also
+ # includes errors. Since prism does error recovery, in cases of syntax errors
+ # the result may differ greatly compared to ripper.
+ def scan(...)
+ parse(...)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/prism/translation/ripper/sexp.rb b/lib/prism/translation/ripper/sexp.rb
index dc26a639a3..46c0333544 100644
--- a/lib/prism/translation/ripper/sexp.rb
+++ b/lib/prism/translation/ripper/sexp.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: true
+# :markup: markdown
require_relative "../ripper"
@@ -7,9 +8,7 @@ module Prism
class Ripper
# This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
# returns the arrays of [type, *children].
- class SexpBuilder < Ripper
- # :stopdoc:
-
+ class SexpBuilder < Ripper # :nodoc:
attr_reader :error
private
@@ -64,16 +63,12 @@ module Prism
remove_method :on_parse_error
alias on_parse_error on_error
alias compile_error on_error
-
- # :startdoc:
end
# This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
# returns the same values as ::Ripper::SexpBuilder except with a couple of
# niceties that flatten linked lists into arrays.
- class SexpBuilderPP < SexpBuilder
- # :stopdoc:
-
+ class SexpBuilderPP < SexpBuilder # :nodoc:
private
def on_heredoc_dedent(val, width)
@@ -117,8 +112,6 @@ module Prism
alias_method "on_#{event}", :_dispatch_event_push
end
end
-
- # :startdoc:
end
end
end
diff --git a/lib/prism/translation/ripper/shim.rb b/lib/prism/translation/ripper/shim.rb
index 10e21cd16a..00ed625da3 100644
--- a/lib/prism/translation/ripper/shim.rb
+++ b/lib/prism/translation/ripper/shim.rb
@@ -2,4 +2,6 @@
# This writes the prism ripper translation into the Ripper constant so that
# users can transparently use Ripper without any changes.
+# :stopdoc:
Ripper = Prism::Translation::Ripper
+# :startdoc:
diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb
index 465b693470..42bc5ee658 100644
--- a/lib/prism/translation/ruby_parser.rb
+++ b/lib/prism/translation/ruby_parser.rb
@@ -1,21 +1,27 @@
# frozen_string_literal: true
+# :markup: markdown
begin
- require "ruby_parser"
+ require "sexp"
rescue LoadError
- warn(%q{Error: Unable to load ruby_parser. Add `gem "ruby_parser"` to your Gemfile.})
+ warn(%q{Error: Unable to load sexp. Add `gem "sexp_processor"` to your Gemfile.})
exit(1)
end
+class RubyParser # :nodoc:
+ class SyntaxError < RuntimeError # :nodoc:
+ end
+end
+
module Prism
module Translation
# This module is the entry-point for converting a prism syntax tree into the
# seattlerb/ruby_parser gem's syntax tree.
class RubyParser
# A prism visitor that builds Sexp objects.
- class Compiler < ::Prism::Compiler
+ class Compiler < ::Prism::Compiler # :nodoc:
# This is the name of the file that we are compiling. We set it on every
- # Sexp object that is generated, and also use it to compile __FILE__
+ # Sexp object that is generated, and also use it to compile `__FILE__`
# nodes.
attr_reader :file
@@ -131,7 +137,7 @@ module Prism
# $+
# ^^
def visit_back_reference_read_node(node)
- s(node, :back_ref, node.name.name.delete_prefix("$").to_sym)
+ s(node, :back_ref, node.name.to_s.delete_prefix("$").to_sym)
end
# begin end
@@ -366,14 +372,18 @@ module Prism
visit(node.constant_path)
end
- if node.body.nil?
- s(node, :class, name, visit(node.superclass))
- elsif node.body.is_a?(StatementsNode)
- compiler = copy_compiler(in_def: false)
- s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) })
- else
- s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false)))
- end
+ result =
+ if node.body.nil?
+ s(node, :class, name, visit(node.superclass))
+ elsif node.body.is_a?(StatementsNode)
+ compiler = copy_compiler(in_def: false)
+ s(node, :class, name, visit(node.superclass)).concat(node.body.body.map { |child| child.accept(compiler) })
+ else
+ s(node, :class, name, visit(node.superclass), node.body.accept(copy_compiler(in_def: false)))
+ end
+
+ attach_comments(result, node)
+ result
end
# @@foo
@@ -384,9 +394,6 @@ module Prism
# @@foo = 1
# ^^^^^^^^^
- #
- # @@foo, @@bar = 1
- # ^^^^^ ^^^^^
def visit_class_variable_write_node(node)
s(node, class_variable_write_type, node.name, visit_write_value(node.value))
end
@@ -524,7 +531,9 @@ module Prism
s(node, :defs, visit(node.receiver), name)
end
+ attach_comments(result, node)
result.line(node.name_loc.start_line)
+
if node.parameters.nil?
result << s(node, :args).line(node.name_loc.start_line)
else
@@ -639,9 +648,6 @@ module Prism
# $foo = 1
# ^^^^^^^^
- #
- # $foo, $bar = 1
- # ^^^^ ^^^^
def visit_global_variable_write_node(node)
s(node, :gasgn, node.name, visit_write_value(node.value))
end
@@ -787,9 +793,6 @@ module Prism
# @foo = 1
# ^^^^^^^^
- #
- # @foo, @bar = 1
- # ^^^^ ^^^^
def visit_instance_variable_write_node(node)
s(node, :iasgn, node.name, visit_write_value(node.value))
end
@@ -929,9 +932,9 @@ module Prism
if result == :space
# continue
elsif result.is_a?(String)
- results[0] << result
+ results[0] = "#{results[0]}#{result}"
elsif result.is_a?(Array) && result[0] == :str
- results[0] << result[1]
+ results[0] = "#{results[0]}#{result[1]}"
else
results << result
state = :interpolated_content
@@ -940,7 +943,7 @@ module Prism
if result == :space
# continue
elsif visited[index - 1] != :space && result.is_a?(Array) && result[0] == :str && results[-1][0] == :str && (results[-1].line_max == result.line)
- results[-1][1] << result[1]
+ results[-1][1] = "#{results[-1][1]}#{result[1]}"
results[-1].line_max = result.line_max
else
results << result
@@ -976,8 +979,8 @@ module Prism
def visit_lambda_node(node)
parameters =
case node.parameters
- when nil, NumberedParametersNode
- s(node, :args)
+ when nil, ItParametersNode, NumberedParametersNode
+ 0
else
visit(node.parameters)
end
@@ -1001,9 +1004,6 @@ module Prism
# foo = 1
# ^^^^^^^
- #
- # foo, bar = 1
- # ^^^ ^^^
def visit_local_variable_write_node(node)
s(node, :lasgn, node.name, visit_write_value(node.value))
end
@@ -1059,8 +1059,8 @@ module Prism
# A node that is missing from the syntax tree. This is only used in the
# case of a syntax error. The parser gem doesn't have such a concept, so
# we invent our own here.
- def visit_missing_node(node)
- raise "Cannot visit missing node directly"
+ def visit_error_recovery_node(node)
+ raise "Cannot visit error recovery node directly"
end
# module Foo; end
@@ -1073,14 +1073,18 @@ module Prism
visit(node.constant_path)
end
- if node.body.nil?
- s(node, :module, name)
- elsif node.body.is_a?(StatementsNode)
- compiler = copy_compiler(in_def: false)
- s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) })
- else
- s(node, :module, name, node.body.accept(copy_compiler(in_def: false)))
- end
+ result =
+ if node.body.nil?
+ s(node, :module, name)
+ elsif node.body.is_a?(StatementsNode)
+ compiler = copy_compiler(in_def: false)
+ s(node, :module, name).concat(node.body.body.map { |child| child.accept(compiler) })
+ else
+ s(node, :module, name, node.body.accept(copy_compiler(in_def: false)))
+ end
+
+ attach_comments(result, node)
+ result
end
# foo, bar = baz
@@ -1136,6 +1140,12 @@ module Prism
s(node, :nil)
end
+ # def foo(&nil); end
+ # ^^^^
+ def visit_no_block_parameter_node(node)
+ :"&nil"
+ end
+
# def foo(**nil); end
# ^^^^^
def visit_no_keywords_parameter_node(node)
@@ -1188,7 +1198,7 @@ module Prism
# ^^^^^^^^^
def visit_parameters_node(node)
children =
- node.compact_child_nodes.map do |element|
+ node.each_child_node.map do |element|
if element.is_a?(MultiTargetNode)
visit_destructured_parameter(element)
else
@@ -1440,6 +1450,7 @@ module Prism
unescaped = node.unescaped
if node.forced_binary_encoding?
+ unescaped = unescaped.dup
unescaped.force_encoding(Encoding::BINARY)
end
@@ -1536,6 +1547,17 @@ module Prism
private
+ # Attach prism comments to the given sexp.
+ def attach_comments(sexp, node)
+ return unless node.comments
+ return if node.comments.empty?
+
+ extra = node.location.start_line - node.comments.last.location.start_line
+ comments = node.comments.map(&:slice)
+ comments.concat([nil] * [0, extra].max)
+ sexp.comments = comments.join("\n")
+ end
+
# Create a new compiler with the given options.
def copy_compiler(in_def: self.in_def, in_pattern: self.in_pattern)
Compiler.new(file, in_def: in_def, in_pattern: in_pattern)
@@ -1558,7 +1580,7 @@ module Prism
else
parameters =
case block.parameters
- when nil, NumberedParametersNode
+ when nil, ItParametersNode, NumberedParametersNode
0
else
visit(block.parameters)
@@ -1614,6 +1636,14 @@ module Prism
translate(Prism.parse_file(filepath, partial_script: true), filepath)
end
+ # Parse the give file and translate it into the
+ # seattlerb/ruby_parser gem's Sexp format. This method is
+ # provided for API compatibility to RubyParser and takes an
+ # optional +timeout+ argument.
+ def process(ruby, file = "(string)", timeout = nil)
+ Timeout.timeout(timeout) { parse(ruby, file) }
+ end
+
class << self
# Parse the given source and translate it into the seattlerb/ruby_parser
# gem's Sexp format.
@@ -1638,6 +1668,7 @@ module Prism
raise ::RubyParser::SyntaxError, "#{filepath}:#{error.location.start_line} :: #{error.message}"
end
+ result.attach_comments!
result.value.accept(Compiler.new(filepath))
end
end